[CRIU] [PATCHv2] page-xfer: Be more verbose on error-paths

Dmitry Safonov dsafonov at virtuozzo.com
Thu Jun 22 22:21:55 MSK 2017


On 06/22/2017 09:12 PM, Dmitry Safonov wrote:
> There is a lack of logging in page-xfer and I've an issue where
> page-server closed session, but it's unclear why:
> 
>> (03.571793) Punch!/(nil)/4096/
>> (03.572431) pr331:New bunch/(nil)/0/
>> (03.572442) Session over

Hmm, I managed to miss that this is called on
page_server_close() => close_page_read() => punch_hole()

So at least, where is error in my bug is a bit clearer.
But still this patch may be worth applying.

> 
> Log errors to help debugging page server.
> 
> Signed-off-by: Dmitry Safonov <dsafonov at virtuozzo.com>
> ---
> v2: It's OK if the socket to page-server is closed - that is part
>      of API to terminate connection like that.
> 
>   criu/page-xfer.c | 50 ++++++++++++++++++++++++++++++++++++++------------
>   1 file changed, 38 insertions(+), 12 deletions(-)
> 
> diff --git a/criu/page-xfer.c b/criu/page-xfer.c
> index 49693bb179d6..07aff7289c12 100644
> --- a/criu/page-xfer.c
> +++ b/criu/page-xfer.c
> @@ -284,11 +284,14 @@ static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, long id)
>   	u32 pages_id;
>   
>   	xfer->pmi = open_image(fd_type, O_DUMP, id);
> -	if (!xfer->pmi)
> +	if (!xfer->pmi) {
> +		pr_err("Failed to open image file\n");
>   		return -1;
> +	}
>   
>   	xfer->pi = open_pages_image(O_DUMP, xfer->pmi, &pages_id);
>   	if (!xfer->pi) {
> +		pr_err("Failed to open pages image file\n");
>   		close_image(xfer->pmi);
>   		return -1;
>   	}
> @@ -329,6 +332,7 @@ static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, long id)
>   		if (!xfer->parent) {
>   			if (!opts.remote)
>   				close(pfd);
> +			pr_err("xmalloc() failed to allocate parent\n");
>   			return -1;
>   		}
>   
> @@ -468,9 +472,10 @@ int check_parent_local_xfer(int fd_type, int id)
>   
>   	if (opts.remote) {
>   		pfd = get_curr_parent_snapshot_id_idx();
> -		pr_err("Unable to get parent snapshot id\n");
> -		if (pfd == -1)
> +		if (pfd == -1) {
> +			pr_err("Unable to get parent snapshot id\n");
>   			return -1;
> +		}
>   	} else {
>   		pfd = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY);
>   		if (pfd < 0 && errno == ENOENT)
> @@ -499,8 +504,10 @@ static int page_server_check_parent(int sk, struct page_server_iov *pi)
>   	id = decode_pm_id(pi->dst_id);
>   
>   	ret = check_parent_local_xfer(type, id);
> -	if (ret < 0)
> +	if (ret < 0) {
> +		pr_err("check_parent_local_xfer() failed: %d\n", ret);
>   		return -1;
> +	}
>   
>   	if (write(sk, &ret, sizeof(ret)) != sizeof(ret)) {
>   		pr_perror("Unable to send response");
> @@ -580,8 +587,10 @@ static int page_server_open(int sk, struct page_server_iov *pi)
>   
>   	page_server_close();
>   
> -	if (open_page_local_xfer(&cxfer.loc_xfer, type, id))
> +	if (open_page_local_xfer(&cxfer.loc_xfer, type, id)) {
> +		pr_err("open_page_local_xfer() failed\n");
>   		return -1;
> +	}
>   
>   	cxfer.dst_id = pi->dst_id;
>   
> @@ -619,8 +628,10 @@ static int page_server_add(int sk, struct page_server_iov *pi, u32 flags)
>   		return -1;
>   
>   	psi2iovec(pi, &iov);
> -	if (lxfer->write_pagemap(lxfer, &iov, flags))
> +	if (lxfer->write_pagemap(lxfer, &iov, flags)) {
> +		pr_err("Failed to write_pagemap()\n");
>   		return -1;
> +	}
>   
>   	if (!(flags & PE_PRESENT))
>   		return 0;
> @@ -653,8 +664,10 @@ static int page_server_add(int sk, struct page_server_iov *pi, u32 flags)
>   			return -1;
>   		}
>   
> -		if (lxfer->write_pages(lxfer, cxfer.p[0], chunk))
> +		if (lxfer->write_pages(lxfer, cxfer.p[0], chunk)) {
> +			pr_err("Failed to write_pages()\n");
>   			return -1;
> +		}
>   
>   		len -= chunk;
>   	}
> @@ -674,8 +687,10 @@ static int page_server_get_pages(int sk, struct page_server_iov *pi)
>   
>   	ret = page_pipe_read(pp, &pipe_read_dest, pi->vaddr,
>   			     &pi->nr_pages, PPB_LAZY);
> -	if (ret)
> +	if (ret) {
> +		pr_err("page_pipe_read() failed: %d\n", ret);
>   		return ret;
> +	}
>   
>   	if (pi->nr_pages == 0) {
>   		/* no iovs found means we've hit a zero page */
> @@ -689,8 +704,10 @@ static int page_server_get_pages(int sk, struct page_server_iov *pi)
>   		return -1;
>   
>   	ret = splice(pipe_read_dest.p[0], NULL, sk, NULL, len, SPLICE_F_MOVE);
> -	if (ret != len)
> +	if (ret != len) {
> +		pr_perror("Failed to splice()");
>   		return -1;
> +	}
>   
>   	tcp_nodelay(sk, true);
>   
> @@ -729,11 +746,13 @@ static int page_server_serve(int sk)
>   		u32 cmd;
>   
>   		ret = recv(sk, &pi, sizeof(pi), MSG_WAITALL);
> -		if (!ret)
> +		if (!ret) {
> +			pr_info("Other end of socket is closed, session end\n");
>   			break;
> +		}
>   
>   		if (ret != sizeof(pi)) {
> -			pr_perror("Can't read pagemap from socket");
> +			pr_perror("Can't read pageserver's command from socket, recv() = %d", ret);
>   			ret = -1;
>   			break;
>   		}
> @@ -788,8 +807,15 @@ static int page_server_serve(int sk)
>   			break;
>   		}
>   
> -		if (ret || (pi.cmd == PS_IOV_FLUSH_N_CLOSE))
> +		if (ret) {
> +			pr_err("Ending session due to ret code %d\n", ret);
>   			break;
> +		}
> +
> +		if (pi.cmd == PS_IOV_FLUSH_N_CLOSE) {
> +			pr_info("Closing session with flush-close cmd\n");
> +			break;
> +		}
>   	}
>   
>   	if (receiving_pages && !ret && !flushed) {
> 


-- 
              Dmitry


More information about the CRIU mailing list