[CRIU] [PATCH 1/3] page-pipe: split dumping memory on chunks (v2)

Andrey Vagin avagin at openvz.org
Mon Feb 3 03:19:35 PST 2014


The problem is that vmsplice() to a big pipe fails very often.

The kernel allocates a linear chunk of memory for pipe buffer
descriptors, but big allocations in the kernel can fail.

So we need to restrict the maximal capacity of pipes. But the number of
pipes is restricted too, so we need to split the memory being dumped into chunks.

In this patch we calculate the pipe size for which vmsplice() will not
fail.

v2: s/batch/chunk and a few other small fixes
Signed-off-by: Andrey Vagin <avagin at openvz.org>
---
 include/page-pipe.h |  13 +++++++
 page-pipe.c         | 107 ++++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 112 insertions(+), 8 deletions(-)

diff --git a/include/page-pipe.h b/include/page-pipe.h
index 393627b..041c0e7 100644
--- a/include/page-pipe.h
+++ b/include/page-pipe.h
@@ -78,6 +78,9 @@ struct page_pipe_buf {
 	struct list_head l;	/* links into page_pipe->bufs */
 };
 
+struct page_pipe;
+typedef int (page_pipe_chunk_cb)(struct page_pipe *pp, void *args);
+
 struct page_pipe {
 	unsigned int nr_pipes;	/* how many page_pipe_bufs in there */
 	struct list_head bufs;	/* list of bufs */
@@ -89,6 +92,11 @@ struct page_pipe {
 	unsigned int nr_holes;	/* number of holes allocated */
 	unsigned int free_hole;	/* number of holes in use */
 	struct iovec *holes;	/* holes */
+
+	unsigned int pipe_max_size; /* max allowable pipe capacity */
+
+	page_pipe_chunk_cb *chunk_cb; /* callback to handle one chunk of data */
+	void *chunk_args;	/* arguments for the callback */
 };
 
 extern struct page_pipe *create_page_pipe(unsigned int nr, struct iovec *);
@@ -97,5 +105,10 @@ extern int page_pipe_add_page(struct page_pipe *p, unsigned long addr);
 extern int page_pipe_add_hole(struct page_pipe *p, unsigned long addr);
 
 extern void debug_show_page_pipe(struct page_pipe *pp);
+void page_pipe_close_buf(struct page_pipe *pp, struct page_pipe_buf *ppb);
+void page_pipe_cleanup(struct page_pipe *pp);
+void page_pipe_start_chunk(struct page_pipe *pp,
+				page_pipe_chunk_cb *cb, void *args);
+int page_pipe_complete_chunk(struct page_pipe *pp);
 
 #endif /* __CR_PAGE_PIPE_H__ */
diff --git a/page-pipe.c b/page-pipe.c
index bfe1989..b2f2928 100644
--- a/page-pipe.c
+++ b/page-pipe.c
@@ -1,5 +1,6 @@
 #include <unistd.h>
 #include <fcntl.h>
+#include <limits.h>
 
 #undef LOG_PREFIX
 #define LOG_PREFIX "page-pipe: "
@@ -7,6 +8,29 @@
 #include "util.h"
 #include "page-pipe.h"
 
+static int open_pipe(struct page_pipe *pp, int *fd)
+{
+	if (pipe(fd)) {
+		pr_perror("Can't make pipe for page-pipe");
+		return -1;
+	}
+
+	pp->nr_pipes++;
+
+	return 0;
+}
+
+void page_pipe_close_buf(struct page_pipe *pp, struct page_pipe_buf *ppb)
+{
+	if (ppb->p[0] == -1)
+		return;
+
+	close_safe(&ppb->p[0]);
+	close_safe(&ppb->p[1]);
+
+	pp->nr_pipes--;
+}
+
 static int page_pipe_grow(struct page_pipe *pp)
 {
 	struct page_pipe_buf *ppb;
@@ -17,7 +41,7 @@ static int page_pipe_grow(struct page_pipe *pp)
 	if (!ppb)
 		return -1;
 
-	if (pipe(ppb->p)) {
+	if (open_pipe(pp, ppb->p)) {
 		xfree(ppb);
 		pr_perror("Can't make pipe for page-pipe");
 		return -1;
@@ -29,7 +53,6 @@ static int page_pipe_grow(struct page_pipe *pp)
 	ppb->iov = &pp->iovs[pp->free_iov];
 
 	list_add_tail(&ppb->l, &pp->bufs);
-	pp->nr_pipes++;
 
 	return 0;
 }
@@ -52,13 +75,52 @@ struct page_pipe *create_page_pipe(unsigned int nr_segs, struct iovec *iovs)
 		pp->free_hole = 0;
 		pp->holes = NULL;
 
-		if (page_pipe_grow(pp))
-			return NULL;
+		pp->pipe_max_size = UINT_MAX;
 	}
 
 	return pp;
 }
 
+/* The number of pipes for one batch request */
+#define NR_PIPES_PER_BATCH 8
+
+#define PAGE_ALLOC_COSTLY_ORDER 3 /* from the kernel source code */
+struct kernel_pipe_buffer {
+        struct page *page;
+        unsigned int offset, len;
+        const struct pipe_buf_operations *ops;
+        unsigned int flags;
+        unsigned long private;
+};
+
+void page_pipe_start_chunk(struct page_pipe *pp,
+				page_pipe_chunk_cb *cb, void *args)
+{
+	BUG_ON(!list_empty(&pp->bufs));
+
+	/*
+	 * The kernel allocates the linear chunk of memory for pipe buffers.
+	 * Allocation of chunks with size more than PAGE_ALLOC_COSTLY_ORDER
+	 * fails very often, so we need to restrict the pipe capacity to not
+	 * allocate big chunks.
+	 */
+	pp->pipe_max_size = (1 << PAGE_ALLOC_COSTLY_ORDER) * PAGE_SIZE /
+					sizeof(struct kernel_pipe_buffer);
+
+	pp->chunk_cb	= cb;
+	pp->chunk_args	= args;
+}
+
+int page_pipe_complete_chunk(struct page_pipe *pp)
+{
+	int ret;
+
+	ret = pp->chunk_cb(pp, pp->chunk_args);
+	page_pipe_cleanup(pp);
+
+	return ret;
+}
+
 void destroy_page_pipe(struct page_pipe *pp)
 {
 	struct page_pipe_buf *ppb, *n;
@@ -66,14 +128,28 @@ void destroy_page_pipe(struct page_pipe *pp)
 	pr_debug("Killing page pipe\n");
 
 	list_for_each_entry_safe(ppb, n, &pp->bufs, l) {
-		close(ppb->p[0]);
-		close(ppb->p[1]);
+		page_pipe_close_buf(pp, ppb);
 		xfree(ppb);
 	}
 
 	xfree(pp);
 }
 
+void page_pipe_cleanup(struct page_pipe *pp)
+{
+	struct page_pipe_buf *ppb, *n;
+
+	pr_debug("Clean up page pipe\n");
+
+	list_for_each_entry_safe(ppb, n, &pp->bufs, l) {
+		page_pipe_close_buf(pp, ppb);
+		list_del(&ppb->l);
+		xfree(ppb);
+	}
+
+	pp->free_hole = 0;
+}
+
 #define PPB_IOV_BATCH	8
 
 static inline int try_add_page_to(struct page_pipe *pp, struct page_pipe_buf *ppb,
@@ -82,9 +158,13 @@ static inline int try_add_page_to(struct page_pipe *pp, struct page_pipe_buf *pp
 	struct iovec *iov;
 
 	if (ppb->pages_in == ppb->pipe_size) {
+		unsigned long new_size = ppb->pipe_size << 1;
 		int ret;
 
-		ret = fcntl(ppb->p[0], F_SETPIPE_SZ, (ppb->pipe_size * PAGE_SIZE) << 1);
+		if (new_size > pp->pipe_max_size)
+			return 1;
+
+		ret = fcntl(ppb->p[0], F_SETPIPE_SZ, new_size * PAGE_SIZE);
 		if (ret < 0)
 			return 1; /* need to add another buf */
 
@@ -122,7 +202,9 @@ out:
 
 static inline int try_add_page(struct page_pipe *pp, unsigned long addr)
 {
-	BUG_ON(list_empty(&pp->bufs));
+	if (list_empty(&pp->bufs))
+		return 1;
+
 	return try_add_page_to(pp, list_entry(pp->bufs.prev, struct page_pipe_buf, l), addr);
 }
 
@@ -134,6 +216,15 @@ int page_pipe_add_page(struct page_pipe *pp, unsigned long addr)
 	if (ret <= 0)
 		return ret;
 
+	if (pp->chunk_cb && pp->nr_pipes == NR_PIPES_PER_BATCH) {
+		ret = pp->chunk_cb(pp, pp->chunk_args);
+		page_pipe_cleanup(pp);
+
+		if (ret)
+			return ret;
+
+	}
+
 	ret = page_pipe_grow(pp);
 	if (ret < 0)
 		return ret;
-- 
1.8.5.3



More information about the CRIU mailing list