[CRIU] [PATCH 1/3] page-pipe: split dumping memory on chunks (v3)

Andrey Vagin avagin at openvz.org
Sat Feb 8 11:16:43 PST 2014


The problem is that vmsplice() to a big pipe fails very often.

The kernel allocates a linear chunk of memory for pipe buffer
descriptors, but a big allocation in the kernel can fail.

So we need to restrict the maximum capacity of pipes. But the number of
pipes is restricted too, so we need to split the dumped memory into chunks.

In this patch we calculate the pipe size for which vmsplice() will not
fail.

v2: s/batch/chunk and a few other small fixes
v3: Remove callbacks from page_pipes and reuse pipes
Signed-off-by: Andrey Vagin <avagin at openvz.org>
---
 include/page-pipe.h |  8 ++++++-
 mem.c               |  2 +-
 page-pipe.c         | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 shmem.c             |  2 +-
 4 files changed, 69 insertions(+), 6 deletions(-)

diff --git a/include/page-pipe.h b/include/page-pipe.h
index 393627b..a2dc268 100644
--- a/include/page-pipe.h
+++ b/include/page-pipe.h
@@ -81,6 +81,7 @@ struct page_pipe_buf {
 struct page_pipe {
 	unsigned int nr_pipes;	/* how many page_pipe_bufs in there */
 	struct list_head bufs;	/* list of bufs */
+	struct list_head free_bufs;	/* list of unused bufs kept for reuse */
 	unsigned int nr_iovs;	/* number of iovs */
 	unsigned int free_iov;	/* first free iov */
 	struct iovec *iovs;	/* iovs. They are provided into create_page_pipe
@@ -89,13 +90,18 @@ struct page_pipe {
 	unsigned int nr_holes;	/* number of holes allocated */
 	unsigned int free_hole;	/* number of holes in use */
 	struct iovec *holes;	/* holes */
+
+	bool chunk_mode;	/* Restrict the maximum buffer size of pipes
+				   and dump memory for a few iterations */
 };
 
-extern struct page_pipe *create_page_pipe(unsigned int nr, struct iovec *);
+extern struct page_pipe *create_page_pipe(unsigned int nr,
+					  struct iovec *, bool chunk_mode);
 extern void destroy_page_pipe(struct page_pipe *p);
 extern int page_pipe_add_page(struct page_pipe *p, unsigned long addr);
 extern int page_pipe_add_hole(struct page_pipe *p, unsigned long addr);
 
 extern void debug_show_page_pipe(struct page_pipe *pp);
+void page_pipe_reinit(struct page_pipe *pp);
 
 #endif /* __CR_PAGE_PIPE_H__ */
diff --git a/mem.c b/mem.c
index f27d899..8a3dc37 100644
--- a/mem.c
+++ b/mem.c
@@ -220,7 +220,7 @@ static int __parasite_dump_pages_seized(struct parasite_ctl *ctl,
 		goto out_free;
 
 	ret = -1;
-	pp = create_page_pipe(vma_area_list->priv_size / 2, pargs_iovs(args));
+	pp = create_page_pipe(vma_area_list->priv_size / 2, pargs_iovs(args), false);
 	if (!pp)
 		goto out_close;
 
diff --git a/page-pipe.c b/page-pipe.c
index bfe1989..3736c28 100644
--- a/page-pipe.c
+++ b/page-pipe.c
@@ -7,12 +7,24 @@
 #include "util.h"
 #include "page-pipe.h"
 
+/* The number of pipes for one chunk */
+#define NR_PIPES_PER_CHUNK 8
+
 static int page_pipe_grow(struct page_pipe *pp)
 {
 	struct page_pipe_buf *ppb;
 
 	pr_debug("Will grow page pipe (iov off is %u)\n", pp->free_iov);
 
+	if (!list_empty(&pp->free_bufs)) {
+		ppb = list_first_entry(&pp->free_bufs, struct page_pipe_buf, l);
+		list_move_tail(&ppb->l, &pp->bufs);
+		goto out;
+	}
+
+	if (pp->chunk_mode && pp->nr_pipes == NR_PIPES_PER_CHUNK)
+		return -EAGAIN;
+
 	ppb = xmalloc(sizeof(*ppb));
 	if (!ppb)
 		return -1;
@@ -24,17 +36,20 @@ static int page_pipe_grow(struct page_pipe *pp)
 	}
 
 	ppb->pipe_size = fcntl(ppb->p[0], F_GETPIPE_SZ, 0) / PAGE_SIZE;
+
+	list_add_tail(&ppb->l, &pp->bufs);
+out:
 	ppb->pages_in = 0;
 	ppb->nr_segs = 0;
 	ppb->iov = &pp->iovs[pp->free_iov];
 
-	list_add_tail(&ppb->l, &pp->bufs);
 	pp->nr_pipes++;
 
 	return 0;
 }
 
-struct page_pipe *create_page_pipe(unsigned int nr_segs, struct iovec *iovs)
+struct page_pipe *create_page_pipe(unsigned int nr_segs,
+				   struct iovec *iovs, bool chunk_mode)
 {
 	struct page_pipe *pp;
 
@@ -44,6 +59,7 @@ struct page_pipe *create_page_pipe(unsigned int nr_segs, struct iovec *iovs)
 	if (pp) {
 		pp->nr_pipes = 0;
 		INIT_LIST_HEAD(&pp->bufs);
+		INIT_LIST_HEAD(&pp->free_bufs);
 		pp->nr_iovs = nr_segs;
 		pp->iovs = iovs;
 		pp->free_iov = 0;
@@ -52,6 +68,8 @@ struct page_pipe *create_page_pipe(unsigned int nr_segs, struct iovec *iovs)
 		pp->free_hole = 0;
 		pp->holes = NULL;
 
+		pp->chunk_mode = chunk_mode;
+
 		if (page_pipe_grow(pp))
 			return NULL;
 	}
@@ -65,6 +83,7 @@ void destroy_page_pipe(struct page_pipe *pp)
 
 	pr_debug("Killing page pipe\n");
 
+	list_splice(&pp->free_bufs, &pp->bufs);
 	list_for_each_entry_safe(ppb, n, &pp->bufs, l) {
 		close(ppb->p[0]);
 		close(ppb->p[1]);
@@ -74,6 +93,40 @@ void destroy_page_pipe(struct page_pipe *pp)
 	xfree(pp);
 }
 
+void page_pipe_reinit(struct page_pipe *pp)
+{
+	struct page_pipe_buf *ppb, *n;
+
+	BUG_ON(!pp->chunk_mode);
+
+	pr_debug("Clean up page pipe\n");
+
+	list_for_each_entry_safe(ppb, n, &pp->bufs, l)
+		list_move(&ppb->l, &pp->free_bufs);
+
+	pp->free_hole = 0;
+
+	if (page_pipe_grow(pp))
+		BUG(); /* It can't fail, because ppb is in free_bufs */
+}
+
+#define PAGE_ALLOC_COSTLY_ORDER 3 /* from the kernel source code */
+struct kernel_pipe_buffer {
+        struct page *page;
+        unsigned int offset, len;
+        const struct pipe_buf_operations *ops;
+        unsigned int flags;
+        unsigned long private;
+};
+
+/*
+ * The kernel allocates the linear chunk of memory for pipe buffers.
+ * Allocation of chunks with size more than PAGE_ALLOC_COSTLY_ORDER
+ * fails very often, so we need to restrict the pipe capacity to not
+ * allocate big chunks.
+ */
+#define PIPE_MAX_SIZE ((1 << PAGE_ALLOC_COSTLY_ORDER) * PAGE_SIZE /	\
+			sizeof(struct kernel_pipe_buffer))
 #define PPB_IOV_BATCH	8
 
 static inline int try_add_page_to(struct page_pipe *pp, struct page_pipe_buf *ppb,
@@ -82,9 +135,13 @@ static inline int try_add_page_to(struct page_pipe *pp, struct page_pipe_buf *pp
 	struct iovec *iov;
 
 	if (ppb->pages_in == ppb->pipe_size) {
+		unsigned long new_size = ppb->pipe_size << 1;
 		int ret;
 
-		ret = fcntl(ppb->p[0], F_SETPIPE_SZ, (ppb->pipe_size * PAGE_SIZE) << 1);
+		if (new_size > PIPE_MAX_SIZE)
+			return 1;
+
+		ret = fcntl(ppb->p[0], F_SETPIPE_SZ, new_size * PAGE_SIZE);
 		if (ret < 0)
 			return 1; /* need to add another buf */
 
diff --git a/shmem.c b/shmem.c
index 1deb591..0c1a97f 100644
--- a/shmem.c
+++ b/shmem.c
@@ -317,7 +317,7 @@ static int dump_one_shmem(struct shmem_info_dump *si)
 	if (!iovs)
 		goto err_unmap;
 
-	pp = create_page_pipe((nrpages + 1) / 2, iovs);
+	pp = create_page_pipe((nrpages + 1) / 2, iovs, false);
 	if (!pp)
 		goto err_iovs;
 
-- 
1.8.5.3



More information about the CRIU mailing list