<div dir="ltr"><div>This patch uses the process_vm_readv syscall to collect memory pages from the target process during pre-dump. process_vm_readv reads the pages into a user-space buffer, which is then vmspliced into the page pipes.</div><div><br></div><div>Signed-off-by: Abhishek Dubey &lt;<a href="mailto:dubeyabhishek777@gmail.com">dubeyabhishek777@gmail.com</a>&gt;</div>---<br> criu/cr-dump.c           |  3 +-<br> criu/include/page-xfer.h |  2 +-<br> criu/mem.c               | 88 ++++++++++++++++++------------------------------<br> criu/page-pipe.c         |  4 +--<br> criu/page-xfer.c         | 62 ++++++++++++++++++++++++++++++++--<br> criu/shmem.c             | 19 +++--------<br> 6 files changed, 102 insertions(+), 76 deletions(-)<br><br>diff --git a/criu/cr-dump.c b/criu/cr-dump.c<br>index 7f2e5ed..ee5f4f3 100644<br>--- a/criu/cr-dump.c<br>+++ b/criu/cr-dump.c<br>@@ -1501,6 +1501,7 @@ static int cr_pre_dump_finish(int status)<br>                 struct parasite_ctl *ctl = dmpi(item)-&gt;parasite_ctl;<br>                 struct page_pipe *mem_pp;<br>                 struct page_xfer xfer;<br>+                size_t off = 0;<br> <br>                 if (!ctl)<br>                         continue;<br>@@ -1512,7 +1513,7 @@ static int cr_pre_dump_finish(int status)<br>                         goto err;<br> <br>                 mem_pp = dmpi(item)-&gt;mem_pp;<br>-                ret = page_xfer_dump_pages(&amp;xfer, mem_pp);<br>+                ret = page_xfer_dump_pages(item-&gt;pid-&gt;real, &amp;xfer, mem_pp, &amp;off);<br> <br>                 xfer.close(&amp;xfer);<br> <br>diff --git a/criu/include/page-xfer.h b/criu/include/page-xfer.h<br>index fa72273..74d42f2 100644<br>--- a/criu/include/page-xfer.h<br>+++ b/criu/include/page-xfer.h<br>@@ -47,7 +47,7 @@ struct page_xfer {<br> <br> extern int open_page_xfer(struct page_xfer *xfer, int fd_type, unsigned long id);<br> struct page_pipe;<br>-extern int page_xfer_dump_pages(struct page_xfer *, struct page_pipe 
*);<br>+extern int page_xfer_dump_pages(int pid, struct page_xfer *, struct page_pipe *, size_t *poff);<br> extern int connect_to_page_server_to_send(void);<br> extern int connect_to_page_server_to_recv(int epfd);<br> extern int disconnect_from_page_server(void);<br>diff --git a/criu/mem.c b/criu/mem.c<br>index 6a1a87a..844d726 100644<br>--- a/criu/mem.c<br>+++ b/criu/mem.c<br>@@ -260,39 +260,7 @@ static struct parasite_dump_pages_args *prep_dump_pages_args(struct parasite_ctl<br>         return args;<br> }<br> <br>-static int drain_pages(struct page_pipe *pp, struct parasite_ctl *ctl,<br>-                      struct parasite_dump_pages_args *args)<br>-{<br>-        struct page_pipe_buf *ppb;<br>-        int ret = 0;<br>-<br>-        debug_show_page_pipe(pp);<br>-<br>-        /* Step 2 -- grab pages into page-pipe */<br>-        list_for_each_entry(ppb, &amp;pp-&gt;bufs, l) {<br>-                args-&gt;nr_segs = ppb-&gt;nr_segs;<br>-                args-&gt;nr_pages = ppb-&gt;pages_in;<br>-                pr_debug(&quot;PPB: %d pages %d segs %u pipe %d off\n&quot;,<br>-                                args-&gt;nr_pages, args-&gt;nr_segs, ppb-&gt;pipe_size, args-&gt;off);<br>-<br>-                ret = compel_rpc_call(PARASITE_CMD_DUMPPAGES, ctl);<br>-                if (ret &lt; 0)<br>-                        return -1;<br>-                ret = compel_util_send_fd(ctl, ppb-&gt;p[1]);<br>-                if (ret)<br>-                        return -1;<br>-<br>-                ret = compel_rpc_sync(PARASITE_CMD_DUMPPAGES, ctl);<br>-                if (ret &lt; 0)<br>-                        return -1;<br>-<br>-                args-&gt;off += args-&gt;nr_segs;<br>-        }<br>-<br>-        return 0;<br>-}<br>-<br>-static int xfer_pages(struct page_pipe *pp, struct page_xfer *xfer)<br>+static int xfer_pages( int pid, struct page_pipe *pp, struct page_xfer *xfer, size_t *poff)<br> {<br>         int ret;<br> <br>@@ -301,7 +269,7 @@ static int xfer_pages(struct 
page_pipe *pp, struct page_xfer *xfer)<br>          *           pre-dump action (see pre_dump_one_task)<br>          */<br>         timing_start(TIME_MEMWRITE);<br>-        ret = page_xfer_dump_pages(xfer, pp);<br>+        ret = page_xfer_dump_pages(pid, xfer, pp, poff);<br>         timing_stop(TIME_MEMWRITE);<br> <br>         return ret;<br>@@ -351,7 +319,7 @@ static int generate_vma_iovs(struct pstree_item *item, struct vma_area *vma,<br>                              struct page_pipe *pp, struct page_xfer *xfer,<br>                              struct parasite_dump_pages_args *args,<br>                              struct parasite_ctl *ctl, pmc_t *pmc,<br>-                             bool has_parent, bool pre_dump)<br>+                             bool has_parent, bool pre_dump, size_t *poff)<br> {<br>         u64 off = 0;<br>         u64 *map;<br>@@ -361,6 +329,12 @@ static int generate_vma_iovs(struct pstree_item *item, struct vma_area *vma,<br>                                 !vma_area_is(vma, VMA_ANON_SHARED))<br>                 return 0;<br> <br>+        if (!(vma-&gt;e-&gt;prot &amp; PROT_READ)){<br>+                if(pre_dump)<br>+                        return 0;<br>+                has_parent = false;<br>+        }<br>+<br>         if (vma_entry_is(vma-&gt;e, VMA_AREA_AIORING)) {<br>                 if (pre_dump)<br>                         return 0;<br>@@ -379,9 +353,7 @@ again:<br>         if (ret == -EAGAIN) {<br>                 BUG_ON(!(pp-&gt;flags &amp; PP_CHUNK_MODE));<br> <br>-                ret = drain_pages(pp, ctl, args);<br>-                if (!ret)<br>-                        ret = xfer_pages(pp, xfer);<br>+                ret = xfer_pages(item-&gt;pid-&gt;real, pp, xfer, poff);<br>                 if (!ret) {<br>                         page_pipe_reinit(pp);<br>                         goto again;<br>@@ -406,6 +378,7 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,<br>         unsigned long pmc_size;<br>         
int possible_pid_reuse = 0;<br>         bool has_parent;<br>+        size_t poff = 0;<br> <br>         pr_info(&quot;\n&quot;);<br>         pr_info(&quot;Dumping pages (type: %d pid: %d)\n&quot;, CR_FD_PAGES, item-&gt;pid-&gt;real);<br>@@ -470,11 +443,12 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,<br>         /*<br>          * Step 1 -- generate the pagemap<br>          */<br>+        poff = 0;<br>         args-&gt;off = 0;<br>         has_parent = !!xfer.parent &amp;&amp; !possible_pid_reuse;<br>         list_for_each_entry(vma_area, &amp;vma_area_list-&gt;h, list) {<br>                 ret = generate_vma_iovs(item, vma_area, pp, &amp;xfer, args, ctl,<br>-                                        &amp;pmc, has_parent, mdc-&gt;pre_dump);<br>+                                        &amp;pmc, has_parent, mdc-&gt;pre_dump, &amp;poff);<br>                 if (ret &lt; 0)<br>                         goto out_xfer;<br>         }<br>@@ -482,9 +456,8 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,<br>         if (mdc-&gt;lazy)<br>                 memcpy(pargs_iovs(args), pp-&gt;iovs,<br>                        sizeof(struct iovec) * pp-&gt;nr_iovs);<br>-        ret = drain_pages(pp, ctl, args);<br>-        if (!ret &amp;&amp; !mdc-&gt;pre_dump)<br>-                ret = xfer_pages(pp, &amp;xfer);<br>+        if(!mdc-&gt;pre_dump)<br>+                ret = xfer_pages(item-&gt;pid-&gt;real, pp, &amp;xfer, &amp;poff);<br>         if (ret)<br>                 goto out_xfer;<br> <br>@@ -529,17 +502,18 @@ int parasite_dump_pages_seized(struct pstree_item *item,<br>          *<br>          * Afterwards -- reprotect memory back.<br>          */<br>+        if(!mdc-&gt;pre_dump){<br>+                pargs-&gt;add_prot = PROT_READ;<br>+                ret = compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl);<br>+                if (ret) {<br>+                        pr_err(&quot;Can&#39;t dump unprotect vmas with parasite\n&quot;);<br>+ 
                       return ret;<br>+                }<br> <br>-        pargs-&gt;add_prot = PROT_READ;<br>-        ret = compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl);<br>-        if (ret) {<br>-                pr_err(&quot;Can&#39;t dump unprotect vmas with parasite\n&quot;);<br>-                return ret;<br>-        }<br>-<br>-        if (fault_injected(FI_DUMP_PAGES)) {<br>-                pr_err(&quot;fault: Dump VMA pages failure!\n&quot;);<br>-                return -1;<br>+                if (fault_injected(FI_DUMP_PAGES)) {<br>+                        pr_err(&quot;fault: Dump VMA pages failure!\n&quot;);<br>+                        return -1;<br>+                }<br>         }<br> <br>         ret = __parasite_dump_pages_seized(item, pargs, vma_area_list, mdc, ctl);<br>@@ -549,10 +523,12 @@ int parasite_dump_pages_seized(struct pstree_item *item,<br>                 return ret;<br>         }<br> <br>-        pargs-&gt;add_prot = 0;<br>-        if (compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl)) {<br>-                pr_err(&quot;Can&#39;t rollback unprotected vmas with parasite\n&quot;);<br>-                ret = -1;<br>+        if(!mdc-&gt;pre_dump){<br>+                pargs-&gt;add_prot = 0;<br>+                if (compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl)) {<br>+                        pr_err(&quot;Can&#39;t rollback unprotected vmas with parasite\n&quot;);<br>+                        ret = -1;<br>+                }<br>         }<br> <br>         return ret;<br>diff --git a/criu/page-pipe.c b/criu/page-pipe.c<br>index c32b893..c70ba70 100644<br>--- a/criu/page-pipe.c<br>+++ b/criu/page-pipe.c<br>@@ -33,7 +33,7 @@ static int __ppb_resize_pipe(struct page_pipe_buf *ppb, unsigned long new_size)<br> {<br>         int ret;<br> <br>-        ret = fcntl(ppb-&gt;p[0], F_SETPIPE_SZ, new_size * PAGE_SIZE);<br>+        ret = fcntl(ppb-&gt;p[0], F_SETPIPE_SZ, (new_size * PAGE_SIZE) + 1);<br>         if (ret &lt; 0)<br>               
  return -1;<br> <br>@@ -41,7 +41,7 @@ static int __ppb_resize_pipe(struct page_pipe_buf *ppb, unsigned long new_size)<br>         BUG_ON(ret &lt; ppb-&gt;pipe_size);<br> <br>         pr_debug(&quot;Grow pipe %x -&gt; %x\n&quot;, ppb-&gt;pipe_size, ret);<br>-        ppb-&gt;pipe_size = ret;<br>+        ppb-&gt;pipe_size = ret - 1;<br> <br>         return 0;<br> }<br>diff --git a/criu/page-xfer.c b/criu/page-xfer.c<br>index 9cdffd8..9262221 100644<br>--- a/criu/page-xfer.c<br>+++ b/criu/page-xfer.c<br>@@ -496,19 +496,75 @@ static inline u32 ppb_xfer_flags(struct page_xfer *xfer, struct page_pipe_buf *p<br>                 return PE_PRESENT;<br> }<br> <br>-int page_xfer_dump_pages(struct page_xfer *xfer, struct page_pipe *pp)<br>+static char userbuf[4 &lt;&lt; 20];<br>+<br>+int page_xfer_dump_pages(int pid, struct page_xfer *xfer, struct page_pipe *pp, size_t *poff)<br> {<br>         struct page_pipe_buf *ppb;<br>         unsigned int cur_hole = 0;<br>-        int ret;<br>+        unsigned int ret, ret2;<br>+        size_t off;<br>+        struct iovec *remoteiovs = pp-&gt;iovs;<br> <br>         pr_debug(&quot;Transferring pages:\n&quot;);<br> <br>+        off = *poff;<br>+<br>         list_for_each_entry(ppb, &amp;pp-&gt;bufs, l) {<br>                 unsigned int i;<br> <br>                 pr_debug(&quot;\tbuf %d/%d\n&quot;, ppb-&gt;pages_in, ppb-&gt;nr_segs);<br> <br>+                size_t bufsize = sizeof(userbuf);<br>+                struct iovec bufvec = {.iov_len = bufsize};<br>+                bufvec.iov_base = userbuf;<br>+<br>+                ret = syscall(__NR_process_vm_readv, pid, &amp;bufvec, 1, \<br>+                                                &amp;remoteiovs[off], ppb-&gt;nr_segs, 0);<br>+                if (ret &lt; 0) {<br>+                        switch (errno) {<br>+                                case EINVAL:<br>+                                        pr_debug(&quot;process_vm_readv: Invalid arguments\n&quot;);<br>+                         
       break;<br>+                                case EFAULT:<br>+                                        pr_debug(&quot;process_vm_readv: Unable to access remote iov\n&quot;);<br>+                                break;<br>+                                case ENOMEM:<br>+                                        pr_debug(&quot;process_vm_readv: Unable to allocate memory\n&quot;);<br>+                                break;<br>+                                case EPERM:<br>+                                        pr_debug(&quot;process_vm_readv: Insufficient privileges\n&quot;);<br>+                                break;<br>+                                case ESRCH:<br>+                                        pr_debug(&quot;process_vm_readv: Target process doesn&#39;t exist\n&quot;);<br>+                                break;<br>+                                default:<br>+                                        pr_debug(&quot;process_vm_readv: Uncategorised error\n&quot;);<br>+                        }<br>+                        return 0;<br>+                }<br>+<br>+                /* Handling partial reads due to modified mappings*/<br>+<br>+                if(ret != ppb-&gt;pages_in * PAGE_SIZE){<br>+                        pr_debug(&quot;Can&#39;t read remote iovs (%d(%lu)/%d)\n&quot;, ret,\<br>+                                (unsigned long int)PAGE_SIZE * ppb-&gt;pages_in, ppb-&gt;pages_in);<br>+                        continue;<br>+                }<br>+<br>+                bufvec.iov_len = ret;<br>+                ret2 = vmsplice(ppb-&gt;p[1], &amp;bufvec, 1, SPLICE_F_NONBLOCK);<br>+<br>+                if(ret2 == -1){<br>+                        pr_debug(&quot;vmsplice: Failed to splice user buffer to pipe\n&quot;);<br>+                        continue;<br>+                }<br>+<br>+                if(ret != ret2){<br>+                        pr_debug(&quot;Partial splice from user buffer to pipe (%d)\n&quot;, ret2);<br>+                        
continue;<br>+                }<br>+<br>                 for (i = 0; i &lt; ppb-&gt;nr_segs; i++) {<br>                         struct iovec iov = ppb-&gt;iov[i];<br>                         u32 flags;<br>@@ -530,8 +586,10 @@ int page_xfer_dump_pages(struct page_xfer *xfer, struct page_pipe *pp)<br>                                                 ppb-&gt;p[0], iov.iov_len))<br>                                 return -1;<br>                 }<br>+                off += ppb-&gt;nr_segs;<br>         }<br> <br>+        *poff = off;<br>         return dump_holes(xfer, pp, &amp;cur_hole, NULL);<br> }<br> <br>diff --git a/criu/shmem.c b/criu/shmem.c<br>index 03b088f..a797bde 100644<br>--- a/criu/shmem.c<br>+++ b/criu/shmem.c<br>@@ -629,19 +629,9 @@ int add_shmem_area(pid_t pid, VmaEntry *vma, u64 *map)<br>         return 0;<br> }<br> <br>-static int dump_pages(struct page_pipe *pp, struct page_xfer *xfer)<br>+static int dump_pages(struct page_pipe *pp, struct page_xfer *xfer, size_t *off)<br> {<br>-        struct page_pipe_buf *ppb;<br>-<br>-        list_for_each_entry(ppb, &amp;pp-&gt;bufs, l)<br>-                if (vmsplice(ppb-&gt;p[1], ppb-&gt;iov, ppb-&gt;nr_segs,<br>-                                        SPLICE_F_GIFT | SPLICE_F_NONBLOCK) !=<br>-                                ppb-&gt;pages_in * PAGE_SIZE) {<br>-                        pr_perror(&quot;Can&#39;t get shmem into page-pipe&quot;);<br>-                        return -1;<br>-                }<br>-<br>-        return page_xfer_dump_pages(xfer, pp);<br>+        return page_xfer_dump_pages(getpid(), xfer, pp, off);<br> }<br> <br> static int next_data_segment(int fd, unsigned long pfn,<br>@@ -678,6 +668,7 @@ static int do_dump_one_shmem(int fd, void *addr, struct shmem_info *si)<br>         int err, ret = -1;<br>         unsigned long pfn, nrpages, next_data_pnf = 0, next_hole_pfn = 0;<br>         unsigned long pages[2] = {};<br>+        size_t off = 0;<br> <br>         nrpages = (si-&gt;size + PAGE_SIZE - 
1) / PAGE_SIZE;<br> <br>@@ -726,7 +717,7 @@ again:<br>                 }<br> <br>                 if (ret == -EAGAIN) {<br>-                        ret = dump_pages(pp, &amp;xfer);<br>+                        ret = dump_pages(pp, &amp;xfer, &amp;off);<br>                         if (ret)<br>                                 goto err_xfer;<br>                         page_pipe_reinit(pp);<br>@@ -742,7 +733,7 @@ again:<br>         cnt_add(CNT_SHPAGES_SKIPPED_PARENT, pages[0]);<br>         cnt_add(CNT_SHPAGES_WRITTEN, pages[1]);<br> <br>-        ret = dump_pages(pp, &amp;xfer);<br>+        ret = dump_pages(pp, &amp;xfer, &amp;off);<br> <br> err_xfer:<br>         xfer.close(&amp;xfer);<br>-- <br>2.7.4</div>