[CRIU] [PATCH] vmsplice user-buffer(grabbed by process_vm_readv) to page-pipe
abhishek dubey
dubeyabhishek777 at gmail.com
Mon Jul 8 23:20:57 MSK 2019
Hi Andrei,
Please find inline replies.
On 08/07/19 9:35 AM, Andrei Vagin wrote:
> On Fri, Jul 05, 2019 at 05:34:26PM +0530, Abhishek Dubey wrote:
>> This patch implements usage of process_vm_readv
>> syscall to collect memory pages from target process during
>> pre-dump. process_vm_readv collects pages in user-buffer,
>> which is later vmspliced to page-pipes.
> You have to be more detailed in the commit message.
>
> Frist of all, you have to explain why we need this patch.
I will take care of same from next time.
>
> If you improve performance, you need to give some numbers.
Still I am handling issue of "missing entry in parent pagemap". Next
patch will have comparative figures included.
>
> If you implement a new feature, you need to provide tests for it.
>
> If I understand this patch right, criu freezes all processes, collects
> page maps, unfreezes process, dump pages? If this is right,
Yes, this is right.
> you need to
> explain how it handles in this case:
>
> addr = map(NULL, 4 * PAGE_SIZE) |
> addr[0] = 1 |
> addr[PAGE_SIZE] = 2 |
> addr[2 * PAGE_SIZE] = 3 |
> addr[3 * PAGE_SIZE] = 4 |
> | criu freezes the process
> | criu collects page maps
> | criu unfreezes the process
> unmap(addr, PAGE_SIZE * 2) |
> | criu dumps pages
Need to explain this example somewhere along with code(as in
page-pipe.h) or some other place?
>
> here are a few inline comments
>> Signed-off-by: Abhishek Dubey <dubeyabhishek777 at gmail.com>
>> ---
>> criu/cr-dump.c | 3 +-
>> criu/include/page-xfer.h | 2 +-
>> criu/mem.c | 88 ++++++++++++++++++------------------------------
>> criu/page-pipe.c | 4 +--
>> criu/page-xfer.c | 62 ++++++++++++++++++++++++++++++++--
>> criu/shmem.c | 19 +++--------
>> 6 files changed, 102 insertions(+), 76 deletions(-)
>>
>> diff --git a/criu/cr-dump.c b/criu/cr-dump.c
>> index 7f2e5ed..ee5f4f3 100644
>> --- a/criu/cr-dump.c
>> +++ b/criu/cr-dump.c
>> @@ -1501,6 +1501,7 @@ static int cr_pre_dump_finish(int status)
>> struct parasite_ctl *ctl = dmpi(item)->parasite_ctl;
>> struct page_pipe *mem_pp;
>> struct page_xfer xfer;
>> + size_t off = 0;
>>
>> if (!ctl)
>> continue;
>> @@ -1512,7 +1513,7 @@ static int cr_pre_dump_finish(int status)
>> goto err;
>>
>> mem_pp = dmpi(item)->mem_pp;
>> - ret = page_xfer_dump_pages(&xfer, mem_pp);
>> + ret = page_xfer_dump_pages(item->pid->real, &xfer, mem_pp, &off);
>>
>> xfer.close(&xfer);
>>
>> diff --git a/criu/include/page-xfer.h b/criu/include/page-xfer.h
>> index fa72273..74d42f2 100644
>> --- a/criu/include/page-xfer.h
>> +++ b/criu/include/page-xfer.h
>> @@ -47,7 +47,7 @@ struct page_xfer {
>>
>> extern int open_page_xfer(struct page_xfer *xfer, int fd_type, unsigned long id);
>> struct page_pipe;
>> -extern int page_xfer_dump_pages(struct page_xfer *, struct page_pipe *);
>> +extern int page_xfer_dump_pages(int pid, struct page_xfer *, struct page_pipe *, size_t *poff);
>> extern int connect_to_page_server_to_send(void);
>> extern int connect_to_page_server_to_recv(int epfd);
>> extern int disconnect_from_page_server(void);
>> diff --git a/criu/mem.c b/criu/mem.c
>> index 6a1a87a..844d726 100644
>> --- a/criu/mem.c
>> +++ b/criu/mem.c
>> @@ -260,39 +260,7 @@ static struct parasite_dump_pages_args *prep_dump_pages_args(struct parasite_ctl
>> return args;
>> }
>>
>> -static int drain_pages(struct page_pipe *pp, struct parasite_ctl *ctl,
>> - struct parasite_dump_pages_args *args)
>> -{
>> - struct page_pipe_buf *ppb;
>> - int ret = 0;
>> -
>> - debug_show_page_pipe(pp);
>> -
>> - /* Step 2 -- grab pages into page-pipe */
>> - list_for_each_entry(ppb, &pp->bufs, l) {
>> - args->nr_segs = ppb->nr_segs;
>> - args->nr_pages = ppb->pages_in;
>> - pr_debug("PPB: %d pages %d segs %u pipe %d off\n",
>> - args->nr_pages, args->nr_segs, ppb->pipe_size, args->off);
>> -
>> - ret = compel_rpc_call(PARASITE_CMD_DUMPPAGES, ctl);
>> - if (ret < 0)
>> - return -1;
>> - ret = compel_util_send_fd(ctl, ppb->p[1]);
>> - if (ret)
>> - return -1;
>> -
>> - ret = compel_rpc_sync(PARASITE_CMD_DUMPPAGES, ctl);
>> - if (ret < 0)
>> - return -1;
>> -
>> - args->off += args->nr_segs;
>> - }
>> -
>> - return 0;
>> -}
>> -
>> -static int xfer_pages(struct page_pipe *pp, struct page_xfer *xfer)
>> +static int xfer_pages( int pid, struct page_pipe *pp, struct page_xfer *xfer, size_t *poff)
>> {
>> int ret;
>>
>> @@ -301,7 +269,7 @@ static int xfer_pages(struct page_pipe *pp, struct page_xfer *xfer)
>> * pre-dump action (see pre_dump_one_task)
>> */
>> timing_start(TIME_MEMWRITE);
>> - ret = page_xfer_dump_pages(xfer, pp);
>> + ret = page_xfer_dump_pages(pid, xfer, pp, poff);
>> timing_stop(TIME_MEMWRITE);
>>
>> return ret;
>> @@ -351,7 +319,7 @@ static int generate_vma_iovs(struct pstree_item *item, struct vma_area *vma,
>> struct page_pipe *pp, struct page_xfer *xfer,
>> struct parasite_dump_pages_args *args,
>> struct parasite_ctl *ctl, pmc_t *pmc,
>> - bool has_parent, bool pre_dump)
>> + bool has_parent, bool pre_dump, size_t *poff)
>> {
>> u64 off = 0;
>> u64 *map;
>> @@ -361,6 +329,12 @@ static int generate_vma_iovs(struct pstree_item *item, struct vma_area *vma,
>> !vma_area_is(vma, VMA_ANON_SHARED))
>> return 0;
>>
>> + if (!(vma->e->prot & PROT_READ)){
>> + if(pre_dump)
>> + return 0;
>> + has_parent = false;
>> + }
>> +
>> if (vma_entry_is(vma->e, VMA_AREA_AIORING)) {
>> if (pre_dump)
>> return 0;
>> @@ -379,9 +353,7 @@ again:
>> if (ret == -EAGAIN) {
>> BUG_ON(!(pp->flags & PP_CHUNK_MODE));
>>
>> - ret = drain_pages(pp, ctl, args);
>> - if (!ret)
>> - ret = xfer_pages(pp, xfer);
>> + ret = xfer_pages(item->pid->real, pp, xfer, poff);
>> if (!ret) {
>> page_pipe_reinit(pp);
>> goto again;
>> @@ -406,6 +378,7 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
>> unsigned long pmc_size;
>> int possible_pid_reuse = 0;
>> bool has_parent;
>> + size_t poff = 0;
>>
>> pr_info("\n");
>> pr_info("Dumping pages (type: %d pid: %d)\n", CR_FD_PAGES, item->pid->real);
>> @@ -470,11 +443,12 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
>> /*
>> * Step 1 -- generate the pagemap
>> */
>> + poff = 0;
>> args->off = 0;
>> has_parent = !!xfer.parent && !possible_pid_reuse;
>> list_for_each_entry(vma_area, &vma_area_list->h, list) {
>> ret = generate_vma_iovs(item, vma_area, pp, &xfer, args, ctl,
>> - &pmc, has_parent, mdc->pre_dump);
>> + &pmc, has_parent, mdc->pre_dump, &poff);
>> if (ret < 0)
>> goto out_xfer;
>> }
>> @@ -482,9 +456,8 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
>> if (mdc->lazy)
>> memcpy(pargs_iovs(args), pp->iovs,
>> sizeof(struct iovec) * pp->nr_iovs);
>> - ret = drain_pages(pp, ctl, args);
>> - if (!ret && !mdc->pre_dump)
>> - ret = xfer_pages(pp, &xfer);
>> + if(!mdc->pre_dump)
>> + ret = xfer_pages(item->pid->real, pp, &xfer, &poff);
>> if (ret)
>> goto out_xfer;
>>
>> @@ -529,17 +502,18 @@ int parasite_dump_pages_seized(struct pstree_item *item,
>> *
>> * Afterwards -- reprotect memory back.
>> */
>> + if(!mdc->pre_dump){
>> + pargs->add_prot = PROT_READ;
>> + ret = compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl);
> This should be in a separate patch with explanation why we don't need
> mprotect in this case.
Sure! I will get it in separate patch with proper reasoning.
>
>> + if (ret) {
>> + pr_err("Can't dump unprotect vmas with parasite\n");
>> + return ret;
>> + }
>>
>> - pargs->add_prot = PROT_READ;
>> - ret = compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl);
>> - if (ret) {
>> - pr_err("Can't dump unprotect vmas with parasite\n");
>> - return ret;
>> - }
>> -
>> - if (fault_injected(FI_DUMP_PAGES)) {
>> - pr_err("fault: Dump VMA pages failure!\n");
>> - return -1;
>> + if (fault_injected(FI_DUMP_PAGES)) {
>> + pr_err("fault: Dump VMA pages failure!\n");
>> + return -1;
>> + }
>> }
>>
>> ret = __parasite_dump_pages_seized(item, pargs, vma_area_list, mdc, ctl);
>> @@ -549,10 +523,12 @@ int parasite_dump_pages_seized(struct pstree_item *item,
>> return ret;
>> }
>>
>> - pargs->add_prot = 0;
>> - if (compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl)) {
>> - pr_err("Can't rollback unprotected vmas with parasite\n");
>> - ret = -1;
>> + if(!mdc->pre_dump){
>> + pargs->add_prot = 0;
>> + if (compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl)) {
>> + pr_err("Can't rollback unprotected vmas with parasite\n");
>> + ret = -1;
>> + }
>> }
>>
>> return ret;
>> diff --git a/criu/page-pipe.c b/criu/page-pipe.c
>> index c32b893..c70ba70 100644
>> --- a/criu/page-pipe.c
>> +++ b/criu/page-pipe.c
>> @@ -33,7 +33,7 @@ static int __ppb_resize_pipe(struct page_pipe_buf *ppb, unsigned long new_size)
>> {
>> int ret;
>>
>> - ret = fcntl(ppb->p[0], F_SETPIPE_SZ, new_size * PAGE_SIZE);
>> + ret = fcntl(ppb->p[0], F_SETPIPE_SZ, (new_size * PAGE_SIZE) + 1);
>> if (ret < 0)
>> return -1;
>>
>> @@ -41,7 +41,7 @@ static int __ppb_resize_pipe(struct page_pipe_buf *ppb, unsigned long new_size)
>> BUG_ON(ret < ppb->pipe_size);
>>
>> pr_debug("Grow pipe %x -> %x\n", ppb->pipe_size, ret);
>> - ppb->pipe_size = ret;
>> + ppb->pipe_size = ret - 1;
>>
>> return 0;
>> }
>> diff --git a/criu/page-xfer.c b/criu/page-xfer.c
>> index 9cdffd8..a5396b1 100644
>> --- a/criu/page-xfer.c
>> +++ b/criu/page-xfer.c
>> @@ -496,19 +496,75 @@ static inline u32 ppb_xfer_flags(struct page_xfer *xfer, struct page_pipe_buf *p
>> return PE_PRESENT;
>> }
>>
>> -int page_xfer_dump_pages(struct page_xfer *xfer, struct page_pipe *pp)
>> +static char userbuf[4 << 20];
>> +
>> +int page_xfer_dump_pages(int pid, struct page_xfer *xfer, struct page_pipe *pp, size_t *poff)
>> {
>> struct page_pipe_buf *ppb;
>> unsigned int cur_hole = 0;
>> - int ret;
>> + unsigned int ret, ret2;
>> + size_t off;
>> + struct iovec *remoteiovs = pp->iovs;
>>
>> pr_debug("Transferring pages:\n");
>>
>> + off = *poff;
>> +
>> list_for_each_entry(ppb, &pp->bufs, l) {
>> unsigned int i;
>>
>> pr_debug("\tbuf %d/%d\n", ppb->pages_in, ppb->nr_segs);
>>
>> + size_t bufsize = sizeof(userbuf);
>> + struct iovec bufvec = {.iov_len = bufsize};
>> + bufvec.iov_base = userbuf;
>> +
>> + ret = syscall(__NR_process_vm_readv, pid, &bufvec, 1, \
>> + &remoteiovs[off], ppb->nr_segs, 0);
>> + if (ret == -1) {
>> + switch (errno) {
> you don't need this switch. You can use pr_perror of pr_debug("....: %s". strerror(errno));
Ok :)
>
>> + case EINVAL:
>> + pr_debug("process_vm_readv: Invalid arguments\n");
>> + break;
>> + case EFAULT:
>> + pr_debug("process_vm_readv: Unable to access remote iov\n");
>> + break;
>> + case ENOMEM:
>> + pr_debug("process_vm_readv: Unable to allocate memory\n");
>> + break;
>> + case EPERM:
>> + pr_debug("process_vm_readv: Insufficient privileges\n");
>> + break;
>> + case ESRCH:
>> + pr_debug("process_vm_readv: Target process doesn't exist\n");
>> + break;
>> + default:
>> + pr_debug("process_vm_readv: Uncategorised error\n");
>> + }
>> + return 0;
> Why do we return 0 in error cases? You need to add a comment here.
Will add brief description.
>
>> + }
>> +
>> + /* Handling partial reads due to modified mappings*/
>> +
>> + if(ret != ppb->pages_in * PAGE_SIZE){
>> + pr_debug("Can't read remote iovs (%d(%lu)/%d)\n", ret,\
>> + (unsigned long int)PAGE_SIZE * ppb->pages_in, ppb->pages_in);
>> + continue;
>> + }
>> +
>> + bufvec.iov_len = ret;
>> + ret2 = vmsplice(ppb->p[1], &bufvec, 1, SPLICE_F_NONBLOCK);
>> +
>> + if(ret2 == -1){
>> + pr_debug("vmsplice: Failed to splice user buffer to pipe\n");
>> + continue;
>> + }
>> +
>> + if(ret != ret2){
>> + pr_debug("Partial splice from user buffer to pipe (%d)\n", ret2);
>> + continue;
>> + }
>> +
>> for (i = 0; i < ppb->nr_segs; i++) {
>> struct iovec iov = ppb->iov[i];
>> u32 flags;
>> @@ -530,8 +586,10 @@ int page_xfer_dump_pages(struct page_xfer *xfer, struct page_pipe *pp)
>> ppb->p[0], iov.iov_len))
>> return -1;
>> }
>> + off += ppb->nr_segs;
>> }
>>
>> + *poff = off;
>> return dump_holes(xfer, pp, &cur_hole, NULL);
>> }
>>
>> diff --git a/criu/shmem.c b/criu/shmem.c
>> index 03b088f..a797bde 100644
>> --- a/criu/shmem.c
>> +++ b/criu/shmem.c
>> @@ -629,19 +629,9 @@ int add_shmem_area(pid_t pid, VmaEntry *vma, u64 *map)
>> return 0;
>> }
>>
>> -static int dump_pages(struct page_pipe *pp, struct page_xfer *xfer)
>> +static int dump_pages(struct page_pipe *pp, struct page_xfer *xfer, size_t *off)
>> {
>> - struct page_pipe_buf *ppb;
>> -
>> - list_for_each_entry(ppb, &pp->bufs, l)
>> - if (vmsplice(ppb->p[1], ppb->iov, ppb->nr_segs,
>> - SPLICE_F_GIFT | SPLICE_F_NONBLOCK) !=
>> - ppb->pages_in * PAGE_SIZE) {
>> - pr_perror("Can't get shmem into page-pipe");
>> - return -1;
>> - }
>> -
>> - return page_xfer_dump_pages(xfer, pp);
>> + return page_xfer_dump_pages(getpid(), xfer, pp, off);
>> }
>>
>> static int next_data_segment(int fd, unsigned long pfn,
>> @@ -678,6 +668,7 @@ static int do_dump_one_shmem(int fd, void *addr, struct shmem_info *si)
>> int err, ret = -1;
>> unsigned long pfn, nrpages, next_data_pnf = 0, next_hole_pfn = 0;
>> unsigned long pages[2] = {};
>> + size_t off = 0;
>>
>> nrpages = (si->size + PAGE_SIZE - 1) / PAGE_SIZE;
>>
>> @@ -726,7 +717,7 @@ again:
>> }
>>
>> if (ret == -EAGAIN) {
>> - ret = dump_pages(pp, &xfer);
>> + ret = dump_pages(pp, &xfer, &off);
>> if (ret)
>> goto err_xfer;
>> page_pipe_reinit(pp);
>> @@ -742,7 +733,7 @@ again:
>> cnt_add(CNT_SHPAGES_SKIPPED_PARENT, pages[0]);
>> cnt_add(CNT_SHPAGES_WRITTEN, pages[1]);
>>
>> - ret = dump_pages(pp, &xfer);
>> + ret = dump_pages(pp, &xfer, &off);
>>
>> err_xfer:
>> xfer.close(&xfer);
>> --
>> 2.7.4
>>
-Abhishek
More information about the CRIU
mailing list