[CRIU] Re: [PATCH cr] [RFC] shmem: rework of dumping shared memory
Pavel Emelyanov
xemul at parallels.com
Mon Mar 19 03:29:35 EDT 2012
On 03/17/2012 11:57 PM, Andrey Vagin wrote:
>
> vma_entry contains shmid and all shared memory are dumped in own files.
Cool!
> The most interesting thing is restore. A maping is restored by process with the
> smallest pid. The mamping is created before executing restorer. We map a full
> mapping, then we open a file from /proc/pid/map_files and store a descriptor in
> vma_info. The mapping is unmaped. Now we can map any region of this mapping in
> restorer.
>
> The content are restored from crtools before the final executing processes.
> I don't want to restore it in restore, because in this case we need to wait
> when all tasks maps this mapping.
>
> I don't know how to split this patch...
Shame on you :(
First of all, git-bisect-safe is not a strict requirement by now, so you can
split the patch even if the individual parts do not work. But even without this
easing, there's still smth you can split. Find a couple of obvious suggestions
inline.
> Signed-off-by: Andrey Vagin <avagin at openvz.org>
> ---
> cr-dump.c | 135 ++++++++++++++++--
> cr-restore.c | 406 ++++++++++++++++++++++++++--------------------------
> cr-show.c | 24 ---
> crtools.c | 17 +--
> include/crtools.h | 10 +-
> include/image.h | 9 +-
> include/parasite.h | 4 -
> include/restorer.h | 18 ++-
> include/util.h | 2 +-
> parasite-syscall.c | 17 +--
> parasite.c | 17 +--
> proc_parse.c | 4 +-
> restorer.c | 8 +-
> util.c | 5 +-
> 14 files changed, 375 insertions(+), 301 deletions(-)
>
> diff --git a/cr-dump.c b/cr-dump.c
> index 7be2a96..2d5ad20 100644
> --- a/cr-dump.c
> +++ b/cr-dump.c
> @@ -415,6 +415,33 @@ static int dump_task_files(pid_t pid, const struct cr_fdset *cr_fdset,
> return 0;
> }
>
> +struct shmem_info
> +{
> + unsigned long size;
> + unsigned long shmid;
> + unsigned long start;
> + unsigned long end;
> + int pid;
> +};
> +
> +int nr_shmems;
> +struct shmems {
> + struct shmem_info entries[0];
> +} *shmems;
Either move the nr_shmems into struct shmems, or remove the struct completely
and use plain struct shmem_info pointer.
> +#define SHMEMS_SIZE 4096
> +
> +static struct shmem_info* shmem_find(unsigned long shmid)
> +{
> + int i;
> +
> + for (i = 0; i < nr_shmems; i++)
> + if (shmems->entries[i].shmid == shmid)
> + return &shmems->entries[i];
> +
> + return NULL;
> +}
> +
> static int dump_task_mappings(pid_t pid, const struct list_head *vma_area_list,
> const struct cr_fdset *cr_fdset)
> {
> @@ -1402,6 +1443,76 @@ err_free:
> return ret;
> }
>
> +static int cr_dump_shmem(int pid)
> +{
> + int i, err;
> + struct cr_fdset *cr_fdset = NULL;
> +
> + for (i = 0; i < nr_shmems; i++) {
> + struct shmem_info *si = &shmems->entries[i];
> + char path[128];
> + int pfd, fd;
> + void *addr;
> + unsigned long pfn, nrpages = (si->size + PAGE_SIZE -1) / PAGE_SIZE;
> + unsigned char map[nrpages];
Please, no. Allocate memory for this with xmalloc.
> +
> + pfd = open_proc(si->pid, "map_files");
> + if (pfd < 0)
> + goto err;
> +
> + snprintf(path, 128, "%lx-%lx", si->start, si->end);
> + fd = openat(pfd, path, O_RDONLY);
Does the open_proc work for you here? I believe it does.
> + close(pfd);
> + if (fd < 0) {
> + pr_err("can't open file: %s", path);
> + goto err;
> + }
> +
> + addr = mmap(NULL, si->size, PROT_READ, MAP_SHARED, fd, 0);
> + close(fd);
> + if (addr == MAP_FAILED) {
> + pr_err("can't map file: %s", path);
> + goto err;
> + }
> +
> + err = mincore(addr, si->size, map);
> + if (err) {
> + munmap(addr, si->size);
> + goto err;
> + }
> +
> + fd = open_image(CR_FD_SHMEM_PAGES, O_WRONLY | O_CREAT, si->shmid);
> + if (fd < 0)
> + goto err;
> +
> + for (pfn = 0; pfn < nrpages; pfn++) {
> + u64 offset = pfn * PAGE_SIZE;
offset = pfn << PAGE_SHIFT;
> + if (!(map[pfn] & PAGE_RSS))
> + continue;
> +
> + if (write_img_buf(fd, &offset, sizeof(offset)))
> + break;
> + if (write_img_buf(fd, addr + offset, PAGE_SIZE))
> + break;
> + }
> +
> + munmap(addr, si->size);
> +
> + if (pfn == nrpages) {
> + write_img(fd, &zero_page_entry);
Error check missed.
> + close(fd);
> + } else {
> + close(fd);
> + goto err;
> + }
This is ugly. Make the err_close mark at the end of a file and turn the pfn dumping
breaks into goto-s.
> + }
> +
> + return 0;
> +err:
> + return -1;
> +}
> +
> int cr_dump_tasks(pid_t pid, const struct cr_options *opts)
> {
> LIST_HEAD(pstree_list);
> @@ -1440,6 +1551,11 @@ int cr_dump_tasks(pid_t pid, const struct cr_options *opts)
> goto err;
> close_cr_fdset(&cr_fdset);
>
> + nr_shmems = 0;
> + shmems = malloc(SHMEMS_SIZE);
xmalloc
> + if (!shmems)
> + goto err;
> +
> list_for_each_entry(item, &pstree_list, list) {
> cr_fdset = cr_dump_fdset_open(item->pid, CR_FD_DESC_NONE, NULL);
> if (!cr_fdset)
> @@ -1463,8 +1579,9 @@ int cr_dump_tasks(pid_t pid, const struct cr_options *opts)
> if (opts->leader_only)
> break;
> }
> - ret = 0;
>
> + ret = cr_dump_shmem(pid);
> + free(shmems);
I'd move the free (well, xfree) into cr_dump_shmem.
> err:
> pstree_switch_state(&pstree_list, opts);
> free_pstree(&pstree_list);
> diff --git a/cr-restore.c b/cr-restore.c
> index 9100c55..ad005b5 100644
> --- a/cr-restore.c
> +++ b/cr-restore.c
> @@ -199,20 +201,78 @@ static int collect_shmem(int pid, struct shmem_entry *e)
> return -1;
> }
>
> - memset(&shmems->entries[nr_shmems], 0, sizeof(shmems->entries[0]));
> + si = &shmems->entries[nr_shmems];
> + shmems->nr_shmems++;
>
> - entries[nr_shmems].start = e->start;
> - entries[nr_shmems].end = e->end;
> - entries[nr_shmems].shmid = e->shmid;
> - entries[nr_shmems].pid = pid;
> + si->start = vi->start;
> + si->end = vi->end;
> + si->shmid = vi->shmid;
> + si->pid = pid;
> + si->size = size;
> + si->fd = -1;
>
> - cr_wait_init(&entries[nr_shmems].lock);
> -
> - shmems->nr_shmems++;
> + cr_wait_init(&si->lock);
>
> return 0;
> }
>
> +static int prepare_shmem_pid(int pid)
> +{
> + int fd, ret = -1;
> + struct vma_entry vi;
> + struct task_core_entry tc;
> + struct image_header hdr;
> +
> + fd = open_image_ro(CR_FD_CORE, pid);
> + if (fd < 0)
> + return -1;
> +
> + if (read_img(fd, &hdr) < 0)
> + goto out;
Use lseek instead. File format may change and the changer will forget
about this code relies on the fact that tc goes right after the hdr.
> + if (read_img(fd, &tc) < 0)
> + goto out;
> +
> + if (tc.task_state == TASK_DEAD) {
> + ret = 0;
> + goto out;
> + }
This one is unnecessary, as the subsequent reads will read no vmas.
> + lseek(fd, GET_FILE_OFF_AFTER(struct core_entry), SEEK_SET);
> +
> + while (1) {
> + ret = read(fd, &vi, sizeof(vi));
Use read_img
> + if (ret < 0) {
> + pr_perror("%d: Can't read vma_entry", pid);
> + } else if (ret != sizeof(vi)) {
> + pr_err("%d: Incomplete vma_entry (%d != %ld)\n",
> + pid, ret, sizeof(vi));
> + ret = -1;
> + goto out;
> + }
> +
> + pr_info("%d: vma %lx %lx\n", pid, vi.start, vi.end);
> +
> + if (final_vma_entry(&vi))
> + break;
> +
> + if (!vma_entry_is(&vi, VMA_ANON_SHARED))
> + continue;
> +
> + if (vma_entry_is(&vi, VMA_AREA_SYSVIPC))
> + continue;
> +
> + ret = collect_shmem(pid, &vi);
> + if (ret)
> + break;
> + }
> +
> +out:
> + close(fd);
> + return ret;
> +}
> +
> +
> static int collect_pipe(int pid, struct pipe_entry *e, int p_fd)
> {
> int i;
> @@ -550,15 +491,47 @@ static int try_fixup_shared_map(int pid, struct vma_entry *vi, int fd)
> pr_perror("%d: Can't open shmem", pid);
> return -1;
> }
> -write_fd:
> - lseek(fd, -sizeof(*vi), SEEK_CUR);
> - vi->fd = sh_fd;
> pr_info("%d: Fixed %lx vma %lx/%d shmem -> %d\n",
> pid, vi->start, si->shmid, si->pid, sh_fd);
> - if (write(fd, vi, sizeof(*vi)) != sizeof(*vi)) {
> - pr_perror("%d: Can't write img", pid);
> - return -1;
> - }
> + vi->fd = sh_fd;
> + } else if (si->pid == pid) {
ROFL
> + void *addr;
> + int f;
> + char path[128];
> +
> + if (si->start == vi->start) {
> + /* vi->pgoff may be not zero and a full shmem may be
> + * mapped here, so we use this dirty hack. */
> + addr = mmap(NULL, si->size,
> + PROT_WRITE | PROT_READ,
> + MAP_SHARED | MAP_ANONYMOUS, -1, 0);
> + if (addr == MAP_FAILED) {
> + pr_err("Can't mmap shmid=0x%lx size=%ld\n",
> + vi->shmid, si->size);
> + return -1;
> + }
> +
> + snprintf(path, 128, "/proc/self/map_files/%lx-%lx",
> + (unsigned long) addr,
> + (unsigned long) addr + si->size);
> +
> + f = open(path, O_RDWR);
> + munmap(addr, si->size);
Ugly as sin :( Let's find better solution when you're in the office.
> + if (f < 0)
> + return -1;
> +
> + vi->fd = si->fd = f;
> + } else
> + vi->fd = dup(si->fd);
si->fd shuold be checked to be valid.
dup error code should be checked.
> + }
> +
> +write_fd:
> + lseek(fd, -sizeof(*vi), SEEK_CUR);
> + if (write(fd, vi, sizeof(*vi)) != sizeof(*vi)) {
> + pr_perror("%d: Can't write img", pid);
> + return -1;
> }
>
> return 0;
> @@ -590,35 +563,20 @@ static int fixup_vma_fds(int pid, int fd)
> continue;
>
> if (vma_entry_is(&vi, VMA_FILE_PRIVATE) ||
> - vma_entry_is(&vi, VMA_FILE_SHARED) ||
> - vma_entry_is(&vi, VMA_ANON_SHARED)) {
> + vma_entry_is(&vi, VMA_FILE_SHARED)) {
>
> pr_info("%d: Fixing %016lx-%016lx %016lx vma\n",
> pid, vi.start, vi.end, vi.pgoff);
> if (try_fixup_file_map(pid, &vi, fd))
> return -1;
> + }
>
> + if (vma_entry_is(&vi, VMA_ANON_SHARED))
The describing pr_info should be here.
> if (try_fixup_shared_map(pid, &vi, fd))
> return -1;
> - }
> }
> -}
> -
> -static inline bool should_restore_page(int pid, unsigned long va)
> -{
> - struct shmem_info *si;
> - struct shmem_id *shmid;
> -
> - /*
> - * If this is not a shmem virtual address
> - * we should restore such page.
> - */
> - shmid = find_shmem_id(va);
> - if (!shmid)
> - return true;
>
> - si = find_shmem_page(shmems, va, shmid->shmid);
> - return si->pid == pid;
> + return 0;
> }
>
> /*
> @@ -640,36 +598,14 @@ static int fixup_pages_data(int pid, int fd)
> if (read_img(pgfd, &va) < 0)
> goto out;
>
> - if (va == 0)
> + if (final_page_va(va))
Chaning pfn terminator from 0 to ~0 deserves separate patch.
> break;
>
> write(fd, &va, sizeof(va));
> sendfile(fd, pgfd, NULL, PAGE_SIZE);
> }
> - close_safe(&pgfd);
> -
> - pgfd = open_image_ro(CR_FD_PAGES_SHMEM, pid);
> - if (pgfd < 0)
> - return -1;
>
> - while (1) {
> - if (read_img(pgfd, &va) < 0)
> - goto out;
> -
> - if (va == 0)
> - break;
> -
> - if (!should_restore_page(pid, va)) {
> - lseek(pgfd, PAGE_SIZE, SEEK_CUR);
> - continue;
> - }
> -
> - pr_info("%d: Restoring shared page: %16lx\n", pid, va);
> - write(fd, &va, sizeof(va));
> - sendfile(fd, pgfd, NULL, PAGE_SIZE);
> - }
> -
> - ret = write_img(fd, &zero_page_entry);
> + ret = 0;
> out:
> close_safe(&pgfd);
> return ret;
> @@ -1373,6 +1308,73 @@ static int restore_task_with_children(void *_arg)
> return restore_one_task(me->pid);
> }
>
> +static int restore_shmems(int pid)
> +{
> + int i, pfd, fd;
> + void *addr;
> + int ret;
> +
> + pr_info("Restore content of shared memory\n");
> + for (i = 0; i < shmems->nr_shmems; i++) {
> + struct shmem_info *si = &shmems->entries[i];
> + char path[128];
> + u64 offset;
> +
> + pfd = open_proc(si->pid, "map_files");
> + if (pfd < 0)
> + goto err;
> +
> + snprintf(path, PATH_MAX, "%lx-%lx",
> + si->start, si->end);
> +
> + fd = openat(pfd, path, O_RDWR);
Plain open_proc should work here as well.
> + close(pfd);
> + if (fd < 0) {
> + pr_err("can't open file: %s\n", path);
> + goto err;
> + }
> +
> + addr = mmap(NULL, si->size, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0);
> + close(fd);
> + if (addr == MAP_FAILED) {
> + pr_err("can't map file: %s\n", path);
> + goto err;
> + }
> +
> + fd = open_image_ro(CR_FD_SHMEM_PAGES, si->shmid);
> + if (fd < 0) {
> + munmap(addr, si->size);
> + return -1;
> + }
> +
> + while (1) {
> + ret = read_img_buf(fd, &offset, sizeof(offset));
> + if (ret <= 0)
> + break;
> +
> + if (final_page_va(offset))
> + break;
> +
> + if (offset + PAGE_SIZE > si->size) {
> + break;
break? It should be an error.
> + }
Cleanup braces.
> + ret = read_img_buf(fd, addr + offset, PAGE_SIZE);
> + if (ret <= 0)
> + break;
> + }
> +
> + close(fd);
> + munmap(addr, si->size);
> + if (ret < 0)
> + goto err;
You can change this into break and avoid err: jump.
> + }
> +
> + return 0;
> +err:
> + return -1;
> +}
> +
> static int restore_root_task(int fd, struct cr_options *opts)
> {
> struct pstree_entry e;
> @@ -1430,7 +1437,6 @@ static int restore_root_task(int fd, struct cr_options *opts)
> perror("sigaction() failed\n");
> return -1;
> }
> -
Trash
> pr_info("Go on!!!\n");
> cr_wait_set(&task_entries->start, CR_STATE_COMPLETE);
>
> diff --git a/crtools.c b/crtools.c
> index a6e4739..5b39645 100644
> --- a/crtools.c
> +++ b/crtools.c
> @@ -22,7 +22,9 @@
> #include "syscall.h"
>
> static struct cr_options opts;
> -struct page_entry zero_page_entry;
> +struct page_entry zero_page_entry = {
> + .va = ~0LL,
> + };
Ugly formatting.
> char image_dir[PATH_MAX];
>
> /*
> @@ -46,9 +48,8 @@ struct cr_fd_desc_tmpl fdset_template[CR_FD_MAX] = {
> .magic = PAGES_MAGIC,
> },
>
> - /* shared memory pages data */
> - [CR_FD_PAGES_SHMEM] = {
> - .fmt = FMT_FNAME_PAGES_SHMEM,
> + [CR_FD_SHMEM_PAGES] = {
> + .fmt = FMT_FNAME_SHMEM_PAGES,
> .magic = PAGES_MAGIC,
s/CR_FD_PAGES_SHMEM/CR_FD_SHMEM_PAGES/ can go in separate patch.
> },
>
> @@ -70,12 +71,6 @@ struct cr_fd_desc_tmpl fdset_template[CR_FD_MAX] = {
> .magic = PSTREE_MAGIC,
> },
>
> - /* info about which memory areas are shared */
> - [CR_FD_SHMEM] = {
> - .fmt = FMT_FNAME_SHMEM,
> - .magic = SHMEM_MAGIC,
> - },
You forgot to kill the SHMEM_MAGIC.
> -
> /* info about signal handlers */
> [CR_FD_SIGACT] = {
> .fmt = FMT_FNAME_SIGACTS,
> diff --git a/include/image.h b/include/image.h
> index 2847440..98db9da 100644
> --- a/include/image.h
> +++ b/include/image.h
> @@ -55,12 +55,6 @@ struct fdinfo_entry {
> ((fe)->type == FDINFO_CWD) || \
> ((fe)->type == FDINFO_EXE))
>
> -struct shmem_entry {
> - u64 start;
> - u64 end;
> - u64 shmid;
> -} __packed;
> -
> struct pstree_entry {
> u32 pid;
> u32 nr_children;
> @@ -113,6 +107,7 @@ struct vma_entry {
> u64 start;
> u64 end;
> u64 pgoff;
> + u64 shmid;
Adding shmid can be in a separate patch. It will be small, but will make
the review simpler anyway.
> u32 prot;
> u32 flags;
> u32 status;
> } \
> diff --git a/parasite-syscall.c b/parasite-syscall.c
> index d1d63a8..d7f9ab6 100644
> --- a/parasite-syscall.c
> +++ b/parasite-syscall.c
> @@ -536,10 +536,6 @@ int parasite_dump_pages_seized(struct parasite_ctl *ctl, struct list_head *vma_a
> if (ret < 0)
> goto out;
>
> - ret = parasite_prep_file(CR_FD_PAGES_SHMEM, ctl, cr_fdset);
> - if (ret < 0)
> - goto out;
> -
> ret = parasite_execute(PARASITE_CMD_DUMPPAGES_INIT, ctl, st, sizeof(*st));
> if (ret < 0) {
> pr_err("Dumping pages failed with %li (%li) at %li\n",
> @@ -565,15 +561,15 @@ int parasite_dump_pages_seized(struct parasite_ctl *ctl, struct list_head *vma_a
> if (vma_area->vma.status & VMA_AREA_SYSVIPC)
> continue;
>
> + if (vma_area_is(vma_area, VMA_ANON_SHARED))
> + continue;
> +
This check shouldn't be here.
> pr_info_vma(vma_area);
> parasite_dumppages.vma_entry = vma_area->vma;
>
> if (vma_area_is(vma_area, VMA_ANON_PRIVATE) ||
> - vma_area_is(vma_area, VMA_FILE_PRIVATE))
> - parasite_dumppages.fd_type = PG_PRIV;
> - else if (vma_area_is(vma_area, VMA_ANON_SHARED))
> - parasite_dumppages.fd_type = PG_SHARED;
> - else {
> + vma_area_is(vma_area, VMA_FILE_PRIVATE)) {
> + } else {
> pr_warn("Unexpected VMA area found\n");
> continue;
> }
I don't see any changes in restorer.c which fixup the shmem mapping according
to its pgoff.
Thanks,
Pavel
More information about the CRIU
mailing list