[CRIU] [PATCH 2/2] rst: Collect VmaEntries only once on restore
Pavel Emelyanov
xemul at parallels.com
Mon Feb 3 03:12:22 PST 2014
Right now we do it twice -- once on shmem prepare and
once on the restore itself. Do the collection only once,
the same way we do for fdinfo-s -- the root task reads all
the entries in and populates the tasks' rst_info with them.
Signed-off-by: Pavel Emelyanov <xemul at parallels.com>
---
cr-restore.c | 104 ++++++++++++++++-------------------------------------
include/rst_info.h | 3 ++
include/vma.h | 8 +++++
mem.c | 36 ++++++++++++++-----
pstree.c | 1 +
5 files changed, 69 insertions(+), 83 deletions(-)
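
A standalone sketch of the idea, for reviewers skimming the diff.
These are simplified stand-ins, not the CRIU types or helpers:
read_next_vma_entry() is a hypothetical placeholder for
pb_read_one_eof(), and the list is open-coded instead of using
CRIU's list.h.

/*
 * Illustration only: parse the VMA image exactly once and cache
 * the result in the task's rst_info, so later restore stages walk
 * the cache instead of re-reading the image.
 */
#include <stddef.h>

struct vma_entry {			/* stand-in for VmaEntry */
	unsigned long start, end;
	struct vma_entry *next;
};

struct vm_area_list {			/* cf. include/vma.h */
	struct vma_entry *head, *tail;
	unsigned int nr;
	unsigned long priv_size;
};

struct rst_info {			/* cf. include/rst_info.h */
	struct vm_area_list vmas;
};

/* hypothetical reader; returns NULL at end of image */
extern struct vma_entry *read_next_vma_entry(int img_fd);

/* Done once by the root task (cf. prepare_mm_pid() in mem.c). */
static void collect_vmas(int img_fd, struct rst_info *ri)
{
	struct vma_entry *e;

	while ((e = read_next_vma_entry(img_fd)) != NULL) {
		/* append at the tail to keep the image's sort order */
		e->next = NULL;
		if (ri->vmas.tail)
			ri->vmas.tail->next = e;
		else
			ri->vmas.head = e;
		ri->vmas.tail = e;
		ri->vmas.nr++;
		/* the real code only counts private areas here */
		ri->vmas.priv_size += e->end - e->start;
	}
}

/* Later stages, e.g. prepare_mappings(), just walk the cache. */
static unsigned long premmap_bytes(const struct rst_info *ri)
{
	return ri->vmas.priv_size;
}

Keeping the collected list per-task in rst_info mirrors how fdinfo
entries are handled, and a zombie, which has no vmas image, simply
ends up with an empty list -- which prepare_mappings() below detects
via vmas->nr == 0.
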
diff --git a/cr-restore.c b/cr-restore.c
index dc719b0..e1360a1 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -90,8 +90,6 @@ static int prepare_rlimits(int pid);
static int prepare_posix_timers(int pid);
static int prepare_signals(int pid);
-static VM_AREA_LIST(rst_vmas); /* XXX .longest is NOT tracked for this guy */
-
static int shmem_remap(void *old_addr, void *new_addr, unsigned long size)
{
void *ret;
@@ -321,6 +319,7 @@ static int restore_priv_vma_content(pid_t pid)
{
struct vma_area *vma;
int ret = 0;
+ struct list_head *vmas = &current->rst->vmas.h;
unsigned int nr_restored = 0;
unsigned int nr_shared = 0;
@@ -328,7 +327,7 @@ static int restore_priv_vma_content(pid_t pid)
unsigned long va;
struct page_read pr;
- vma = list_first_entry(&rst_vmas.h, struct vma_area, list);
+ vma = list_first_entry(vmas, struct vma_area, list);
ret = open_page_read(pid, &pr);
if (ret)
return -1;
@@ -356,7 +355,7 @@ static int restore_priv_vma_content(pid_t pid)
* read from image file.
*/
while (va >= vma->vma.end) {
- if (vma->list.next == &rst_vmas.h)
+ if (vma->list.next == vmas)
goto err_addr;
vma = list_entry(vma->list.next, struct vma_area, list);
}
@@ -413,7 +412,7 @@ err_read:
return ret;
/* Remove pages, which were not shared with a child */
- list_for_each_entry(vma, &rst_vmas.h, list) {
+ list_for_each_entry(vma, vmas, list) {
unsigned long size, i = 0;
void *addr = decode_pointer(vma->premmaped_addr);
@@ -456,84 +455,44 @@ err_addr:
static int prepare_mappings(int pid)
{
- int fd, ret = 0;
- LIST_HEAD(parent_vmas);
+ int ret = 0;
struct vma_area *pvma, *vma;
void *addr;
+ struct vm_area_list *vmas;
+ struct list_head *parent_vmas = NULL;
+ LIST_HEAD(empty);
void *old_premmapped_addr = NULL;
unsigned long old_premmapped_len, pstart = 0;
- rst_vmas.nr = 0;
- rst_vmas.priv_size = 0;
+ vmas = &current->rst->vmas;
+ if (vmas->nr == 0) /* Zombie */
+ goto out;
+
/*
* Keep parent vmas at hands to check whether we can "inherit" them.
* See comments in map_private_vma.
*/
- list_replace_init(&rst_vmas.h, &parent_vmas);
-
- /* Skip errors, because a zombie doesn't have an image of vmas */
- fd = open_image(CR_FD_VMAS, O_RSTR, pid);
- if (fd < 0) {
- if (errno != ENOENT)
- ret = fd;
- goto out;
- }
-
- while (1) {
- struct vma_area *vma;
- VmaEntry *e;
-
- ret = -1;
- vma = alloc_vma_area();
- if (!vma)
- break;
-
- ret = pb_read_one_eof(fd, &e, PB_VMA);
- if (ret <= 0) {
- xfree(vma);
- break;
- }
-
- rst_vmas.nr++;
- list_add_tail(&vma->list, &rst_vmas.h);
-
- vma->vma = *e;
- vma_entry__free_unpacked(e, NULL);
-
- if (vma->vma.fd != -1) {
- ret = -1;
- pr_err("Error in vma->fd setting (%Ld)\n",
- (unsigned long long)vma->vma.fd);
- break;
- }
-
- if (vma_priv(&vma->vma)) {
- rst_vmas.priv_size += vma_area_len(vma);
- if (vma->vma.flags & MAP_GROWSDOWN)
- rst_vmas.priv_size += PAGE_SIZE;
- }
- }
- close(fd);
-
- if (ret < 0)
- goto out;
+ if (current->parent)
+ parent_vmas = &current->parent->rst->vmas.h;
+ else
+ parent_vmas = &empty;
/* Reserve a place for mapping private vma-s one by one */
- addr = mmap(NULL, rst_vmas.priv_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+ addr = mmap(NULL, vmas->priv_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
if (addr == MAP_FAILED) {
- pr_perror("Unable to reserve memory (%lu bytes)", rst_vmas.priv_size);
+ pr_perror("Unable to reserve memory (%lu bytes)", vmas->priv_size);
return -1;
}
old_premmapped_addr = current->rst->premmapped_addr;
old_premmapped_len = current->rst->premmapped_len;
current->rst->premmapped_addr = addr;
- current->rst->premmapped_len = rst_vmas.priv_size;
+ current->rst->premmapped_len = vmas->priv_size;
- pvma = list_entry(&parent_vmas, struct vma_area, list);
+ pvma = list_entry(parent_vmas, struct vma_area, list);
- list_for_each_entry(vma, &rst_vmas.h, list) {
+ list_for_each_entry(vma, &vmas->h, list) {
if (pstart > vma->vma.start) {
ret = -1;
pr_err("VMA-s are not sorted in the image file\n");
@@ -544,7 +503,7 @@ static int prepare_mappings(int pid)
if (!vma_priv(&vma->vma))
continue;
- ret = map_private_vma(pid, vma, addr, &pvma, &parent_vmas);
+ ret = map_private_vma(pid, vma, addr, &pvma, parent_vmas);
if (ret < 0)
break;
@@ -555,12 +514,6 @@ static int prepare_mappings(int pid)
ret = restore_priv_vma_content(pid);
out:
- while (!list_empty(&parent_vmas)) {
- vma = list_first_entry(&parent_vmas, struct vma_area, list);
- list_del(&vma->list);
- xfree(vma);
- }
-
if (old_premmapped_addr &&
munmap(old_premmapped_addr, old_premmapped_len)) {
pr_perror("Unable to unmap %p(%lx)",
@@ -579,8 +532,9 @@ out:
static int unmap_guard_pages()
{
struct vma_area *vma;
+ struct list_head *vmas = &current->rst->vmas.h;
- list_for_each_entry(vma, &rst_vmas.h, list) {
+ list_for_each_entry(vma, vmas, list) {
if (!vma_priv(&vma->vma))
continue;
@@ -601,8 +555,9 @@ static int open_vmas(int pid)
{
struct vma_area *vma;
int ret = 0;
+ struct list_head *vmas = &current->rst->vmas.h;
- list_for_each_entry(vma, &rst_vmas.h, list) {
+ list_for_each_entry(vma, vmas, list) {
if (!(vma_entry_is(&vma->vma, VMA_AREA_REGULAR)))
continue;
@@ -2213,6 +2168,7 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
unsigned long vdso_rt_delta = 0;
struct vm_area_list self_vmas;
+ struct vm_area_list *vmas = &current->rst->vmas;
int i;
pr_info("Restore via sigreturn\n");
@@ -2233,7 +2189,7 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
*/
tgt_vmas = rst_mem_cpos(RM_PRIVATE);
- list_for_each_entry(vma, &rst_vmas.h, list) {
+ list_for_each_entry(vma, &vmas->h, list) {
VmaEntry *vme;
vme = rst_mem_alloc(sizeof(*vme), RM_PRIVATE);
@@ -2295,7 +2251,7 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
* or inited from scratch).
*/
- exec_mem_hint = restorer_get_vma_hint(pid, &rst_vmas.h, &self_vmas.h,
+ exec_mem_hint = restorer_get_vma_hint(pid, &vmas->h, &self_vmas.h,
restore_bootstrap_len);
if (exec_mem_hint == -1) {
pr_err("No suitable area for task_restore bootstrap (%ldK)\n",
@@ -2370,7 +2326,7 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
task_args->shmems = rst_mem_remap_ptr(rst_shmems, RM_SHREMAP);
task_args->nr_shmems = nr_shmems;
- task_args->nr_vmas = rst_vmas.nr;
+ task_args->nr_vmas = vmas->nr;
task_args->tgt_vmas = rst_mem_remap_ptr(tgt_vmas, RM_PRIVATE);
task_args->timer_n = posix_timers_nr;
diff --git a/include/rst_info.h b/include/rst_info.h
index 184c6f7..07259ba 100644
--- a/include/rst_info.h
+++ b/include/rst_info.h
@@ -3,6 +3,7 @@
#include "lock.h"
#include "list.h"
+#include "vma.h"
struct task_entries {
int nr_threads, nr_tasks, nr_helpers;
@@ -37,6 +38,8 @@ struct rst_info {
int service_fd_id;
struct fdt *fdt;
+ struct vm_area_list vmas;
+
union {
struct pstree_item *pgrp_leader;
futex_t pgrp_set;
diff --git a/include/vma.h b/include/vma.h
index 03994a6..d6fcca5 100644
--- a/include/vma.h
+++ b/include/vma.h
@@ -13,6 +13,14 @@ struct vm_area_list {
#define VM_AREA_LIST(name) struct vm_area_list name = { .h = LIST_HEAD_INIT(name.h), .nr = 0, }
+static inline void vm_area_list_init(struct vm_area_list *vml)
+{
+ INIT_LIST_HEAD(&vml->h);
+ vml->nr = 0;
+ vml->priv_size = 0;
+ vml->longest = 0;
+}
+
struct vma_area {
struct list_head list;
VmaEntry vma;
diff --git a/mem.c b/mem.c
index d3787dd..c1cb46b 100644
--- a/mem.c
+++ b/mem.c
@@ -17,6 +17,7 @@
#include "vma.h"
#include "shmem.h"
#include "pstree.h"
+#include "restorer.h"
#include "protobuf.h"
#include "protobuf/pagemap.pb-c.h"
@@ -344,7 +345,7 @@ int prepare_mm_pid(struct pstree_item *i)
{
pid_t pid = i->pid.virt;
int fd, ret = -1;
- VmaEntry *vi;
+ struct rst_info *ri = i->rst;
fd = open_image(CR_FD_VMAS, O_RSTR, pid);
if (fd < 0) {
@@ -355,21 +356,38 @@ int prepare_mm_pid(struct pstree_item *i)
}
while (1) {
+ struct vma_area *vma;
+ VmaEntry *vi;
+
+ ret = -1;
+ vma = alloc_vma_area();
+ if (!vma)
+ break;
+
ret = pb_read_one_eof(fd, &vi, PB_VMA);
- if (ret <= 0)
+ if (ret <= 0) {
+ xfree(vma);
break;
+ }
- pr_info("vma 0x%"PRIx64" 0x%"PRIx64"\n", vi->start, vi->end);
+ ri->vmas.nr++;
+ vma->vma = *vi;
+ list_add_tail(&vma->list, &ri->vmas.h);
+ vma_entry__free_unpacked(vi, NULL);
- if (!vma_entry_is(vi, VMA_ANON_SHARED) ||
- vma_entry_is(vi, VMA_AREA_SYSVIPC)) {
- vma_entry__free_unpacked(vi, NULL);
- continue;
+ if (vma_priv(&vma->vma)) {
+ ri->vmas.priv_size += vma_area_len(vma);
+ if (vma->vma.flags & MAP_GROWSDOWN)
+ ri->vmas.priv_size += PAGE_SIZE;
}
- ret = collect_shmem(pid, vi);
- vma_entry__free_unpacked(vi, NULL);
+ pr_info("vma 0x%"PRIx64" 0x%"PRIx64"\n", vma->vma.start, vma->vma.end);
+
+ if (!vma_entry_is(&vma->vma, VMA_ANON_SHARED) ||
+ vma_entry_is(&vma->vma, VMA_AREA_SYSVIPC))
+ continue;
+ ret = collect_shmem(pid, &vma->vma);
if (ret)
break;
}
diff --git a/pstree.c b/pstree.c
index b83453f..fc0f707 100644
--- a/pstree.c
+++ b/pstree.c
@@ -143,6 +143,7 @@ struct pstree_item *__alloc_pstree_item(bool rst)
if (!item)
return NULL;
memset(item, 0, sizeof(*item) + sizeof(item->rst[0]));
+ vm_area_list_init(&item->rst[0].vmas);
}
INIT_LIST_HEAD(&item->children);
--
1.8.4.2