[Devel] [RFC v14][PATCH 26/54] Restore anonymous- and file-mapped- shared memory

Oren Laadan orenl at cs.columbia.edu
Tue Apr 28 16:23:56 PDT 2009


The bulk of the work is in ckpt_read_vma(), which has been refactored:
the part that create the suitable 'struct file *' for the mapping is
now larger and moved to a separate function. What's left is to read
the VMA description, get the file pointer, create the mapping, and
proceed to read the contents in.

Both anonymous shared VMAs that have been read earlier (as indicated
by a look up to objhash) and file-mapped shared VMAs are skipped.
Anonymous shared VMAs seen for the first time have their contents
read in directly to the backing inode, as indexed by the page numbers
(as opposed to virtual addresses).

Changelog[v14]:
  - Introduce patch

Signed-off-by: Oren Laadan <orenl at cs.columbia.edu>
---
 checkpoint/memory.c        |   66 ++++++++++++++++++++++++++++++++-----------
 include/linux/checkpoint.h |    6 ++++
 include/linux/mm.h         |    2 +
 mm/filemap.c               |   14 ++++++++-
 mm/shmem.c                 |   47 +++++++++++++++++++++++++++++++
 5 files changed, 116 insertions(+), 19 deletions(-)

diff --git a/checkpoint/memory.c b/checkpoint/memory.c
index f96a50f..f5f8fcf 100644
--- a/checkpoint/memory.c
+++ b/checkpoint/memory.c
@@ -771,13 +771,36 @@ static int restore_read_page(struct ckpt_ctx *ctx, struct page *page, void *p)
 	return 0;
 }
 
+static struct page *bring_private_page(unsigned long addr)
+{
+	struct page *page;
+	int ret;
+
+	ret = get_user_pages(current, current->mm, addr, 1, 1, 1, &page, NULL);
+	if (ret < 0)
+		page = ERR_PTR(ret);
+	return page;
+}
+
+static struct page *bring_shared_page(unsigned long idx, struct inode *ino)
+{
+	struct page *page = NULL;
+	int ret;
+
+	ret = shmem_getpage(ino, idx, &page, SGP_WRITE, NULL);
+	if (ret < 0)
+		return ERR_PTR(ret);
+	if (page)
+		unlock_page(page);
+	return page;
+}
+
 /**
  * read_pages_contents - read in data of pages in page-array chain
  * @ctx - restart context
  */
-static int read_pages_contents(struct ckpt_ctx *ctx)
+static int read_pages_contents(struct ckpt_ctx *ctx, struct inode *inode)
 {
-	struct mm_struct *mm = current->mm;
 	struct ckpt_pgarr *pgarr;
 	unsigned long *vaddrs;
 	char *buf;
@@ -787,17 +810,22 @@ static int read_pages_contents(struct ckpt_ctx *ctx)
 	if (!buf)
 		return -ENOMEM;
 
-	down_read(&mm->mmap_sem);
+	down_read(&current->mm->mmap_sem);
 	list_for_each_entry_reverse(pgarr, &ctx->pgarr_list, list) {
 		vaddrs = pgarr->vaddrs;
 		for (i = 0; i < pgarr->nr_used; i++) {
 			struct page *page;
 
 			_ckpt_debug(CKPT_DPAGE, "got page %#lx\n", vaddrs[i]);
-			ret = get_user_pages(current, mm, vaddrs[i],
-					     1, 1, 1, &page, NULL);
-			if (ret < 0)
+			if (inode)
+				page = bring_shared_page(vaddrs[i], inode);
+			else
+				page = bring_private_page(vaddrs[i]);
+
+			if (IS_ERR(page)) {
+				ret = PTR_ERR(page);
 				goto out;
+			}
 
 			ret = restore_read_page(ctx, page, buf);
 			page_cache_release(page);
@@ -808,14 +836,15 @@ static int read_pages_contents(struct ckpt_ctx *ctx)
 	}
 
  out:
-	up_read(&mm->mmap_sem);
+	up_read(&current->mm->mmap_sem);
 	kfree(buf);
 	return 0;
 }
 
 /**
- * restore_private_contents - restore contents of a VMA with private memory
+ * restore_memory_contents - restore contents of a memory region
  * @ctx - restart context
+ * @inode - backing inode
  *
  * Reads a header that specifies how many pages will follow, then reads
  * a list of virtual addresses into ctx->pgarr_list page-array chain,
@@ -823,7 +852,7 @@ static int read_pages_contents(struct ckpt_ctx *ctx)
  * these steps until reaching a header specifying "0" pages, which marks
  * the end of the contents.
  */
-static int restore_private_contents(struct ckpt_ctx *ctx)
+int restore_memory_contents(struct ckpt_ctx *ctx, struct inode *inode)
 {
 	struct ckpt_hdr_pgarr *h;
 	unsigned long nr_pages;
@@ -845,7 +874,7 @@ static int restore_private_contents(struct ckpt_ctx *ctx)
 		ret = read_pages_vaddrs(ctx, nr_pages);
 		if (ret < 0)
 			break;
-		ret = read_pages_contents(ctx);
+		ret = read_pages_contents(ctx, inode);
 		if (ret < 0)
 			break;
 		pgarr_reset_all(ctx);
@@ -903,9 +932,9 @@ static unsigned long calc_map_flags_bits(unsigned long orig_vm_flags)
  * @file - file to map (NULL for anonymous)
  * @h - vma header data
  */
-static unsigned long generic_vma_restore(struct mm_struct *mm,
-					 struct file *file,
-					 struct ckpt_hdr_vma *h)
+unsigned long generic_vma_restore(struct mm_struct *mm,
+				  struct file *file,
+				  struct ckpt_hdr_vma *h)
 {
 	unsigned long vm_size, vm_start, vm_flags, vm_prot, vm_pgoff;
 	unsigned long addr;
@@ -952,7 +981,7 @@ int private_vma_restore(struct ckpt_ctx *ctx, struct mm_struct *mm,
 	if (IS_ERR((void *) addr))
 		return PTR_ERR((void *) addr);
 
-	return restore_private_contents(ctx);
+	return restore_memory_contents(ctx, NULL);
 }
 
 /**
@@ -1012,16 +1041,19 @@ static struct restore_vma_ops restore_vma_ops[] = {
 	{
 		.vma_name = "ANON SHARED",
 		.vma_type = CKPT_VMA_SHM_ANON,
+		.restore = shmem_restore,
 	},
 	/* anonymous shared (skipped) */
 	{
 		.vma_name = "ANON SHARED (skip)",
 		.vma_type = CKPT_VMA_SHM_ANON_SKIP,
+		.restore = shmem_restore,
 	},
 	/* file-mapped shared */
 	{
 		.vma_name = "FILE SHARED",
 		.vma_type = CKPT_VMA_SHM_FILE,
+		.restore = filemap_restore,
 	},
 };
 
@@ -1040,15 +1072,15 @@ static int restore_vma(struct ckpt_ctx *ctx, struct mm_struct *mm)
 	if (IS_ERR(h))
 		return PTR_ERR(h);
 
-	ckpt_debug("vma %#lx-%#lx flags %#lx type %d vmaref %d\n",
+	ckpt_debug("vma %#lx-%#lx flags %#lx type %d vmaref %d inoref %d\n",
 		 (unsigned long) h->vm_start, (unsigned long) h->vm_end,
 		 (unsigned long) h->vm_flags, (int) h->vma_type,
-		 (int) h->vma_objref);
+		 (int) h->vma_objref, (int) h->ino_objref);
 
 	ret = -EINVAL;
 	if (h->vm_end < h->vm_start)
 		goto out;
-	if (h->vma_objref < 0)
+	if (h->vma_objref < 0 || h->ino_objref < 0)
 		goto out;
 	if (h->vma_type >= CKPT_VMA_MAX)
 		goto out;
diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
index 53399f8..7f359ac 100644
--- a/include/linux/checkpoint.h
+++ b/include/linux/checkpoint.h
@@ -81,9 +81,15 @@ extern int shmem_vma_checkpoint(struct ckpt_ctx *ctx,
 				enum vma_type type,
 				int ino_objref);
 
+extern unsigned long generic_vma_restore(struct mm_struct *mm,
+					 struct file *file,
+					 struct ckpt_hdr_vma *hh);
+
 extern int private_vma_restore(struct ckpt_ctx *ctx, struct mm_struct *mm,
 			       struct file *file, struct ckpt_hdr_vma *h);
 
+extern int restore_memory_contents(struct ckpt_ctx *ctx, struct inode *inode);
+
 extern int checkpoint_mm(struct ckpt_ctx *ctx, struct task_struct *t);
 extern int restore_mm(struct ckpt_ctx *ctx);
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7d2f93a..3a40968 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1200,6 +1200,8 @@ extern int filemap_restore(struct ckpt_ctx *ctx, struct mm_struct *mm,
 			   struct ckpt_hdr_vma *hh);
 extern int special_mapping_restore(struct ckpt_ctx *ctx, struct mm_struct *mm,
 				   struct ckpt_hdr_vma *hh);
+extern int shmem_restore(struct ckpt_ctx *ctx, struct mm_struct *mm,
+			 struct ckpt_hdr_vma *hh);
 #endif
 
 /* readahead.c */
diff --git a/mm/filemap.c b/mm/filemap.c
index e9499d9..af83da7 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1694,11 +1694,21 @@ int filemap_restore(struct ckpt_ctx *ctx,
 {
 	struct file *file;
 
+	if (h->vma_type == CKPT_VMA_FILE && (h->vm_flags & VM_SHARED))
+		return -EINVAL;
+	if (h->vma_type == CKPT_VMA_SHM_FILE && !(h->vm_flags & VM_SHARED))
+		return -EINVAL;
+
 	file = ckpt_obj_fetch(ctx, h->vma_objref, CKPT_OBJ_FILE);
-	if (IS_ERR(file))
+	if (!file)
+		return -EINVAL;
+	else if (IS_ERR(file))
 		return PTR_ERR(file);
 
-	return private_vma_restore(ctx, mm, file, h);
+	if (h->vma_type == CKPT_VMA_FILE)
+		return private_vma_restore(ctx, mm, file, h);
+	else
+		return generic_vma_restore(mm, file, h);
 }
 #else
 #define filemap_checkpoint NULL
diff --git a/mm/shmem.c b/mm/shmem.c
index 17847b0..fbb2528 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2405,6 +2405,53 @@ static int shmem_checkpoint(struct ckpt_ctx *ctx, struct vm_area_struct *vma)
 
 	return shmem_vma_checkpoint(ctx, vma, vma_type, ino_objref);
 }
+
+int shmem_restore(struct ckpt_ctx *ctx,
+		  struct mm_struct *mm, struct ckpt_hdr_vma *h)
+{
+	unsigned long addr;
+	struct file *file;
+	int ret = 0;
+
+	file = ckpt_obj_fetch(ctx, h->ino_objref, CKPT_OBJ_FILE);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	/* if file is NULL, this is the premiere - create and insert */
+	if (!file) {
+		if (h->vma_type != CKPT_VMA_SHM_ANON)
+			return -EINVAL;
+		/*
+		 * in theory could pass NULL to mmap and let it create
+		 * the file. But, if 'shm_size != vm_end - vm_start',
+		 * or if 'vm_pgoff != 0', then the vma reflects only a
+		 * portion of the shm object and we need to "manually"
+		 * create the full shm object.
+		 */
+		file = shmem_file_setup("/dev/zero", h->ino_size, h->vm_flags);
+		if (IS_ERR(file))
+			return PTR_ERR(file);
+		ret = ckpt_obj_insert(ctx, file, h->ino_objref, CKPT_OBJ_FILE);
+		if (ret < 0)
+			goto out;
+	} else {
+		if (h->vma_type != CKPT_VMA_SHM_ANON_SKIP)
+			return -EINVAL;
+		/* Already need fput() for the file above; keep path simple */
+		get_file(file);
+	}
+
+	addr = generic_vma_restore(mm, file, h);
+	if (IS_ERR((void *) addr))
+		return PTR_ERR((void *) addr);
+
+	if (h->vma_type == CKPT_VMA_SHM_ANON)
+		ret = restore_memory_contents(ctx, file->f_dentry->d_inode);
+ out:
+	fput(file);
+	return ret;
+}
+
 #else
 #define shmem_checkpoint NULL
 #endif /* CONFIG_CHECKPOINT */
-- 
1.5.4.3

_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers




More information about the Devel mailing list