[Devel] [RFC v4][PATCH 4/9] Memory management (dump)
Oren Laadan
orenl at cs.columbia.edu
Tue Sep 9 00:42:29 PDT 2008
For each VMA, there is a 'struct cr_hdr_vma'; if the VMA is file-mapped,
it will be followed by the file name. The cr_hdr_vma->nr_pages field tells
how many pages were dumped for this VMA. The header (and the file name,
if any) is then followed by the actual data: first a dump of the addresses
of all dumped pages (nr_pages entries), followed by a dump of the contents
of all dumped pages (nr_pages pages). Then comes the next VMA, and so on.
Signed-off-by: Oren Laadan <orenl at cs.columbia.edu>
---
arch/x86/mm/checkpoint.c | 30 +++
arch/x86/mm/restart.c | 1 +
checkpoint/Makefile | 3 +-
checkpoint/checkpoint.c | 53 ++++++
checkpoint/ckpt_arch.h | 1 +
checkpoint/ckpt_mem.c | 448 ++++++++++++++++++++++++++++++++++++++++++++
checkpoint/ckpt_mem.h | 35 ++++
checkpoint/sys.c | 23 ++-
include/asm-x86/ckpt_hdr.h | 5 +
include/linux/ckpt.h | 12 ++
include/linux/ckpt_hdr.h | 30 +++
11 files changed, 635 insertions(+), 6 deletions(-)
create mode 100644 checkpoint/ckpt_mem.c
create mode 100644 checkpoint/ckpt_mem.h
diff --git a/arch/x86/mm/checkpoint.c b/arch/x86/mm/checkpoint.c
index 71d21e6..50cfd29 100644
--- a/arch/x86/mm/checkpoint.c
+++ b/arch/x86/mm/checkpoint.c
@@ -192,3 +192,33 @@ int cr_write_cpu(struct cr_ctx *ctx, struct task_struct *t)
cr_hbuf_put(ctx, sizeof(*hh));
return ret;
}
+
+/**
+ * cr_write_mm_context - dump the mm->context state (x86 LDT)
+ * @ctx: checkpoint context
+ * @mm: memory descriptor whose context is dumped
+ * @parent: value stored in the record header's 'parent' field
+ *
+ * Writes a CR_HDR_MM_CONTEXT record holding the LDT entry size and the
+ * number of LDT entries, followed by the raw LDT contents.
+ * mm->context.lock is held across both writes and is released on every
+ * exit path.
+ *
+ * Returns a negative value if writing the header fails, otherwise
+ * whatever cr_kwrite() returns for the LDT data.
+ */
+int cr_write_mm_context(struct cr_ctx *ctx, struct mm_struct *mm, int parent)
+{
+	struct cr_hdr h;
+	struct cr_hdr_mm_context *hh = cr_hbuf_get(ctx, sizeof(*hh));
+	int nldt, ret;
+
+	h.type = CR_HDR_MM_CONTEXT;
+	h.len = sizeof(*hh);
+	h.parent = parent;
+
+	mutex_lock(&mm->context.lock);
+
+	hh->ldt_entry_size = LDT_ENTRY_SIZE;
+	hh->nldt = mm->context.size;
+	/* keep a private copy: hh is not touched again once it is put back */
+	nldt = hh->nldt;
+
+	cr_debug("nldt %d\n", nldt);
+
+	ret = cr_write_obj(ctx, &h, hh);
+	cr_hbuf_put(ctx, sizeof(*hh));
+	if (ret < 0)
+		goto out;	/* must not return with context.lock held */
+
+	ret = cr_kwrite(ctx, mm->context.ldt, nldt * LDT_ENTRY_SIZE);
+ out:
+	mutex_unlock(&mm->context.lock);
+
+	return ret;
+}
diff --git a/arch/x86/mm/restart.c b/arch/x86/mm/restart.c
index 883a163..d7fb89a 100644
--- a/arch/x86/mm/restart.c
+++ b/arch/x86/mm/restart.c
@@ -8,6 +8,7 @@
* distribution for more details.
*/
+#include <linux/unistd.h>
#include <asm/desc.h>
#include <asm/i387.h>
diff --git a/checkpoint/Makefile b/checkpoint/Makefile
index d2df68c..3a0df6d 100644
--- a/checkpoint/Makefile
+++ b/checkpoint/Makefile
@@ -2,4 +2,5 @@
# Makefile for linux checkpoint/restart.
#
-obj-$(CONFIG_CHECKPOINT_RESTART) += sys.o checkpoint.o restart.o
+obj-$(CONFIG_CHECKPOINT_RESTART) += sys.o checkpoint.o restart.o \
+ ckpt_mem.o
diff --git a/checkpoint/checkpoint.c b/checkpoint/checkpoint.c
index d34a691..4dae775 100644
--- a/checkpoint/checkpoint.c
+++ b/checkpoint/checkpoint.c
@@ -55,6 +55,55 @@ int cr_write_string(struct cr_ctx *ctx, char *str, int len)
return cr_write_obj(ctx, &h, str);
}
+/**
+ * cr_fill_fname - resolve a path into a pathname string
+ * @path: path to resolve
+ * @root: root the pathname is taken relative to
+ * @buf: scratch buffer that receives the pathname
+ * @n: in: size of @buf; out: size of the resolved pathname
+ *
+ * Returns whatever __d_path() returns. On success *@n is rewritten to the
+ * distance from the returned pointer to the end of @buf; on failure *@n
+ * is left untouched.
+ */
+static char *
+cr_fill_fname(struct path *path, struct path *root, char *buf, int *n)
+{
+	char *name;
+
+	BUG_ON(!buf);
+
+	name = __d_path(path, root, buf, *n);
+	if (IS_ERR(name))
+		return name;
+
+	*n = (buf + (*n) - name);
+	return name;
+}
+
+/**
+ * cr_write_fname - write a file name
+ * @ctx: checkpoint context
+ * @path: path name
+ * @root: relative root
+ *
+ * Resolves @path (relative to @root) into a temporary PATH_MAX buffer and
+ * writes it as a CR_HDR_FNAME record. Returns -ENOMEM if the buffer
+ * cannot be allocated, the error from the path lookup if that fails, and
+ * otherwise the result of cr_write_obj().
+ */
+int cr_write_fname(struct cr_ctx *ctx, struct path *path, struct path *root)
+{
+	struct cr_hdr h;
+	char *scratch, *name;
+	int len = PATH_MAX;
+	int err;
+
+	scratch = kmalloc(len, GFP_KERNEL);
+	if (!scratch)
+		return -ENOMEM;
+
+	name = cr_fill_fname(path, root, scratch, &len);
+	if (IS_ERR(name)) {
+		err = PTR_ERR(name);
+		goto out;
+	}
+
+	h.type = CR_HDR_FNAME;
+	h.len = len;
+	h.parent = 0;
+	err = cr_write_obj(ctx, &h, name);
+ out:
+	kfree(scratch);
+	return err;
+}
+
/* write the checkpoint header */
static int cr_write_head(struct cr_ctx *ctx)
{
@@ -164,6 +213,10 @@ static int cr_write_task(struct cr_ctx *ctx, struct task_struct *t)
cr_debug("task_struct: ret %d\n", ret);
if (ret < 0)
goto out;
+ ret = cr_write_mm(ctx, t);
+ cr_debug("memory: ret %d\n", ret);
+ if (ret < 0)
+ goto out;
ret = cr_write_thread(ctx, t);
cr_debug("thread: ret %d\n", ret);
if (ret < 0)
diff --git a/checkpoint/ckpt_arch.h b/checkpoint/ckpt_arch.h
index 5bd4703..9bd0ba4 100644
--- a/checkpoint/ckpt_arch.h
+++ b/checkpoint/ckpt_arch.h
@@ -2,6 +2,7 @@
int cr_write_thread(struct cr_ctx *ctx, struct task_struct *t);
int cr_write_cpu(struct cr_ctx *ctx, struct task_struct *t);
+int cr_write_mm_context(struct cr_ctx *ctx, struct mm_struct *mm, int parent);
int cr_read_thread(struct cr_ctx *ctx);
int cr_read_cpu(struct cr_ctx *ctx);
diff --git a/checkpoint/ckpt_mem.c b/checkpoint/ckpt_mem.c
new file mode 100644
index 0000000..2c93447
--- /dev/null
+++ b/checkpoint/ckpt_mem.c
@@ -0,0 +1,448 @@
+/*
+ * Checkpoint memory contents
+ *
+ * Copyright (C) 2008 Oren Laadan
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of the Linux
+ * distribution for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/pagemap.h>
+#include <linux/mm_types.h>
+#include <linux/ckpt.h>
+#include <linux/ckpt_hdr.h>
+
+#include "ckpt_arch.h"
+#include "ckpt_mem.h"
+
+/*
+ * utilities to alloc, free, and handle 'struct cr_pgarr' (page-arrays)
+ * (common to ckpt_mem.c and rstr_mem.c).
+ *
+ * The checkpoint context structure has two members for page-arrays:
+ * ctx->pgarr: list head of the page-array chain
+ * ctx->pgcur: tracks the "current" position in the chain
+ *
+ * During checkpoint (and restart) the chain tracks the dirty pages (page
+ * pointer and virtual address) of each MM. For a particular MM, these are
+ * always added to the "current" page-array (ctx->pgcur). The "current"
+ * page-array advances as necessary, and new page-array descriptors are
+ * allocated on-demand. Before the next MM, the chain is reset but not
+ * freed (that is, drop the page references and reset ctx->pgcur).
+ */
+
+#define CR_PGARR_ORDER 0
+#define CR_PGARR_TOTAL ((PAGE_SIZE << CR_PGARR_ORDER) / sizeof(void *))
+
+/* drop the reference held on every page recorded in a page-array */
+void cr_pgarr_unref_pages(struct cr_pgarr *pgarr)
+{
+	int i;
+
+	/* no pages[] array means there is nothing to release */
+	if (!pgarr->pages)
+		return;
+
+	cr_debug("nr_used %d\n", pgarr->nr_used);
+
+	/* walk the used entries from the last one down to the first */
+	i = pgarr->nr_used;
+	while (i--)
+		page_cache_release(pgarr->pages[i]);
+}
+
+/* release one page-array descriptor together with both of its arrays */
+static void cr_pgarr_free_one(struct cr_pgarr *pgarr)
+{
+	unsigned long pages_mem, vaddrs_mem;
+
+	/* page references go first, while pages[] is still readable */
+	cr_pgarr_unref_pages(pgarr);
+
+	pages_mem = (unsigned long) pgarr->pages;
+	vaddrs_mem = (unsigned long) pgarr->vaddrs;
+
+	if (pages_mem)
+		free_pages(pages_mem, CR_PGARR_ORDER);
+	if (vaddrs_mem)
+		free_pages(vaddrs_mem, CR_PGARR_ORDER);
+
+	kfree(pgarr);
+}
+
+/* tear down the whole page-array chain attached to a checkpoint context */
+void cr_pgarr_free(struct cr_ctx *ctx)
+{
+	struct list_head *head = &ctx->pgarr;
+
+	/* keep popping the first element until the chain is empty */
+	while (!list_empty(head)) {
+		struct cr_pgarr *victim;
+
+		victim = list_entry(head->next, struct cr_pgarr, list);
+		list_del(&victim->list);
+		cr_pgarr_free_one(victim);
+	}
+
+	ctx->pgcur = NULL;
+}
+
+/*
+ * allocate one empty page-array descriptor together with its pages[] and
+ * vaddrs[] arrays; returns NULL if any of the three allocations fails
+ */
+static struct cr_pgarr *cr_pgarr_alloc_one(void)
+{
+	struct cr_pgarr *arr;
+
+	arr = kzalloc(sizeof(*arr), GFP_KERNEL);
+	if (!arr)
+		return NULL;
+
+	arr->nr_used = 0;
+	arr->nr_free = CR_PGARR_TOTAL;
+
+	arr->pages = (struct page **)
+		__get_free_pages(GFP_KERNEL, CR_PGARR_ORDER);
+	arr->vaddrs = (unsigned long *)
+		__get_free_pages(GFP_KERNEL, CR_PGARR_ORDER);
+
+	if (arr->pages && arr->vaddrs)
+		return arr;
+
+	/* partial allocation: free_one copes with a missing array */
+	cr_pgarr_free_one(arr);
+	return NULL;
+}
+
+/* cr_pgarr_alloc - return the next available pgarr in the page-array chain
+ * @ctx: checkpoint context
+ *
+ * Advance ctx->pgcur to the page-array that follows it and return it.
+ * When ctx->pgcur is NULL (nothing selected yet, e.g. after
+ * cr_pgarr_reset()), start over from the first element of the chain, so
+ * that descriptors kept by a reset are reused instead of being left
+ * idle while the chain keeps growing. A new descriptor is allocated and
+ * appended only when no existing element is available.
+ *
+ * Returns the new current page-array, or NULL if a needed allocation
+ * failed (ctx->pgcur is left unchanged in that case).
+ */
+struct cr_pgarr *cr_pgarr_alloc(struct cr_ctx *ctx)
+{
+	struct cr_pgarr *pgarr = ctx->pgcur;
+	struct list_head *next;
+
+	/* candidate: element after the current one, or the first element */
+	next = pgarr ? pgarr->list.next : ctx->pgarr.next;
+
+	if (next != &ctx->pgarr) {
+		/* an already-allocated element is available: reuse it */
+		pgarr = list_entry(next, struct cr_pgarr, list);
+	} else {
+		/* nope, need to extend the page-array chain */
+		pgarr = cr_pgarr_alloc_one();
+		if (!pgarr)
+			return NULL;
+		list_add_tail(&pgarr->list, &ctx->pgarr);
+	}
+
+	ctx->pgcur = pgarr;
+	return pgarr;
+}
+
+/*
+ * rewind the page-array chain: every descriptor drops its page references
+ * and is marked empty, and no descriptor is left selected as current
+ */
+void cr_pgarr_reset(struct cr_ctx *ctx)
+{
+	struct list_head *pos;
+
+	list_for_each(pos, &ctx->pgarr) {
+		struct cr_pgarr *arr = list_entry(pos, struct cr_pgarr, list);
+
+		cr_pgarr_unref_pages(arr);
+		arr->nr_used = 0;
+		arr->nr_free = CR_PGARR_TOTAL;
+	}
+
+	ctx->pgcur = NULL;
+}
+
+
+/*
+ * cr_pgarr_prep - return current page-array (and allocate if needed)
+ * @ctx: checkpoint context
+ *
+ * Returns ctx->pgcur if it exists and still has free entries; otherwise
+ * asks cr_pgarr_alloc() for the next page-array. ctx->pgcur is NULL
+ * whenever the chain has just been reset or freed, so that case must be
+ * handled before looking at nr_free.
+ *
+ * Returns NULL if a new page-array was needed and could not be obtained.
+ */
+struct cr_pgarr *cr_pgarr_prep(struct cr_ctx *ctx)
+{
+	struct cr_pgarr *pgarr = ctx->pgcur;
+
+	if (!pgarr || !pgarr->nr_free)
+		pgarr = cr_pgarr_alloc(ctx);
+	return pgarr;
+}
+
+/*
+ * Checkpoint is outside the context of the checkpointee, so one cannot
+ * simply read pages from user-space. Instead, we scan the address space
+ * of the target to cherry-pick pages of interest. Selected pages are
+ * enlisted in a page-array chain (attached to the checkpoint context).
+ * To save their contents, each page is mapped to kernel memory and then
+ * dumped to the file descriptor.
+ */
+
+/**
+ * cr_vma_fill_pgarr - fill a page-array with addr/page tuples for a vma
+ * @ctx - checkpoint context
+ * @pgarr - page-array to fill
+ * @vma - vma to scan
+ * @start - start address (updated)
+ *
+ * Walks the vma from *@start up to vma->vm_end one page at a time. Each
+ * page is looked up with follow_page(); while the lookup returns NULL the
+ * address is faulted in with handle_mm_fault() and the lookup is retried.
+ * The zero page is skipped, and so are pages of a file-backed vma that
+ * still have a page_mapping() (what the code labels "clean cow"). Every
+ * other page gets an extra reference and is appended, together with its
+ * address, after the entries already used in @pgarr.
+ *
+ * The walk stops at the end of the vma or once pgarr->nr_free entries have
+ * been added. This function does not touch pgarr->nr_used/nr_free itself;
+ * it only reports how many entries it added.
+ *
+ * Returns the number of entries added and stores the next address to scan
+ * in *@start. On error, the references taken by this call are released,
+ * *@start is left unchanged and a negative value is returned.
+ *
+ * NOTE(review): the page reference is taken with get_page() only after
+ * follow_page() has returned, and FOLL_GET is not passed - confirm that
+ * nothing can free the page in between.
+ */
+static int cr_vma_fill_pgarr(struct cr_ctx *ctx, struct cr_pgarr *pgarr,
+ struct vm_area_struct *vma, unsigned long *start)
+{
+ unsigned long end = vma->vm_end;
+ unsigned long addr = *start;
+ struct page **pagep;
+ unsigned long *addrp;
+ int cow, nr, ret = 0;
+
+ nr = pgarr->nr_free; /* free entries left; counts down as entries are added */
+ pagep = &pgarr->pages[pgarr->nr_used]; /* first unused pages[] entry */
+ addrp = &pgarr->vaddrs[pgarr->nr_used]; /* first unused vaddrs[] entry */
+ cow = !!vma->vm_file; /* set for file-backed vmas */
+
+ while (addr < end) {
+ struct page *page;
+
+ /*
+ * simplified version of get_user_pages(): already have vma,
+ * only need FOLL_TOUCH, and (for now) ignore fault stats.
+ *
+ * FIXME: consolidate with get_user_pages()
+ */
+
+ cond_resched();
+ while (!(page = follow_page(vma, addr, FOLL_TOUCH))) {
+ ret = handle_mm_fault(vma->vm_mm, vma, addr, 0);
+ if (ret & VM_FAULT_ERROR) {
+ if (ret & VM_FAULT_OOM)
+ ret = -ENOMEM;
+ else if (ret & VM_FAULT_SIGBUS)
+ ret = -EFAULT;
+ else
+ BUG();
+ break; /* page is NULL here; ret < 0 ends the outer loop below */
+ }
+ cond_resched();
+ ret = 0; /* discard non-error fault status bits before retrying */
+ }
+
+ if (IS_ERR(page))
+ ret = PTR_ERR(page);
+
+ if (ret < 0)
+ break;
+
+ if (page == ZERO_PAGE(0)) {
+ page = NULL; /* zero page: ignore */
+ } else if (cow && page_mapping(page) != NULL) {
+ page = NULL; /* clean cow: ignore */
+ } else {
+ get_page(page); /* reference dropped later via page_cache_release() */
+ *(addrp++) = addr;
+ *(pagep++) = page;
+ if (--nr == 0) {
+ addr += PAGE_SIZE; /* array full: resume after this page next time */
+ break;
+ }
+ }
+
+ addr += PAGE_SIZE;
+ }
+
+ if (unlikely(ret < 0)) {
+ nr = pgarr->nr_free - nr; /* number of entries added by this call */
+ while (nr--)
+ page_cache_release(*(--pagep));
+ return ret;
+ }
+
+ *start = addr;
+ return pgarr->nr_free - nr;
+}
+
+/**
+ * cr_vma_scan_pages - scan vma for pages that will need to be dumped
+ * @ctx - checkpoint context
+ * @vma - vma to scan
+ *
+ * lists of page pointers and corresponding virtual addresses are tracked
+ * inside ctx->pgarr page-array chain
+ *
+ * Returns the number of pages recorded for the vma, -ENOMEM if no
+ * page-array could be obtained, or the error reported while filling one.
+ */
+static int cr_vma_scan_pages(struct cr_ctx *ctx, struct vm_area_struct *vma)
+{
+	unsigned long pos, limit = vma->vm_end;
+	int count = 0;
+
+	for (pos = vma->vm_start; pos < limit; ) {
+		struct cr_pgarr *pgarr;
+		int added;
+
+		pgarr = cr_pgarr_prep(ctx);
+		if (!pgarr)
+			return -ENOMEM;
+
+		/* the fill advances 'pos' past everything it has scanned */
+		added = cr_vma_fill_pgarr(ctx, pgarr, vma, &pos);
+		if (added < 0)
+			return added;
+
+		pgarr->nr_used += added;
+		pgarr->nr_free -= added;
+		count += added;
+	}
+
+	cr_debug("total %d\n", count);
+	return count;
+}
+
+/*
+ * cr_page_write - write the contents of one page to the checkpoint image
+ * @ctx: checkpoint context
+ * @page: page whose contents are dumped
+ * @buf: PAGE_SIZE bounce buffer supplied by the caller
+ *
+ * The page is copied into @buf under a short atomic mapping and @buf is
+ * then handed to cr_kwrite(), so the write itself runs with no atomic
+ * mapping held. Returns the result of cr_kwrite().
+ */
+static int cr_page_write(struct cr_ctx *ctx, struct page *page, char *buf)
+{
+	void *ptr;
+
+	ptr = kmap_atomic(page, KM_USER1);
+	memcpy(buf, ptr, PAGE_SIZE);
+	/* kunmap_atomic() takes the mapped address, not the struct page */
+	kunmap_atomic(ptr, KM_USER1);
+
+	return cr_kwrite(ctx, buf, PAGE_SIZE);
+}
+
+/**
+ * cr_vma_dump_pages - dump pages listed in the ctx page-array chain
+ * @ctx - checkpoint context
+ * @total - total number of pages
+ *
+ * First dump all virtual addresses, followed by the contents of all pages.
+ * Nothing is written when @total is zero. Returns a negative value on the
+ * first failed write, or -ENOMEM if the bounce buffer cannot be allocated.
+ */
+static int cr_vma_dump_pages(struct cr_ctx *ctx, int total)
+{
+	struct cr_pgarr *pgarr;
+	char *bounce;
+	int idx, err = 0;
+
+	if (total == 0)
+		return 0;
+
+	/* pass 1: the address array of every page-array in the chain */
+	list_for_each_entry(pgarr, &ctx->pgarr, list) {
+		err = cr_kwrite(ctx, pgarr->vaddrs,
+				pgarr->nr_used * sizeof(*pgarr->vaddrs));
+		if (err < 0)
+			return err;
+	}
+
+	bounce = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!bounce)
+		return -ENOMEM;
+
+	/* pass 2: page contents, in the same order as the addresses */
+	list_for_each_entry(pgarr, &ctx->pgarr, list) {
+		for (idx = 0; idx < pgarr->nr_used; idx++) {
+			err = cr_page_write(ctx, pgarr->pages[idx], bounce);
+			if (err < 0)
+				goto out;
+		}
+	}
+
+ out:
+	kfree(bounce);
+	return err;
+}
+
+/*
+ * cr_write_vma - dump one vma: header, optional file name, then pages
+ * @ctx: checkpoint context
+ * @vma: vma to dump
+ *
+ * Shared, IO, hugetlb and nonlinear mappings are not supported and are
+ * rejected with -ETXTBSY before anything is written. Otherwise writes a
+ * CR_HDR_VMA record, the backing file name if the vma has a file, then
+ * the addresses and contents of the pages selected for dumping.
+ *
+ * The header buffer is returned and the page-array chain is reset on
+ * every exit path taken after they have been acquired.
+ *
+ * Returns a negative value on error, otherwise the result of
+ * cr_vma_dump_pages().
+ */
+static int cr_write_vma(struct cr_ctx *ctx, struct vm_area_struct *vma)
+{
+	struct cr_hdr h;
+	struct cr_hdr_vma *hh;
+	int nr, ret;
+
+	/* reject unsupported mappings before taking the header buffer */
+	if (vma->vm_flags & (VM_SHARED | VM_IO | VM_HUGETLB | VM_NONLINEAR)) {
+		pr_warning("CR: unsupported VMA %#lx\n", vma->vm_flags);
+		return -ETXTBSY;
+	}
+
+	hh = cr_hbuf_get(ctx, sizeof(*hh));
+
+	h.type = CR_HDR_VMA;
+	h.len = sizeof(*hh);
+	h.parent = 0;
+
+	hh->vm_start = vma->vm_start;
+	hh->vm_end = vma->vm_end;
+	hh->vm_page_prot = vma->vm_page_prot.pgprot;
+	hh->vm_flags = vma->vm_flags;
+	hh->vm_pgoff = vma->vm_pgoff;
+
+	/* by default assume anon memory; if there is a backing file, */
+	/* assume private-mapped (FIX: check if the file is unlinked) */
+	hh->vma_type = vma->vm_file ? CR_VMA_FILE : CR_VMA_ANON;
+
+	/*
+	 * it seems redundant now, but we do it in 3 steps because:
+	 * first, the logic is simpler when we know how many pages there
+	 * are before dumping them; second, a future optimization will
+	 * defer the writeout (dump, and free) to a later step, in which
+	 * case all the pages to be dumped will be aggregated on the
+	 * checkpoint ctx
+	 */
+
+	/* (1) scan: scan through the PTEs of the vma to count the pages
+	 * to dump (and later make those pages COW), and keep the list of
+	 * pages (and a reference to each page) on the checkpoint ctx */
+	nr = cr_vma_scan_pages(ctx, vma);
+	if (nr < 0) {
+		cr_hbuf_put(ctx, sizeof(*hh));
+		ret = nr;
+		goto out;
+	}
+
+	hh->nr_pages = nr;
+	ret = cr_write_obj(ctx, &h, hh);
+	cr_hbuf_put(ctx, sizeof(*hh));
+	if (ret < 0)
+		goto out;
+
+	/* save the file name, if relevant */
+	if (vma->vm_file) {
+		ret = cr_write_fname(ctx, &vma->vm_file->f_path, ctx->vfsroot);
+		if (ret < 0)
+			goto out;
+	}
+
+	/* (2) dump: write out the addresses of all pages in the list (on
+	 * the checkpoint ctx) followed by the contents of all pages */
+	ret = cr_vma_dump_pages(ctx, nr);
+
+ out:
+	/* (3) free: release the extra references to the pages in the list;
+	 * also done on errors so that a failed vma does not keep pages pinned */
+	cr_pgarr_reset(ctx);
+
+	return ret;
+}
+
+/*
+ * cr_write_mm - dump the memory state of a task
+ * @ctx: checkpoint context
+ * @t: task whose mm is dumped
+ *
+ * Writes a CR_HDR_MM record with the layout fields of the task's mm,
+ * then every vma in mm->mmap order, then the mm context via
+ * cr_write_mm_context(). mm->mmap_sem is held for reading throughout and
+ * the mm reference taken here is dropped before returning.
+ *
+ * Returns -EINVAL if the task has no mm, a negative value if any write
+ * fails, and otherwise the result of cr_write_mm_context().
+ */
+int cr_write_mm(struct cr_ctx *ctx, struct task_struct *t)
+{
+	struct cr_hdr h;
+	struct cr_hdr_mm *hh;
+	struct mm_struct *mm;
+	struct vm_area_struct *vma;
+	int objref, ret;
+
+	/* get_task_mm() returns NULL for a task without an mm */
+	mm = get_task_mm(t);
+	if (!mm)
+		return -EINVAL;
+
+	hh = cr_hbuf_get(ctx, sizeof(*hh));
+
+	h.type = CR_HDR_MM;
+	h.len = sizeof(*hh);
+	h.parent = task_pid_vnr(t);
+
+	objref = 0;	/* will be meaningful with multiple processes */
+	hh->objref = objref;
+
+	down_read(&mm->mmap_sem);
+
+	hh->start_code = mm->start_code;
+	hh->end_code = mm->end_code;
+	hh->start_data = mm->start_data;
+	hh->end_data = mm->end_data;
+	hh->start_brk = mm->start_brk;
+	hh->brk = mm->brk;
+	hh->start_stack = mm->start_stack;
+	hh->arg_start = mm->arg_start;
+	hh->arg_end = mm->arg_end;
+	hh->env_start = mm->env_start;
+	hh->env_end = mm->env_end;
+
+	hh->map_count = mm->map_count;
+
+	/* FIX: need also mm->flags */
+
+	ret = cr_write_obj(ctx, &h, hh);
+	cr_hbuf_put(ctx, sizeof(*hh));
+	if (ret < 0)
+		goto out;
+
+	/* write the vma's */
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		ret = cr_write_vma(ctx, vma);
+		if (ret < 0)
+			goto out;
+	}
+
+	ret = cr_write_mm_context(ctx, mm, objref);
+
+ out:
+	up_read(&mm->mmap_sem);
+	mmput(mm);
+	return ret;
+}
diff --git a/checkpoint/ckpt_mem.h b/checkpoint/ckpt_mem.h
new file mode 100644
index 0000000..8ee211d
--- /dev/null
+++ b/checkpoint/ckpt_mem.h
@@ -0,0 +1,35 @@
+#ifndef _CHECKPOINT_CKPT_MEM_H_
+#define _CHECKPOINT_CKPT_MEM_H_
+/*
+ * Generic container checkpoint-restart
+ *
+ * Copyright (C) 2008 Oren Laadan
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of the Linux
+ * distribution for more details.
+ */
+
+#include <linux/mm_types.h>
+
+/*
+ * page-array chains: each cr_pgarr describes a set of <struct page *,vaddr>
+ * tuples (where vaddr is the virtual address of a page in a particular mm).
+ * Specifically, we use separate arrays so that all vaddrs can be written
+ * and read at once.
+ *
+ * Entry i of the tuple set is split across vaddrs[i] and pages[i]; the two
+ * arrays are filled in lockstep. Descriptors are chained through 'list'.
+ */
+
+struct cr_pgarr {
+ unsigned long *vaddrs; /* virtual address of each recorded page */
+ struct page **pages; /* the recorded pages, parallel to vaddrs[] */
+ unsigned int nr_used; /* how many entries already used */
+ unsigned int nr_free; /* how many entries still free */
+ struct list_head list; /* link in the page-array chain */
+};
+
+void cr_pgarr_reset(struct cr_ctx *ctx);
+void cr_pgarr_free(struct cr_ctx *ctx);
+struct cr_pgarr *cr_pgarr_alloc(struct cr_ctx *ctx);
+struct cr_pgarr *cr_pgarr_prep(struct cr_ctx *ctx);
+
+#endif /* _CHECKPOINT_CKPT_MEM_H_ */
diff --git a/checkpoint/sys.c b/checkpoint/sys.c
index 113e0df..8141161 100644
--- a/checkpoint/sys.c
+++ b/checkpoint/sys.c
@@ -16,6 +16,8 @@
#include <linux/capability.h>
#include <linux/ckpt.h>
+#include "ckpt_mem.h"
+
/*
* helpers to write/read to/from the image file descriptor
*
@@ -110,7 +112,6 @@ int cr_kread(struct cr_ctx *ctx, void *buf, int count)
return ret;
}
-
/*
* helpers to manage CR contexts: allocated for each checkpoint and/or
* restart operation, and persists until the operation is completed.
@@ -126,6 +127,11 @@ void cr_ctx_free(struct cr_ctx *ctx)
free_pages((unsigned long) ctx->hbuf, CR_HBUF_ORDER);
+ if (ctx->vfsroot)
+ path_put(ctx->vfsroot);
+
+ cr_pgarr_free(ctx);
+
kfree(ctx);
}
@@ -145,10 +151,13 @@ struct cr_ctx *cr_ctx_alloc(pid_t pid, int fd, unsigned long flags)
get_file(ctx->file);
ctx->hbuf = (void *) __get_free_pages(GFP_KERNEL, CR_HBUF_ORDER);
- if (!ctx->hbuf) {
- cr_ctx_free(ctx);
- return ERR_PTR(-ENOMEM);
- }
+ if (!ctx->hbuf)
+ goto nomem;
+
+ /* assume checkpointer is in container's root vfs */
+ /* FIXME: this works for now, but will change with real containers */
+ ctx->vfsroot = &current->fs->root;
+ path_get(ctx->vfsroot);
ctx->pid = pid;
ctx->flags = flags;
@@ -156,6 +165,10 @@ struct cr_ctx *cr_ctx_alloc(pid_t pid, int fd, unsigned long flags)
ctx->crid = atomic_inc_return(&cr_ctx_count);
return ctx;
+
+ nomem:
+ cr_ctx_free(ctx);
+ return ERR_PTR(-ENOMEM);
}
/*
diff --git a/include/asm-x86/ckpt_hdr.h b/include/asm-x86/ckpt_hdr.h
index 44a903c..6bc61ac 100644
--- a/include/asm-x86/ckpt_hdr.h
+++ b/include/asm-x86/ckpt_hdr.h
@@ -69,4 +69,9 @@ struct cr_hdr_cpu {
} __attribute__((aligned(8)));
+struct cr_hdr_mm_context { /* payload of a CR_HDR_MM_CONTEXT record (x86) */
+ __s16 ldt_entry_size; /* size of one LDT entry; the writer stores LDT_ENTRY_SIZE */
+ __s16 nldt; /* number of LDT entries; the writer copies mm->context.size */
+} __attribute__((aligned(8)));
+
#endif /* __ASM_X86_CKPT_HDR__H */
diff --git a/include/linux/ckpt.h b/include/linux/ckpt.h
index 91f4998..5c62a90 100644
--- a/include/linux/ckpt.h
+++ b/include/linux/ckpt.h
@@ -10,6 +10,9 @@
* distribution for more details.
*/
+#include <linux/path.h>
+#include <linux/fs.h>
+
#define CR_VERSION 1
struct cr_ctx {
@@ -24,6 +27,11 @@ struct cr_ctx {
void *hbuf; /* temporary buffer for headers */
int hpos; /* position in headers buffer */
+
+ struct list_head pgarr; /* page array for dumping VMA contents */
+ struct cr_pgarr *pgcur; /* current position in page array */
+
+ struct path *vfsroot; /* container root (FIXME) */
};
/* cr_ctx: flags */
@@ -46,11 +54,15 @@ struct cr_hdr;
int cr_write_obj(struct cr_ctx *ctx, struct cr_hdr *h, void *buf);
int cr_write_string(struct cr_ctx *ctx, char *str, int len);
+int cr_write_fname(struct cr_ctx *ctx, struct path *path, struct path *root);
int cr_read_obj(struct cr_ctx *ctx, struct cr_hdr *h, void *buf, int n);
int cr_read_obj_type(struct cr_ctx *ctx, void *buf, int n, int type);
int cr_read_string(struct cr_ctx *ctx, void *str, int len);
+int cr_write_mm(struct cr_ctx *ctx, struct task_struct *t);
+int cr_read_mm(struct cr_ctx *ctx);
+
int do_checkpoint(struct cr_ctx *ctx);
int do_restart(struct cr_ctx *ctx);
diff --git a/include/linux/ckpt_hdr.h b/include/linux/ckpt_hdr.h
index e66f322..ac77d7d 100644
--- a/include/linux/ckpt_hdr.h
+++ b/include/linux/ckpt_hdr.h
@@ -32,6 +32,7 @@ struct cr_hdr {
enum {
CR_HDR_HEAD = 1,
CR_HDR_STRING,
+ CR_HDR_FNAME,
CR_HDR_TASK = 101,
CR_HDR_THREAD,
@@ -82,4 +83,33 @@ struct cr_hdr_task {
__s32 task_comm_len;
} __attribute__((aligned(8)));
+struct cr_hdr_mm { /* payload of a CR_HDR_MM record */
+ __u32 objref; /* identifier for shared objects */
+ __u32 map_count; /* copied from mm->map_count */
+
+ __u64 start_code, end_code, start_data, end_data; /* same-named mm_struct fields */
+ __u64 start_brk, brk, start_stack; /* same-named mm_struct fields */
+ __u64 arg_start, arg_end, env_start, env_end; /* same-named mm_struct fields */
+
+} __attribute__((aligned(8)));
+
+/*
+ * vma subtypes; the value is stored in cr_hdr_vma.vma_type
+ */
+enum vm_type {
+ CR_VMA_ANON = 1, /* vma without a backing file */
+ CR_VMA_FILE /* vma with a backing file (vma->vm_file set) */
+};
+
+struct cr_hdr_vma { /* payload of a CR_HDR_VMA record */
+ __u32 vma_type; /* one of enum vm_type */
+ __u32 _padding;
+ __s64 nr_pages; /* number of pages dumped for this vma */
+
+ __u64 vm_start; /* copied from vma->vm_start */
+ __u64 vm_end; /* copied from vma->vm_end */
+ __u64 vm_page_prot; /* copied from vma->vm_page_prot.pgprot */
+ __u64 vm_flags; /* copied from vma->vm_flags */
+ __u64 vm_pgoff; /* copied from vma->vm_pgoff */
+
+} __attribute__((aligned(8)));
+
#endif /* _CHECKPOINT_CKPT_HDR_H_ */
--
1.5.4.3
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
More information about the Devel
mailing list