[Devel] [RFC v14][PATCH 35/54] Support for shared memory address spaces
Oren Laadan
orenl at cs.columbia.edu
Tue Apr 28 16:24:05 PDT 2009
The task address space (task->mm) may be shared between processes if
CLONE_VM is used, and particularly among threads. Accordingly, treat
'task->mm' as a shared object: during checkpoint, look it up in the
objhash and dump its contents only if it is seen for the first time.
During restart, likewise, restore it only if it is a new instance;
otherwise use the one already registered in the objhash.
Signed-off-by: Oren Laadan <orenl at cs.columbia.edu>
---
checkpoint/memory.c | 59 ++++++++++++++++++++++++++++++++++-----
checkpoint/objhash.c | 21 ++++++++++++++
checkpoint/process.c | 46 ++++++++++++++++++++++++++++---
include/linux/checkpoint.h | 7 +++-
include/linux/checkpoint_hdr.h | 7 +++++
5 files changed, 126 insertions(+), 14 deletions(-)
diff --git a/checkpoint/memory.c b/checkpoint/memory.c
index f5f8fcf..7a6e3f4 100644
--- a/checkpoint/memory.c
+++ b/checkpoint/memory.c
@@ -650,10 +650,9 @@ static int anonymous_checkpoint(struct ckpt_ctx *ctx,
return private_vma_checkpoint(ctx, vma, CKPT_VMA_ANON, 0);
}
-int checkpoint_mm(struct ckpt_ctx *ctx, struct task_struct *t)
+static int do_checkpoint_mm(struct ckpt_ctx *ctx, struct mm_struct *mm)
{
struct ckpt_hdr_mm *h;
- struct mm_struct *mm;
struct vm_area_struct *vma;
int exe_objref = 0;
int ret;
@@ -662,8 +661,6 @@ int checkpoint_mm(struct ckpt_ctx *ctx, struct task_struct *t)
if (!h)
return -ENOMEM;
- mm = get_task_mm(t);
-
down_read(&mm->mmap_sem);
/* FIX: need also mm->flags */
@@ -715,10 +712,26 @@ int checkpoint_mm(struct ckpt_ctx *ctx, struct task_struct *t)
out:
ckpt_hdr_put(ctx, h);
up_read(&mm->mmap_sem);
- mmput(mm);
return ret;
}
+/*
+ * checkpoint_mm - checkpoint callback for CKPT_OBJ_MM objects
+ *
+ * @ptr is the opaque object pointer; it is handed to do_checkpoint_mm()
+ * as a 'struct mm_struct *'. Returns whatever do_checkpoint_mm() returns.
+ */
+int checkpoint_mm(struct ckpt_ctx *ctx, void *ptr)
+{
+	struct mm_struct *mm = ptr;
+
+	return do_checkpoint_mm(ctx, mm);
+}
+
+/*
+ * checkpoint_mm_obj - checkpoint the address space of task @t as a
+ * shared object
+ *
+ * Takes a temporary reference on the task's mm with get_task_mm(), hands
+ * the mm to checkpoint_obj() with type CKPT_OBJ_MM, and drops the
+ * temporary reference again.
+ *
+ * Returns the value of checkpoint_obj() (the caller treats a negative
+ * value as an error and anything else as the objref), or -EINVAL if the
+ * task has no mm.
+ */
+int checkpoint_mm_obj(struct ckpt_ctx *ctx, struct task_struct *t)
+{
+	struct mm_struct *mm;
+	int objref;
+
+	mm = get_task_mm(t);
+	/* get_task_mm() yields NULL for a task without an mm; mmput(NULL) would oops */
+	if (!mm)
+		return -EINVAL;
+
+	objref = checkpoint_obj(ctx, mm, CKPT_OBJ_MM);
+	mmput(mm);
+
+	return objref;
+}
+
/*
* Restart
*
@@ -1120,7 +1133,7 @@ static int destroy_mm(struct mm_struct *mm)
return 0;
}
-int restore_mm(struct ckpt_ctx *ctx)
+static struct mm_struct *do_restore_mm(struct ckpt_ctx *ctx)
{
struct ckpt_hdr_mm *h;
struct mm_struct *mm;
@@ -1130,7 +1143,7 @@ int restore_mm(struct ckpt_ctx *ctx)
h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_MM);
if (IS_ERR(h))
- return PTR_ERR(h);
+ return (struct mm_struct *) h;
ckpt_debug("map_count %d\n", h->map_count);
@@ -1142,6 +1155,8 @@ int restore_mm(struct ckpt_ctx *ctx)
goto out;
if (h->exefile_objref < 0)
goto out;
+ if (h->map_count <= 0)
+ goto out;
mm = current->mm;
@@ -1191,5 +1206,33 @@ int restore_mm(struct ckpt_ctx *ctx)
ret = restore_mm_context(ctx, mm);
out:
ckpt_hdr_put(ctx, h);
- return ret;
+ return (ret < 0 ? ERR_PTR(ret) : mm);
}
+
+/*
+ * restore_mm - restore callback for CKPT_OBJ_MM objects
+ *
+ * Returns the result of do_restore_mm() as an opaque pointer.
+ */
+void *restore_mm(struct ckpt_ctx *ctx)
+{
+	struct mm_struct *mm = do_restore_mm(ctx);
+
+	return mm;
+}
+
+/*
+ * restore_mm_obj - attach the current task to the mm registered under
+ * @mm_objref
+ *
+ * Fetches the mm from the objhash. If it already is current->mm there is
+ * nothing to do; otherwise the task switches to it via exec_mmap() and an
+ * additional mm_users reference is taken.
+ *
+ * Returns 0 on success, -EINVAL if the lookup yields NULL, or the error
+ * reported by ckpt_obj_fetch() or exec_mmap().
+ */
+int restore_mm_obj(struct ckpt_ctx *ctx, int mm_objref)
+{
+	struct mm_struct *mm;
+	int ret;
+
+	mm = ckpt_obj_fetch(ctx, mm_objref, CKPT_OBJ_MM);
+	if (!mm)
+		return -EINVAL;
+	/* propagate the real lookup error instead of masking it as -EINVAL */
+	if (IS_ERR(mm))
+		return PTR_ERR(mm);
+
+	if (mm == current->mm)
+		return 0;
+
+	ret = exec_mmap(mm);
+	if (ret < 0)
+		return ret;
+
+	atomic_inc(&mm->mm_users);
+	return 0;
+}
+
diff --git a/checkpoint/objhash.c b/checkpoint/objhash.c
index 8e43432..4fb5afa 100644
--- a/checkpoint/objhash.c
+++ b/checkpoint/objhash.c
@@ -57,6 +57,7 @@ void *restore_bad(struct ckpt_ctx *ctx)
* obj_no_{drop,grab}: for objects ignored/skipped
* obj_file_{drop,grab}: for file objects
* obj_inode_{drop,grab}: for inode objects
+ * obj_mm_{drop,grab}: for mm_struct objects
*/
static void obj_no_drop(void *ptr)
@@ -91,6 +92,17 @@ static void obj_inode_drop(void *ptr)
iput((struct inode *) ptr);
}
+/* ref_grab for mm objects: bump mm_users; always returns 0 */
+static int obj_mm_grab(void *ptr)
+{
+	struct mm_struct *mm = ptr;
+
+	atomic_inc(&mm->mm_users);
+	return 0;
+}
+
+/* ref_drop for mm objects: release the reference with mmput() */
+static void obj_mm_drop(void *ptr)
+{
+	struct mm_struct *mm = ptr;
+
+	mmput(mm);
+}
+
static struct ckpt_obj_ops ckpt_obj_ops[] = {
/* ignored object */
{
@@ -117,6 +129,15 @@ static struct ckpt_obj_ops ckpt_obj_ops[] = {
.checkpoint = checkpoint_bad, /* no c/r at inode level */
.restore = restore_bad, /* no c/r at inode level */
},
+ /* mm object */
+ {
+ .obj_name = "MM",
+ .obj_type = CKPT_OBJ_MM,
+ .ref_drop = obj_mm_drop,
+ .ref_grab = obj_mm_grab,
+ .checkpoint = checkpoint_mm,
+ .restore = restore_mm,
+ },
};
diff --git a/checkpoint/process.c b/checkpoint/process.c
index d5ee6fd..0bd4845 100644
--- a/checkpoint/process.c
+++ b/checkpoint/process.c
@@ -162,6 +162,28 @@ int checkpoint_restart_block(struct ckpt_ctx *ctx, struct task_struct *t)
return ret;
}
+/*
+ * checkpoint_task_objs - checkpoint the shared objects used by task @t
+ *
+ * First checkpoints the task's mm through checkpoint_mm_obj(), then
+ * writes a CKPT_HDR_TASK_OBJS record carrying the resulting objref.
+ *
+ * Returns the negative value from checkpoint_mm_obj() on failure,
+ * -ENOMEM if no header could be obtained, otherwise the result of
+ * ckpt_write_obj().
+ */
+static int checkpoint_task_objs(struct ckpt_ctx *ctx, struct task_struct *t)
+{
+	struct ckpt_hdr_task_objs *hdr;
+	int objref, err;
+
+	objref = checkpoint_mm_obj(ctx, t);
+	ckpt_debug("memory: objref %d\n", objref);
+	if (objref < 0)
+		return objref;
+
+	hdr = ckpt_hdr_get_type(ctx, sizeof(*hdr), CKPT_HDR_TASK_OBJS);
+	if (!hdr)
+		return -ENOMEM;
+
+	hdr->mm_objref = objref;
+	err = ckpt_write_obj(ctx, &hdr->h);
+
+	ckpt_hdr_put(ctx, hdr);
+	return err;
+}
+
/* dump the entire state of a given task */
int checkpoint_task(struct ckpt_ctx *ctx, struct task_struct *t)
{
@@ -171,8 +193,8 @@ int checkpoint_task(struct ckpt_ctx *ctx, struct task_struct *t)
ckpt_debug("ret %d\n", ret);
if (ret < 0)
goto out;
- ret = checkpoint_mm(ctx, t);
- ckpt_debug("memory: ret %d\n", ret);
+ ret = checkpoint_task_objs(ctx, t);
+ ckpt_debug("objs: ret %d\n", ret);
if (ret < 0)
goto out;
ret = checkpoint_fd_table(ctx, t);
@@ -322,6 +344,22 @@ int restore_restart_block(struct ckpt_ctx *ctx)
return ret;
}
+/*
+ * restore_task_objs - restore the shared objects of the current task
+ *
+ * Reads the CKPT_HDR_TASK_OBJS record and restores the mm it refers to
+ * through restore_mm_obj().
+ *
+ * Returns the error from reading the record, otherwise the result of
+ * restore_mm_obj().
+ */
+static int restore_task_objs(struct ckpt_ctx *ctx)
+{
+	struct ckpt_hdr_task_objs *h;
+	int ret;
+
+	/* argument order is (ctx, length, type), as used in do_restore_mm() */
+	h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_TASK_OBJS);
+	if (IS_ERR(h))
+		return PTR_ERR(h);
+
+	ret = restore_mm_obj(ctx, h->mm_objref);
+	ckpt_debug("memory: ret %d\n", ret);
+
+	ckpt_hdr_put(ctx, h);
+	return ret;
+}
+
/* read the entire state of the current task */
int restore_task(struct ckpt_ctx *ctx)
{
@@ -331,8 +369,8 @@ int restore_task(struct ckpt_ctx *ctx)
ckpt_debug("ret %d\n", ret);
if (ret < 0)
goto out;
- ret = restore_mm(ctx);
- ckpt_debug("memory: ret %d\n", ret);
+ ret = restore_task_objs(ctx);
+ ckpt_debug("objs: ret %d\n", ret);
if (ret < 0)
goto out;
ret = restore_fd_table(ctx);
diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
index a662ea7..d554776 100644
--- a/include/linux/checkpoint.h
+++ b/include/linux/checkpoint.h
@@ -90,8 +90,11 @@ extern int private_vma_restore(struct ckpt_ctx *ctx, struct mm_struct *mm,
extern int restore_memory_contents(struct ckpt_ctx *ctx, struct inode *inode);
-extern int checkpoint_mm(struct ckpt_ctx *ctx, struct task_struct *t);
-extern int restore_mm(struct ckpt_ctx *ctx);
+extern int checkpoint_mm(struct ckpt_ctx *ctx, void *ptr);
+extern void *restore_mm(struct ckpt_ctx *ctx);
+
+extern int checkpoint_mm_obj(struct ckpt_ctx *ctx, struct task_struct *t);
+extern int restore_mm_obj(struct ckpt_ctx *ctx, int objref);
#define CKPT_VMA_NOT_SUPPORTED \
(VM_IO | VM_HUGETLB | VM_NONLINEAR | VM_PFNMAP | \
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index 59fab62..8b00fb8 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -49,6 +49,7 @@ enum {
CKPT_HDR_TREE = 101,
CKPT_HDR_TASK,
+ CKPT_HDR_TASK_OBJS,
CKPT_HDR_RESTART_BLOCK,
CKPT_HDR_THREAD,
CKPT_HDR_CPU,
@@ -78,6 +79,7 @@ enum obj_type {
CKPT_OBJ_IGNORE = 0,
CKPT_OBJ_FILE,
CKPT_OBJ_INODE,
+ CKPT_OBJ_MM,
CKPT_OBJ_MAX
};
@@ -139,6 +141,11 @@ struct ckpt_hdr_task {
__u32 task_comm_len;
} __attribute__((aligned(8)));
+struct ckpt_hdr_task_objs { /* CKPT_HDR_TASK_OBJS record: objrefs of a task's shared objects */
+struct ckpt_hdr h;
+__s32 mm_objref; /* objref of the task's mm, as obtained from checkpoint_mm_obj() */
+} __attribute__((aligned(8)));
+
/* (thread) restart blocks */
struct ckpt_hdr_restart_block {
struct ckpt_hdr h;
--
1.5.4.3
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
More information about the Devel
mailing list