[Devel] [PATCH 2/3] c/r: Add UTS support (v7)

Dan Smith danms at us.ibm.com
Mon Apr 6 09:05:55 PDT 2009


This patch adds a "phase" of checkpoint that saves out information about any
namespaces the task(s) may have.  It does this by tracking the namespace
objects of the tasks, so that later tasks sharing an already-saved namespace
are written as a reference to it in the checkpoint stream.

I tested this with single and multiple task restore, on top of Oren's
v13 tree.

Changes:
  - Remove the kernel restore path
  - Punt on nested namespaces
  - Use __NEW_UTS_LEN in nodename and domainname buffers
  - Add a note to Documentation/checkpoint/internals.txt to indicate where
    in the save/restore process the UTS information is kept
  - Store (and track) the objref of the namespace itself instead of the
    nsproxy (based on comments from Dave on IRC)
  - Remove explicit check for non-root nsproxy
  - Store the nodename and domainname lengths and use cr_write_string()
    to store the actual name strings
  - Catch failure of cr_obj_add_ptr() in cr_write_namespaces()
  - Remove "types" bitfield and use the "is this new" flag to determine
    whether or not we should write out a new ns descriptor
  - Replace kernel restore path
  - Move the namespace information to be directly after the task
    information record
  - Update Documentation to reflect new location of namespace info
  - Support checkpoint and restart of nested UTS namespaces
  - Check nodename and domainname length on restart (!)
  - memset() the nodename and domainname buffers on restart
  - Change uts to newuts in cr_write_namespaces()
  - Check return values of cr_hbuf_get()

Cc: orenl at cs.columbia.edu
Signed-off-by: Dan Smith <danms at us.ibm.com>
---
 Documentation/checkpoint/internals.txt |    1 +
 checkpoint/Makefile                    |    1 +
 checkpoint/checkpoint.c                |   73 ++++++++++++++++++++-
 checkpoint/objhash.c                   |    7 ++
 checkpoint/restart.c                   |  111 ++++++++++++++++++++++++++++++++
 include/linux/checkpoint.h             |    1 +
 include/linux/checkpoint_hdr.h         |   11 +++
 7 files changed, 202 insertions(+), 3 deletions(-)

diff --git a/Documentation/checkpoint/internals.txt b/Documentation/checkpoint/internals.txt
index c741b6c..bdd202c 100644
--- a/Documentation/checkpoint/internals.txt
+++ b/Documentation/checkpoint/internals.txt
@@ -17,6 +17,7 @@ The order of operations, both save and restore, is as follows:
   -> thread state: elements of thread_struct and thread_info
   -> CPU state: registers etc, including FPU
   -> memory state: memory address space layout and contents
+  -> namespace information
   -> filesystem state: [TBD] filesystem namespace state, chroot, cwd, etc
   -> files state: open file descriptors and their state
   -> signals state: [TBD] pending signals and signal handling state
diff --git a/checkpoint/Makefile b/checkpoint/Makefile
index 607d864..55c5c3d 100644
--- a/checkpoint/Makefile
+++ b/checkpoint/Makefile
@@ -4,3 +4,4 @@
 
 obj-$(CONFIG_CHECKPOINT) += sys.o checkpoint.o restart.o objhash.o \
 		ckpt_mem.o rstr_mem.o ckpt_file.o rstr_file.o
+EXTRA_CFLAGS += -DDEBUG
diff --git a/checkpoint/checkpoint.c b/checkpoint/checkpoint.c
index c2f0e16..ae0d3f8 100644
--- a/checkpoint/checkpoint.c
+++ b/checkpoint/checkpoint.c
@@ -213,6 +213,72 @@ static int cr_write_tail(struct cr_ctx *ctx)
 	return ret;
 }
 
+/*
+ * Write a CR_HDR_UTSNS record for @name, followed by the nodename and
+ * domainname strings.  Both recorded lengths include the terminating NUL.
+ */
+static int cr_write_utsns(struct cr_ctx *ctx, struct new_utsname *name)
+{
+	struct cr_hdr_utsns *hh = cr_hbuf_get(ctx, sizeof(*hh));
+	struct cr_hdr h;
+	int ret;
+
+	if (!hh)
+		return -ENOMEM;
+
+	hh->nodename_len = strlen(name->nodename) + 1;
+	hh->domainname_len = strlen(name->domainname) + 1;
+	h.type = CR_HDR_UTSNS;
+	h.len = sizeof(*hh);
+
+	/* header first, then each string; stop at the first failure */
+	ret = cr_write_obj(ctx, &h, hh);
+	if (ret >= 0)
+		ret = cr_write_string(ctx, name->nodename, hh->nodename_len);
+	if (ret >= 0)
+		ret = cr_write_string(ctx, name->domainname,
+				      hh->domainname_len);
+
+	cr_hbuf_put(ctx, sizeof(*hh));
+	return ret;
+}
+
+/*
+ * Write the CR_HDR_NS record for task @t.  If cr_obj_add_ptr() reports the
+ * task's UTS namespace as newly added, also write its CR_HDR_UTSNS record.
+ */
+static int cr_write_namespaces(struct cr_ctx *ctx, struct task_struct *t)
+{
+	struct cr_hdr_namespaces *hh = cr_hbuf_get(ctx, sizeof(*hh));
+	struct nsproxy *nsp = t->nsproxy;
+	struct cr_hdr h;
+	int newuts, ret;
+
+	if (!hh)
+		return -ENOMEM;
+
+	h.type = CR_HDR_NS;
+	h.len = sizeof(*hh);
+
+	newuts = cr_obj_add_ptr(ctx, nsp->uts_ns, &hh->uts_ref,
+				CR_OBJ_UTSNS, 0);
+	if (newuts < 0) {
+		ret = newuts;	/* report the objhash error to the caller */
+		goto out;
+	}
+
+	ret = cr_write_obj(ctx, &h, hh);
+	if (ret)
+		goto out;
+
+	if (newuts)
+		ret = cr_write_utsns(ctx, &nsp->uts_ns->name);
+	/* FIXME: Write other namespaces here */
+ out:
+	cr_hbuf_put(ctx, sizeof(*hh));
+	return ret;
+}
+
 /* dump the task_struct of a given task */
 static int cr_write_task_struct(struct cr_ctx *ctx, struct task_struct *t)
 {
@@ -267,6 +333,10 @@ static int cr_write_task(struct cr_ctx *ctx, struct task_struct *t)
 		goto out;
 	ret = cr_write_cpu(ctx, t);
 	cr_debug("cpu: ret %d\n", ret);
+	if (ret < 0)
+		goto out;
+	ret = cr_write_namespaces(ctx, t);
+	cr_debug("ns: ret %d\n", ret);
  out:
 	return ret;
 }
@@ -302,9 +372,6 @@ static int cr_may_checkpoint_task(struct task_struct *t, struct cr_ctx *ctx)
 	if (t != current && !frozen(t))
 		return -EBUSY;
 
-	if (task_nsproxy(t)->uts_ns != ctx->root_nsproxy->uts_ns)
-		return -EPERM;
-
 	if (task_nsproxy(t)->ipc_ns != ctx->root_nsproxy->ipc_ns)
 		return -EPERM;
 
diff --git a/checkpoint/objhash.c b/checkpoint/objhash.c
index 25916c1..c6ae7c1 100644
--- a/checkpoint/objhash.c
+++ b/checkpoint/objhash.c
@@ -12,6 +12,7 @@
 #include <linux/file.h>
 #include <linux/hash.h>
 #include <linux/checkpoint.h>
+#include <linux/utsname.h>
 
 struct cr_objref {
 	int objref;
@@ -38,6 +39,9 @@ static void cr_obj_ref_drop(struct cr_objref *obj)
 	case CR_OBJ_INODE:
 		iput((struct inode *) obj->ptr);
 		break;
+	case CR_OBJ_UTSNS:
+		put_uts_ns((struct uts_namespace *) obj->ptr);
+		break;
 	default:
 		BUG();
 	}
@@ -55,6 +59,9 @@ static int cr_obj_ref_grab(struct cr_objref *obj)
 		if (!igrab((struct inode *) obj->ptr))
 			ret = -EBADF;
 		break;
+	case CR_OBJ_UTSNS:
+		get_uts_ns((struct uts_namespace *) obj->ptr);
+		break;
 	default:
 		BUG();
 	}
diff --git a/checkpoint/restart.c b/checkpoint/restart.c
index adebc1c..1767460 100644
--- a/checkpoint/restart.c
+++ b/checkpoint/restart.c
@@ -15,6 +15,8 @@
 #include <linux/magic.h>
 #include <linux/checkpoint.h>
 #include <linux/checkpoint_hdr.h>
+#include <linux/utsname.h>
+#include <linux/syscalls.h>
 
 #include "checkpoint_arch.h"
 
@@ -236,6 +238,111 @@ static int cr_read_tail(struct cr_ctx *ctx)
 	return ret;
 }
 
+/*
+ * Read a CR_HDR_UTSNS record and its nodename/domainname strings, unshare
+ * the UTS namespace, and copy the names into t->nsproxy->uts_ns.
+ *
+ * The lengths come from the checkpoint image, so they are validated before
+ * being used as allocation sizes and before the namespace is unshared.
+ */
+static int cr_read_utsns(struct cr_ctx *ctx, struct task_struct *t)
+{
+	struct cr_hdr_utsns hh;
+	struct uts_namespace *ns;
+	char *nn = NULL;
+	char *dn = NULL;
+	int ret;
+
+	ret = cr_read_obj_type(ctx, &hh, sizeof(hh), CR_HDR_UTSNS);
+	if (ret < 0)
+		return ret;
+
+	/* checkpoint writes strlen() + 1, so a zero length is bogus too */
+	if (!hh.nodename_len || !hh.domainname_len ||
+	    hh.nodename_len > sizeof(ns->name.nodename) ||
+	    hh.domainname_len > sizeof(ns->name.domainname)) {
+		cr_debug("UTS field length invalid\n");
+		return -EINVAL;
+	}
+
+	ret = -ENOMEM;
+	nn = kmalloc(hh.nodename_len, GFP_KERNEL);
+	dn = kmalloc(hh.domainname_len, GFP_KERNEL);
+	if (!nn || !dn)
+		goto out;
+
+	ret = cr_read_string(ctx, nn, hh.nodename_len);
+	if (ret < 0)
+		goto out;
+
+	ret = cr_read_string(ctx, dn, hh.domainname_len);
+	if (ret < 0)
+		goto out;
+
+	ret = sys_unshare(CLONE_NEWUTS);
+	if (ret)
+		goto out;
+
+	ns = t->nsproxy->uts_ns;
+	memset(ns->name.nodename, 0, sizeof(ns->name.nodename));
+	memset(ns->name.domainname, 0, sizeof(ns->name.domainname));
+	memcpy(ns->name.nodename, nn, hh.nodename_len);
+	memcpy(ns->name.domainname, dn, hh.domainname_len);
+
+ out:
+	kfree(nn);
+	kfree(dn);
+	return ret;
+}
+
+/* Attach current to the UTS namespace recorded under @ref in the image. */
+static int cr_restore_utsns(struct cr_ctx *ctx, int ref)
+{
+	struct uts_namespace *uts;
+	int ret;
+
+	uts = cr_obj_get_by_ref(ctx, ref, CR_OBJ_UTSNS);
+	if (IS_ERR(uts)) {
+		cr_debug("Failed to get UTS ns from objhash\n");
+		return PTR_ERR(uts);
+	}
+	if (!uts) {	/* @ref not in the objhash yet: read and register it */
+		ret = cr_read_utsns(ctx, current);
+		if (ret < 0)
+			return ret;
+		return cr_obj_add_ref(ctx, current->nsproxy->uts_ns,
+				      ref, CR_OBJ_UTSNS, 0);
+	}
+
+	/* NOTE(review): confirm copy_namespaces() is safe to call on current */
+	ret = copy_namespaces(CLONE_NEWUTS, current);
+	if (ret < 0)
+		return ret;
+	put_uts_ns(current->nsproxy->uts_ns);	/* drop the fresh copy ... */
+	get_uts_ns(uts);			/* ... and share the known one */
+	current->nsproxy->uts_ns = uts;
+	return 0;
+}
+
+/* Read the CR_HDR_NS record and restore the namespaces it refers to. */
+static int cr_read_namespaces(struct cr_ctx *ctx)
+{
+	struct cr_hdr_namespaces hh;
+	int ret = cr_read_obj_type(ctx, &hh, sizeof(hh), CR_HDR_NS);
+
+	if (ret >= 0) {
+		/* uts_ref is the objref stored for the UTS namespace */
+		ret = cr_restore_utsns(ctx, hh.uts_ref);
+		cr_debug("uts ns: %d\n", ret);
+	}
+	if (ret < 0)
+		return ret;
+
+	/* FIXME: Add more namespaces here */
+
+	return 0;
+}
+
 /* read the task_struct into the current task */
 static int cr_read_task_struct(struct cr_ctx *ctx)
 {
@@ -297,6 +404,10 @@ static int cr_read_task(struct cr_ctx *ctx)
 		goto out;
 	ret = cr_read_cpu(ctx);
 	cr_debug("cpu: ret %d\n", ret);
+	if (ret < 0)
+		goto out;
+	ret = cr_read_namespaces(ctx);
+	cr_debug("ns: ret %d\n", ret);
 
  out:
 	return ret;
diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
index 59ec563..cf20746 100644
--- a/include/linux/checkpoint.h
+++ b/include/linux/checkpoint.h
@@ -75,6 +75,7 @@ extern void cr_ctx_put(struct cr_ctx *ctx);
 enum {
 	CR_OBJ_FILE = 1,
 	CR_OBJ_INODE,
+	CR_OBJ_UTSNS,
 	CR_OBJ_MAX
 };
 
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index 22b40a2..97f5761 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -48,6 +48,8 @@ enum {
 	CR_HDR_TASK,
 	CR_HDR_THREAD,
 	CR_HDR_CPU,
+	CR_HDR_NS,
+	CR_HDR_UTSNS,
 
 	CR_HDR_MM = 201,
 	CR_HDR_VMA,
@@ -184,4 +186,13 @@ struct cr_hdr_fd_pipe {
 	__s32 nr_bufs;
 } __attribute__((aligned(8)));
 
+struct cr_hdr_namespaces {	/* payload of a CR_HDR_NS record */
+	__u32 uts_ref;		/* objref of the task's UTS namespace */
+};
+
+struct cr_hdr_utsns {		/* payload of a CR_HDR_UTSNS record */
+	__u32 nodename_len;	/* nodename string size, incl. NUL */
+	__u32 domainname_len;	/* domainname string size, incl. NUL */
+};
+
 #endif /* _CHECKPOINT_CKPT_HDR_H_ */
-- 
1.5.6.3

_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers




More information about the Devel mailing list