[Devel] Re: [PATCH] Clear the objhash before completing restart, but delay free (v2)

Oren Laadan orenl at cs.columbia.edu
Mon Jan 31 16:31:10 PST 2011


I modifed the patch a bit according to our IRC chat today:


>From 9c74f82411d77cf0194a17ba99af0dd31070e88a Mon Sep 17 00:00:00 2001
From: Oren Laadan <orenl at cs.columbia.edu>
Date: Mon, 31 Jan 2011 19:01:49 -0500
Subject: [PATCH] c/r: clear the objhash before completing restart, but delay free (v3)

This patch causes the restart coordinator to drop references to
objects in the objhash before releasing the restarted tasks.  This
ensures that objects held exclusively there are released before any
task starts running again.

Keeping those references when we allow restarted tasks to return to
userspace is racy if an object's behavior depends on this reference.
One example is restarting half-closed pipes: without this patch they
temporarily (until the coordinator clears the objhash) appear fully
open on both sides. Consider a pipe with read-side closed. A write to
the write-side should produce EPIPE/SIGPIPE, however, because the
objhash contains a reference to both ends of the pipe, the read-side
at restart will not really be closed until after the cleanup, and a
process would unexpectedly not receive an error. Moving the object
hash clear prevents this race and others like it.

To avoid the overhead of actually freeing the object hash's structures
at the same time, we defer the actual memory free to until after the
restarted tasks are allowed to resume execution.

Original patch posted by Dan Smith.

Changes in v3:
 - Use a flag on ctx->kflags to indicate that "->drop" was done on
   the entire hash instead of moving elements to a "free" list
Changes in v2:
 - Avoid adding another list_head to ckpt_obj by re-using the hlist
   hash node for the free list

Cc: Dan Smith <danms at us.ibm.com>
Signed-off-by: Oren Laadan <orenl at cs.columbia.edu>
---
 include/linux/checkpoint.h  |    3 +++
 kernel/checkpoint/objhash.c |   17 +++++++++++++----
 kernel/checkpoint/restart.c |    8 ++++++++
 3 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
index adaf6af..6c0ccfd 100644
--- a/include/linux/checkpoint.h
+++ b/include/linux/checkpoint.h
@@ -50,11 +50,13 @@ extern long do_sys_restart(pid_t pid, int fd,
 #define CKPT_CTX_RESTART_BIT		1
 #define CKPT_CTX_SUCCESS_BIT		2
 #define CKPT_CTX_ERROR_BIT		3
+#define CKPT_CTX_DROPPED_BIT		4
 
 #define CKPT_CTX_CHECKPOINT	(1 << CKPT_CTX_CHECKPOINT_BIT)
 #define CKPT_CTX_RESTART	(1 << CKPT_CTX_RESTART_BIT)
 #define CKPT_CTX_SUCCESS	(1 << CKPT_CTX_SUCCESS_BIT)
 #define CKPT_CTX_ERROR		(1 << CKPT_CTX_ERROR_BIT)
+#define CKPT_CTX_DROPPED	(1 << CKPT_CTX_DROPPED_BIT)
 
 /* ckpt_ctx: uflags */
 #define CHECKPOINT_USER_FLAGS \
@@ -176,6 +178,7 @@ extern void restore_notify_error(struct ckpt_ctx *ctx);
 
 /* obj_hash */
 extern void ckpt_obj_hash_free(struct ckpt_ctx *ctx);
+extern void ckpt_obj_hash_drop(struct ckpt_ctx *ctx);
 extern int ckpt_obj_hash_alloc(struct ckpt_ctx *ctx);
 
 extern int restore_obj(struct ckpt_ctx *ctx, struct ckpt_hdr_objref *h);
diff --git a/kernel/checkpoint/objhash.c b/kernel/checkpoint/objhash.c
index 52062a8..40c2e9b 100644
--- a/kernel/checkpoint/objhash.c
+++ b/kernel/checkpoint/objhash.c
@@ -66,7 +66,7 @@ EXPORT_SYMBOL(register_checkpoint_obj);
 #define CKPT_OBJ_HASH_NBITS  10
 #define CKPT_OBJ_HASH_TOTAL  (1UL << CKPT_OBJ_HASH_NBITS)
 
-static void obj_hash_clear(struct ckpt_obj_hash *obj_hash)
+static void obj_hash_free(struct ckpt_obj_hash *obj_hash, int drop, int clean)
 {
 	struct hlist_head *h = obj_hash->head;
 	struct hlist_node *n, *t;
@@ -75,19 +75,28 @@ static void obj_hash_clear(struct ckpt_obj_hash *obj_hash)
 
 	for (i = 0; i < CKPT_OBJ_HASH_TOTAL; i++) {
 		hlist_for_each_entry_safe(obj, n, t, &h[i], hash) {
-			if (obj->ops->ref_drop)
+			if (drop && obj->ops->ref_drop)
 				obj->ops->ref_drop(obj->ptr, 1);
-			kfree(obj);
+			if (clean)
+				kfree(obj);
 		}
 	}
+
+}
+
+void ckpt_obj_hash_drop(struct ckpt_ctx *ctx)
+{
+	obj_hash_free(ctx->obj_hash, 1, 0);
+	ckpt_set_ctx_kflag(ctx, CKPT_CTX_DROPPED);
 }
 
 void ckpt_obj_hash_free(struct ckpt_ctx *ctx)
 {
 	struct ckpt_obj_hash *obj_hash = ctx->obj_hash;
+	int dropped = ctx->kflags & CKPT_CTX_DROPPED;
 
 	if (obj_hash) {
-		obj_hash_clear(obj_hash);
+		obj_hash_free(obj_hash, !dropped, 1);
 		kfree(obj_hash->head);
 		kfree(ctx->obj_hash);
 		ctx->obj_hash = NULL;
diff --git a/kernel/checkpoint/restart.c b/kernel/checkpoint/restart.c
index 2eae499..01da67f 100644
--- a/kernel/checkpoint/restart.c
+++ b/kernel/checkpoint/restart.c
@@ -1345,6 +1345,14 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
 		destroy_descendants(ctx);
 		ret = ckpt_get_error(ctx);
 	} else {
+		/*
+		 * Drop references to all objects in the objhash before
+		 * any task is allowed to resume: this prevents resources
+		 * whose semantics depend on the number of references from
+		 * temporarily misbehaving (e.g. a half-closed pipe may
+		 * temporarily appear to fully-open).
+		 */
+		ckpt_obj_hash_drop(ctx);
 		ckpt_set_success(ctx);
 		wake_up_all(&ctx->waitq);
 	}
-- 
1.7.1

_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers




More information about the Devel mailing list