[Devel] [PATCH] c/r: fix "scheduling in atomic" while restoring ipc shm

Oren Laadan orenl at cs.columbia.edu
Wed Feb 24 15:31:07 PST 2010


"""
  While playing with checkpoint-restart code, version
  several-commits-before-0.19, we have faced "scheduling in atomic" issue.
  ...
  So restore_ipc_shm() calls ipc_lock() and then restore_memory_contents().
  Inside ipc_lock(), a spinlock is taken.
  Inside restore_memory_contents(), checkpoint data is read, that results
  in vfs_read() and a schedule somewhere below.
  ...
  Another related bug: if load_ipc_shm_hdr() fails in line 257, control
  is transfered to mutex: label with negative ret value; ipc_unlock()
  is not called on this path.
"""

Actually, the error could have occurred with other sleeping functions,
for example, security_restore_obj().

The fix is to simply not take ipc_lock(). this relies on the fact that
IPC is always restored to a brand new ipc namespace which is only
accessible to the restarting process(es):

1) The ipc object (id) could not have been deleted between its creation
  and taking the rw_mutex.

2) No unauthorized task will attempt to gain access to it, so it is
  safe to do away with ipc_lock(). This is useful because we can call
  functions that sleep.

3) Likewise, we only restore the security bits further below, so it is
  safe to ignore a security (theoretical only!) race.

If (and when) we allow to restore the ipc state within the parent's
namespace, we will need to re-examine this.

While at it, there is also a cleanup to not restore perm->{id,key,seq}
or the shm->shm_segsz, as they are all set upon creation properly.

Signed-off-by: Oren Laadan <orenl at cs.columbia.edu>
Reported-by: "Nikita V. Youshchenko" <yoush at cs.msu.su>
---
 ipc/checkpoint.c     |    9 ++----
 ipc/checkpoint_msg.c |   35 +++++++++++++++++-------
 ipc/checkpoint_sem.c |   33 ++++++++++++++++------
 ipc/checkpoint_shm.c |   74 +++++++++++++++++++++++++++++++++-----------------
 4 files changed, 101 insertions(+), 50 deletions(-)

diff --git a/ipc/checkpoint.c b/ipc/checkpoint.c
index 4322dea..06027c2 100644
--- a/ipc/checkpoint.c
+++ b/ipc/checkpoint.c
@@ -203,9 +203,6 @@ int restore_load_ipc_perms(struct ckpt_ctx *ctx,
 
 	/* FIX: verify the ->mode field makes sense */
 
-	perm->id = h->id;
-	perm->key = h->key;
-
 	if (!validate_created_perms(h))
 		return -EPERM;
 	perm->uid = h->uid;
@@ -213,10 +210,10 @@ int restore_load_ipc_perms(struct ckpt_ctx *ctx,
 	perm->cuid = h->cuid;
 	perm->cgid = h->cgid;
 	perm->mode = h->mode;
-	perm->seq = h->seq;
 
-	return security_restore_obj(ctx, (void *)perm, CKPT_SECURITY_IPC,
-				   h->sec_ref);
+	return security_restore_obj(ctx, (void *)perm,
+				    CKPT_SECURITY_IPC,
+				    h->sec_ref);
 }
 
 static int restore_ipc_any(struct ckpt_ctx *ctx, struct ipc_namespace *ipc_ns,
diff --git a/ipc/checkpoint_msg.c b/ipc/checkpoint_msg.c
index 61b3d78..073a893 100644
--- a/ipc/checkpoint_msg.c
+++ b/ipc/checkpoint_msg.c
@@ -306,7 +306,7 @@ static int restore_msg_contents(struct ckpt_ctx *ctx, struct list_head *queue,
 int restore_ipc_msg(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
 {
 	struct ckpt_hdr_ipc_msg *h;
-	struct kern_ipc_perm *perms;
+	struct kern_ipc_perm *ipc;
 	struct msg_queue *msq;
 	struct ipc_ids *msg_ids = &ns->ids[IPC_MSG_IDS];
 	struct list_head messages;
@@ -344,12 +344,26 @@ int restore_ipc_msg(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
 
 	down_write(&msg_ids->rw_mutex);
 
-	/* we are the sole owners/users of this ipc_ns, it can't go away */
-	perms = ipc_lock(msg_ids, h->perms.id);
-	BUG_ON(IS_ERR(perms));	/* ipc_ns is private to us */
+	/*
+	 * We are the sole owners/users of this brand new ipc-ns, so:
+	 *
+	 * 1) The msgid could not have been deleted between its creation
+	 *   and taking the rw_mutex above.
+	 * 2) No unauthorized task will attempt to gain access to it,
+	 *   so it is safe to do away with ipc_lock(). This is useful
+	 *   because we can call functions that sleep.
+	 * 3) Likewise, we only restore the security bits further below,
+	 *   so it is safe to ignore this (theoretical only!) race.
+	 *
+	 * If/when we allow to restore the ipc state within the parent's
+	 * ipc-ns, we will need to re-examine this.
+	 */
+
+	ipc = idr_find(&msg_ids->ipcs_idr, h->perms.id);
+	BUG_ON(!ipc);
 
-	msq = container_of(perms, struct msg_queue, q_perm);
-	BUG_ON(!list_empty(&msq->q_messages));	/* ipc_ns is private to us */
+	msq = container_of(ipc, struct msg_queue, q_perm);
+	BUG_ON(!list_empty(&msq->q_messages));
 
 	/* attach queued messages we read before */
 	list_splice_init(&messages, &msq->q_messages);
@@ -362,13 +376,14 @@ int restore_ipc_msg(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
 	msq->q_qnum = h->q_qnum;
 
 	ret = load_ipc_msg_hdr(ctx, h, msq);
-
 	if (ret < 0) {
 		ckpt_debug("msq: need to remove (%d)\n", ret);
-		freeque(ns, perms);
-	} else
-		ipc_unlock(perms);
+		ipc_lock_by_ptr(&msq->q_perm);
+		freeque(ns, ipc);
+		ipc_unlock(ipc);
+	}
 	up_write(&msg_ids->rw_mutex);
+
  out:
 	free_msg_list(&messages);  /* no-op if all ok, else cleanup msgs */
 	ckpt_hdr_put(ctx, h);
diff --git a/ipc/checkpoint_sem.c b/ipc/checkpoint_sem.c
index 395c84d..78c1932 100644
--- a/ipc/checkpoint_sem.c
+++ b/ipc/checkpoint_sem.c
@@ -166,7 +166,7 @@ static struct sem *restore_sem_array(struct ckpt_ctx *ctx, int nsems)
 int restore_ipc_sem(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
 {
 	struct ckpt_hdr_ipc_sem *h;
-	struct kern_ipc_perm *perms;
+	struct kern_ipc_perm *ipc;
 	struct sem_array *sem;
 	struct sem *sma = NULL;
 	struct ipc_ids *sem_ids = &ns->ids[IPC_SEM_IDS];
@@ -201,19 +201,34 @@ int restore_ipc_sem(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
 
 	down_write(&sem_ids->rw_mutex);
 
-	/* we are the sole owners/users of this ipc_ns, it can't go away */
-	perms = ipc_lock(sem_ids, h->perms.id);
-	BUG_ON(IS_ERR(perms));  /* ipc_ns is private to us */
-
-	sem = container_of(perms, struct sem_array, sem_perm);
+	/*
+	 * We are the sole owners/users of this brand new ipc-ns, so:
+	 *
+	 * 1) The semid could not have been deleted between its creation
+	 *   and taking the rw_mutex above.
+	 * 2) No unauthorized task will attempt to gain access to it,
+	 *   so it is safe to do away with ipc_lock(). This is useful
+	 *   because we can call functions that sleep.
+	 * 3) Likewise, we only restore the security bits further below,
+	 *   so it is safe to ignore this (theoretical only!) race.
+	 *
+	 * If/when we allow to restore the ipc state within the parent's
+	 * ipc-ns, we will need to re-examine this.
+	 */
+
+	ipc = idr_find(&sem_ids->ipcs_idr, h->perms.id);
+	BUG_ON(!ipc);
+
+	sem = container_of(ipc, struct sem_array, sem_perm);
 	memcpy(sem->sem_base, sma, sem->sem_nsems * sizeof(*sma));
 
 	ret = load_ipc_sem_hdr(ctx, h, sem);
 	if (ret < 0) {
+		ipc_lock_by_ptr(&sem->sem_perm);
 		ckpt_debug("sem: need to remove (%d)\n", ret);
-		freeary(ns, perms);
-	} else
-		ipc_unlock(perms);
+		freeary(ns, ipc);
+		ipc_unlock(ipc);
+	}
 	up_write(&sem_ids->rw_mutex);
  out:
 	kfree(sma);
diff --git a/ipc/checkpoint_shm.c b/ipc/checkpoint_shm.c
index 01091d9..6329245 100644
--- a/ipc/checkpoint_shm.c
+++ b/ipc/checkpoint_shm.c
@@ -144,18 +144,27 @@ struct dq_ipcshm_del {
 	int id;
 };
 
-static int ipc_shm_delete(void *data)
+static int _ipc_shm_delete(struct ipc_namespace *ns, int id)
 {
-	struct dq_ipcshm_del *dq = (struct dq_ipcshm_del *) data;
 	mm_segment_t old_fs;
 	int ret;
 
 	old_fs = get_fs();
 	set_fs(get_ds());
-	ret = shmctl_down(dq->ipcns, dq->id, IPC_RMID, NULL, 0);
+	ret = shmctl_down(ns, id, IPC_RMID, NULL, 0);
 	set_fs(old_fs);
 
+	return ret;
+}
+
+static int ipc_shm_delete(void *data)
+{
+	struct dq_ipcshm_del *dq = (struct dq_ipcshm_del *) data;
+	int ret;
+
+	ret = _ipc_shm_delete(dq->ipcns, dq->id);
 	put_ipc_ns(dq->ipcns);
+
 	return ret;
 }
 
@@ -175,7 +184,6 @@ static int load_ipc_shm_hdr(struct ckpt_ctx *ctx,
 	if (h->shm_cprid < 0 || h->shm_lprid < 0)
 		return -EINVAL;
 
-	shp->shm_segsz = h->shm_segsz;
 	shp->shm_atim = h->shm_atim;
 	shp->shm_dtim = h->shm_dtim;
 	shp->shm_ctim = h->shm_ctim;
@@ -188,7 +196,7 @@ static int load_ipc_shm_hdr(struct ckpt_ctx *ctx,
 int restore_ipc_shm(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
 {
 	struct ckpt_hdr_ipc_shm *h;
-	struct kern_ipc_perm *perms;
+	struct kern_ipc_perm *ipc;
 	struct shmid_kernel *shp;
 	struct ipc_ids *shm_ids = &ns->ids[IPC_SHM_IDS];
 	struct file *file;
@@ -213,6 +221,14 @@ int restore_ipc_shm(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
 	if (h->flags & SHM_HUGETLB)	/* FIXME: support SHM_HUGETLB */
 		goto out;
 
+	shmflag = h->flags | h->perms.mode | IPC_CREAT | IPC_EXCL;
+	ckpt_debug("shm: do_shmget size %lld flag %#x id %d\n",
+		 h->shm_segsz, shmflag, h->perms.id);
+	ret = do_shmget(ns, h->perms.key, h->shm_segsz, shmflag, h->perms.id);
+	ckpt_debug("shm: do_shmget ret %d\n", ret);
+	if (ret < 0)
+		goto out;
+
 	/*
 	 * SHM_DEST means that the shm is to be deleted after creation.
 	 * However, deleting before it's actually attached is quite silly.
@@ -228,29 +244,36 @@ int restore_ipc_shm(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
 		dq.ipcns = ns;
 		get_ipc_ns(dq.ipcns);
 
-		/* XXX can safely use put_ipc_ns() as dtor, see above */
 		ret = deferqueue_add(ctx->deferqueue, &dq, sizeof(dq),
 				     (deferqueue_func_t) ipc_shm_delete,
-				     (deferqueue_func_t) put_ipc_ns);
-		if (ret < 0)
+				     (deferqueue_func_t) ipc_shm_delete);
+		if (ret < 0) {
+			ipc_shm_delete((void *) &dq);
 			goto out;
+		}
 	}
 
-	shmflag = h->flags | h->perms.mode | IPC_CREAT | IPC_EXCL;
-	ckpt_debug("shm: do_shmget size %lld flag %#x id %d\n",
-		 h->shm_segsz, shmflag, h->perms.id);
-	ret = do_shmget(ns, h->perms.key, h->shm_segsz, shmflag, h->perms.id);
-	ckpt_debug("shm: do_shmget ret %d\n", ret);
-	if (ret < 0)
-		goto out;
-
 	down_write(&shm_ids->rw_mutex);
 
-	/* we are the sole owners/users of this ipc_ns, it can't go away */
-	perms = ipc_lock(shm_ids, h->perms.id);
-	BUG_ON(IS_ERR(perms));  /* ipc_ns is private to us */
+	/*
+	 * We are the sole owners/users of this brand new ipc-ns, so:
+	 *
+	 * 1) The shmid could not have been deleted between its creation
+	 *   and taking the rw_mutex above.
+	 * 2) No unauthorized task will attempt to gain access to it,
+	 *   so it is safe to do away with ipc_lock(). This is useful
+	 *   because we can call functions that sleep.
+	 * 3) Likewise, we only restore the security bits further below,
+	 *   so it is safe to ignore this (theoretical only!) race.
+	 *
+	 * If/when we allow to restore the ipc state within the parent's
+	 * ipc-ns, we will need to re-examine this.
+	 */
+
+	ipc = idr_find(&shm_ids->ipcs_idr, h->perms.id);
+	BUG_ON(!ipc);
 
-	shp = container_of(perms, struct shmid_kernel, shm_perm);
+	shp = container_of(ipc, struct shmid_kernel, shm_perm);
 	file = shp->shm_file;
 	get_file(file);
 
@@ -265,12 +288,13 @@ int restore_ipc_shm(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
 	ret = restore_memory_contents(ctx, file->f_dentry->d_inode);
  mutex:
 	fput(file);
-	if (ret < 0) {
-		ckpt_debug("shm: need to remove (%d)\n", ret);
-		do_shm_rmid(ns, perms);
-	} else
-		ipc_unlock(perms);
 	up_write(&shm_ids->rw_mutex);
+
+	/* discard this shmid if error and deferqueue wasn't set */
+	if (ret < 0 && !(h->perms.mode & SHM_DEST)) {
+		ckpt_debug("shm: need to remove (%d)\n", ret);
+		_ipc_shm_delete(ns, h->perms.id);
+	}
  out:
 	ckpt_hdr_put(ctx, h);
 	return ret;
-- 
1.6.3.3

_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers




More information about the Devel mailing list