[Devel] [PATCH] c/r: fix "scheduling in atomic" while restoring ipc shm
Oren Laadan
orenl at cs.columbia.edu
Wed Feb 24 15:31:07 PST 2010
"""
While playing with checkpoint-restart code, version
several-commits-before-0.19, we have faced "scheduling in atomic" issue.
...
So restore_ipc_shm() calls ipc_lock() and then restore_memory_contents().
Inside ipc_lock(), a spinlock is taken.
Inside restore_memory_contents(), checkpoint data is read, that results
in vfs_read() and a schedule somewhere below.
...
Another related bug: if load_ipc_shm_hdr() fails in line 257, control
is transfered to mutex: label with negative ret value; ipc_unlock()
is not called on this path.
"""
Actually, the error could have occurred with other sleeping functions,
for example, security_restore_obj().
The fix is to simply not take ipc_lock(). this relies on the fact that
IPC is always restored to a brand new ipc namespace which is only
accessible to the restarting process(es):
1) The ipc object (id) could not have been deleted between its creation
and taking the rw_mutex.
2) No unauthorized task will attempt to gain access to it, so it is
safe to do away with ipc_lock(). This is useful because we can call
functions that sleep.
3) Likewise, we only restore the security bits further below, so it is
safe to ignore a security (theoretical only!) race.
If (and when) we allow to restore the ipc state within the parent's
namespace, we will need to re-examine this.
While at it, there is also a cleanup to not restore perm->{id,key,seq}
or the shm->shm_segsz, as they are all set upon creation properly.
Signed-off-by: Oren Laadan <orenl at cs.columbia.edu>
Reported-by: "Nikita V. Youshchenko" <yoush at cs.msu.su>
---
ipc/checkpoint.c | 9 ++----
ipc/checkpoint_msg.c | 35 +++++++++++++++++-------
ipc/checkpoint_sem.c | 33 ++++++++++++++++------
ipc/checkpoint_shm.c | 74 +++++++++++++++++++++++++++++++++-----------------
4 files changed, 101 insertions(+), 50 deletions(-)
diff --git a/ipc/checkpoint.c b/ipc/checkpoint.c
index 4322dea..06027c2 100644
--- a/ipc/checkpoint.c
+++ b/ipc/checkpoint.c
@@ -203,9 +203,6 @@ int restore_load_ipc_perms(struct ckpt_ctx *ctx,
/* FIX: verify the ->mode field makes sense */
- perm->id = h->id;
- perm->key = h->key;
-
if (!validate_created_perms(h))
return -EPERM;
perm->uid = h->uid;
@@ -213,10 +210,10 @@ int restore_load_ipc_perms(struct ckpt_ctx *ctx,
perm->cuid = h->cuid;
perm->cgid = h->cgid;
perm->mode = h->mode;
- perm->seq = h->seq;
- return security_restore_obj(ctx, (void *)perm, CKPT_SECURITY_IPC,
- h->sec_ref);
+ return security_restore_obj(ctx, (void *)perm,
+ CKPT_SECURITY_IPC,
+ h->sec_ref);
}
static int restore_ipc_any(struct ckpt_ctx *ctx, struct ipc_namespace *ipc_ns,
diff --git a/ipc/checkpoint_msg.c b/ipc/checkpoint_msg.c
index 61b3d78..073a893 100644
--- a/ipc/checkpoint_msg.c
+++ b/ipc/checkpoint_msg.c
@@ -306,7 +306,7 @@ static int restore_msg_contents(struct ckpt_ctx *ctx, struct list_head *queue,
int restore_ipc_msg(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
{
struct ckpt_hdr_ipc_msg *h;
- struct kern_ipc_perm *perms;
+ struct kern_ipc_perm *ipc;
struct msg_queue *msq;
struct ipc_ids *msg_ids = &ns->ids[IPC_MSG_IDS];
struct list_head messages;
@@ -344,12 +344,26 @@ int restore_ipc_msg(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
down_write(&msg_ids->rw_mutex);
- /* we are the sole owners/users of this ipc_ns, it can't go away */
- perms = ipc_lock(msg_ids, h->perms.id);
- BUG_ON(IS_ERR(perms)); /* ipc_ns is private to us */
+ /*
+ * We are the sole owners/users of this brand new ipc-ns, so:
+ *
+ * 1) The msgid could not have been deleted between its creation
+ * and taking the rw_mutex above.
+ * 2) No unauthorized task will attempt to gain access to it,
+ * so it is safe to do away with ipc_lock(). This is useful
+ * because we can call functions that sleep.
+ * 3) Likewise, we only restore the security bits further below,
+ * so it is safe to ignore this (theoretical only!) race.
+ *
+ * If/when we allow to restore the ipc state within the parent's
+ * ipc-ns, we will need to re-examine this.
+ */
+
+ ipc = idr_find(&msg_ids->ipcs_idr, h->perms.id);
+ BUG_ON(!ipc);
- msq = container_of(perms, struct msg_queue, q_perm);
- BUG_ON(!list_empty(&msq->q_messages)); /* ipc_ns is private to us */
+ msq = container_of(ipc, struct msg_queue, q_perm);
+ BUG_ON(!list_empty(&msq->q_messages));
/* attach queued messages we read before */
list_splice_init(&messages, &msq->q_messages);
@@ -362,13 +376,14 @@ int restore_ipc_msg(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
msq->q_qnum = h->q_qnum;
ret = load_ipc_msg_hdr(ctx, h, msq);
-
if (ret < 0) {
ckpt_debug("msq: need to remove (%d)\n", ret);
- freeque(ns, perms);
- } else
- ipc_unlock(perms);
+ ipc_lock_by_ptr(&msq->q_perm);
+ freeque(ns, ipc);
+ ipc_unlock(ipc);
+ }
up_write(&msg_ids->rw_mutex);
+
out:
free_msg_list(&messages); /* no-op if all ok, else cleanup msgs */
ckpt_hdr_put(ctx, h);
diff --git a/ipc/checkpoint_sem.c b/ipc/checkpoint_sem.c
index 395c84d..78c1932 100644
--- a/ipc/checkpoint_sem.c
+++ b/ipc/checkpoint_sem.c
@@ -166,7 +166,7 @@ static struct sem *restore_sem_array(struct ckpt_ctx *ctx, int nsems)
int restore_ipc_sem(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
{
struct ckpt_hdr_ipc_sem *h;
- struct kern_ipc_perm *perms;
+ struct kern_ipc_perm *ipc;
struct sem_array *sem;
struct sem *sma = NULL;
struct ipc_ids *sem_ids = &ns->ids[IPC_SEM_IDS];
@@ -201,19 +201,34 @@ int restore_ipc_sem(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
down_write(&sem_ids->rw_mutex);
- /* we are the sole owners/users of this ipc_ns, it can't go away */
- perms = ipc_lock(sem_ids, h->perms.id);
- BUG_ON(IS_ERR(perms)); /* ipc_ns is private to us */
-
- sem = container_of(perms, struct sem_array, sem_perm);
+ /*
+ * We are the sole owners/users of this brand new ipc-ns, so:
+ *
+ * 1) The semid could not have been deleted between its creation
+ * and taking the rw_mutex above.
+ * 2) No unauthorized task will attempt to gain access to it,
+ * so it is safe to do away with ipc_lock(). This is useful
+ * because we can call functions that sleep.
+ * 3) Likewise, we only restore the security bits further below,
+ * so it is safe to ignore this (theoretical only!) race.
+ *
+ * If/when we allow to restore the ipc state within the parent's
+ * ipc-ns, we will need to re-examine this.
+ */
+
+ ipc = idr_find(&sem_ids->ipcs_idr, h->perms.id);
+ BUG_ON(!ipc);
+
+ sem = container_of(ipc, struct sem_array, sem_perm);
memcpy(sem->sem_base, sma, sem->sem_nsems * sizeof(*sma));
ret = load_ipc_sem_hdr(ctx, h, sem);
if (ret < 0) {
+ ipc_lock_by_ptr(&sem->sem_perm);
ckpt_debug("sem: need to remove (%d)\n", ret);
- freeary(ns, perms);
- } else
- ipc_unlock(perms);
+ freeary(ns, ipc);
+ ipc_unlock(ipc);
+ }
up_write(&sem_ids->rw_mutex);
out:
kfree(sma);
diff --git a/ipc/checkpoint_shm.c b/ipc/checkpoint_shm.c
index 01091d9..6329245 100644
--- a/ipc/checkpoint_shm.c
+++ b/ipc/checkpoint_shm.c
@@ -144,18 +144,27 @@ struct dq_ipcshm_del {
int id;
};
-static int ipc_shm_delete(void *data)
+static int _ipc_shm_delete(struct ipc_namespace *ns, int id)
{
- struct dq_ipcshm_del *dq = (struct dq_ipcshm_del *) data;
mm_segment_t old_fs;
int ret;
old_fs = get_fs();
set_fs(get_ds());
- ret = shmctl_down(dq->ipcns, dq->id, IPC_RMID, NULL, 0);
+ ret = shmctl_down(ns, id, IPC_RMID, NULL, 0);
set_fs(old_fs);
+ return ret;
+}
+
+static int ipc_shm_delete(void *data)
+{
+ struct dq_ipcshm_del *dq = (struct dq_ipcshm_del *) data;
+ int ret;
+
+ ret = _ipc_shm_delete(dq->ipcns, dq->id);
put_ipc_ns(dq->ipcns);
+
return ret;
}
@@ -175,7 +184,6 @@ static int load_ipc_shm_hdr(struct ckpt_ctx *ctx,
if (h->shm_cprid < 0 || h->shm_lprid < 0)
return -EINVAL;
- shp->shm_segsz = h->shm_segsz;
shp->shm_atim = h->shm_atim;
shp->shm_dtim = h->shm_dtim;
shp->shm_ctim = h->shm_ctim;
@@ -188,7 +196,7 @@ static int load_ipc_shm_hdr(struct ckpt_ctx *ctx,
int restore_ipc_shm(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
{
struct ckpt_hdr_ipc_shm *h;
- struct kern_ipc_perm *perms;
+ struct kern_ipc_perm *ipc;
struct shmid_kernel *shp;
struct ipc_ids *shm_ids = &ns->ids[IPC_SHM_IDS];
struct file *file;
@@ -213,6 +221,14 @@ int restore_ipc_shm(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
if (h->flags & SHM_HUGETLB) /* FIXME: support SHM_HUGETLB */
goto out;
+ shmflag = h->flags | h->perms.mode | IPC_CREAT | IPC_EXCL;
+ ckpt_debug("shm: do_shmget size %lld flag %#x id %d\n",
+ h->shm_segsz, shmflag, h->perms.id);
+ ret = do_shmget(ns, h->perms.key, h->shm_segsz, shmflag, h->perms.id);
+ ckpt_debug("shm: do_shmget ret %d\n", ret);
+ if (ret < 0)
+ goto out;
+
/*
* SHM_DEST means that the shm is to be deleted after creation.
* However, deleting before it's actually attached is quite silly.
@@ -228,29 +244,36 @@ int restore_ipc_shm(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
dq.ipcns = ns;
get_ipc_ns(dq.ipcns);
- /* XXX can safely use put_ipc_ns() as dtor, see above */
ret = deferqueue_add(ctx->deferqueue, &dq, sizeof(dq),
(deferqueue_func_t) ipc_shm_delete,
- (deferqueue_func_t) put_ipc_ns);
- if (ret < 0)
+ (deferqueue_func_t) ipc_shm_delete);
+ if (ret < 0) {
+ ipc_shm_delete((void *) &dq);
goto out;
+ }
}
- shmflag = h->flags | h->perms.mode | IPC_CREAT | IPC_EXCL;
- ckpt_debug("shm: do_shmget size %lld flag %#x id %d\n",
- h->shm_segsz, shmflag, h->perms.id);
- ret = do_shmget(ns, h->perms.key, h->shm_segsz, shmflag, h->perms.id);
- ckpt_debug("shm: do_shmget ret %d\n", ret);
- if (ret < 0)
- goto out;
-
down_write(&shm_ids->rw_mutex);
- /* we are the sole owners/users of this ipc_ns, it can't go away */
- perms = ipc_lock(shm_ids, h->perms.id);
- BUG_ON(IS_ERR(perms)); /* ipc_ns is private to us */
+ /*
+ * We are the sole owners/users of this brand new ipc-ns, so:
+ *
+ * 1) The shmid could not have been deleted between its creation
+ * and taking the rw_mutex above.
+ * 2) No unauthorized task will attempt to gain access to it,
+ * so it is safe to do away with ipc_lock(). This is useful
+ * because we can call functions that sleep.
+ * 3) Likewise, we only restore the security bits further below,
+ * so it is safe to ignore this (theoretical only!) race.
+ *
+ * If/when we allow to restore the ipc state within the parent's
+ * ipc-ns, we will need to re-examine this.
+ */
+
+ ipc = idr_find(&shm_ids->ipcs_idr, h->perms.id);
+ BUG_ON(!ipc);
- shp = container_of(perms, struct shmid_kernel, shm_perm);
+ shp = container_of(ipc, struct shmid_kernel, shm_perm);
file = shp->shm_file;
get_file(file);
@@ -265,12 +288,13 @@ int restore_ipc_shm(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
ret = restore_memory_contents(ctx, file->f_dentry->d_inode);
mutex:
fput(file);
- if (ret < 0) {
- ckpt_debug("shm: need to remove (%d)\n", ret);
- do_shm_rmid(ns, perms);
- } else
- ipc_unlock(perms);
up_write(&shm_ids->rw_mutex);
+
+ /* discard this shmid if error and deferqueue wasn't set */
+ if (ret < 0 && !(h->perms.mode & SHM_DEST)) {
+ ckpt_debug("shm: need to remove (%d)\n", ret);
+ _ipc_shm_delete(ns, h->perms.id);
+ }
out:
ckpt_hdr_put(ctx, h);
return ret;
--
1.6.3.3
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
More information about the Devel
mailing list