[Devel] Re: [PATCH] c/r: fix "scheduling in atomic" while restoring ipc shm
Oren Laadan
orenl at cs.columbia.edu
Wed Feb 24 18:53:31 PST 2010
Hi Nikita,
Thanks for the report and the analysis. It actually helped to
pinpoint a couple of other minor issues in the code. This patch
should fix all of these.
Oren.
Oren Laadan wrote:
> """
> While playing with checkpoint-restart code, version
> several-commits-before-0.19, we have faced "scheduling in atomic" issue.
> ...
> So restore_ipc_shm() calls ipc_lock() and then restore_memory_contents().
> Inside ipc_lock(), a spinlock is taken.
> Inside restore_memory_contents(), checkpoint data is read, that results
> in vfs_read() and a schedule somewhere below.
> ...
> Another related bug: if load_ipc_shm_hdr() fails in line 257, control
> is transfered to mutex: label with negative ret value; ipc_unlock()
> is not called on this path.
> """
>
> Actually, the error could have occurred with other sleeping functions,
> for example, security_restore_obj().
>
> The fix is to simply not take ipc_lock(). this relies on the fact that
> IPC is always restored to a brand new ipc namespace which is only
> accessible to the restarting process(es):
>
> 1) The ipc object (id) could not have been deleted between its creation
> and taking the rw_mutex.
>
> 2) No unauthorized task will attempt to gain access to it, so it is
> safe to do away with ipc_lock(). This is useful because we can call
> functions that sleep.
>
> 3) Likewise, we only restore the security bits further below, so it is
> safe to ignore a security (theoretical only!) race.
>
> If (and when) we allow to restore the ipc state within the parent's
> namespace, we will need to re-examine this.
>
> While at it, there is also a cleanup to not restore perm->{id,key,seq}
> or the shm->shm_segsz, as they are all set upon creation properly.
>
> Signed-off-by: Oren Laadan <orenl at cs.columbia.edu>
> Reported-by: "Nikita V. Youshchenko" <yoush at cs.msu.su>
> ---
> ipc/checkpoint.c | 9 ++----
> ipc/checkpoint_msg.c | 35 +++++++++++++++++-------
> ipc/checkpoint_sem.c | 33 ++++++++++++++++------
> ipc/checkpoint_shm.c | 74 +++++++++++++++++++++++++++++++++-----------------
> 4 files changed, 101 insertions(+), 50 deletions(-)
>
> diff --git a/ipc/checkpoint.c b/ipc/checkpoint.c
> index 4322dea..06027c2 100644
> --- a/ipc/checkpoint.c
> +++ b/ipc/checkpoint.c
> @@ -203,9 +203,6 @@ int restore_load_ipc_perms(struct ckpt_ctx *ctx,
>
> /* FIX: verify the ->mode field makes sense */
>
> - perm->id = h->id;
> - perm->key = h->key;
> -
> if (!validate_created_perms(h))
> return -EPERM;
> perm->uid = h->uid;
> @@ -213,10 +210,10 @@ int restore_load_ipc_perms(struct ckpt_ctx *ctx,
> perm->cuid = h->cuid;
> perm->cgid = h->cgid;
> perm->mode = h->mode;
> - perm->seq = h->seq;
>
> - return security_restore_obj(ctx, (void *)perm, CKPT_SECURITY_IPC,
> - h->sec_ref);
> + return security_restore_obj(ctx, (void *)perm,
> + CKPT_SECURITY_IPC,
> + h->sec_ref);
> }
>
> static int restore_ipc_any(struct ckpt_ctx *ctx, struct ipc_namespace *ipc_ns,
> diff --git a/ipc/checkpoint_msg.c b/ipc/checkpoint_msg.c
> index 61b3d78..073a893 100644
> --- a/ipc/checkpoint_msg.c
> +++ b/ipc/checkpoint_msg.c
> @@ -306,7 +306,7 @@ static int restore_msg_contents(struct ckpt_ctx *ctx, struct list_head *queue,
> int restore_ipc_msg(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
> {
> struct ckpt_hdr_ipc_msg *h;
> - struct kern_ipc_perm *perms;
> + struct kern_ipc_perm *ipc;
> struct msg_queue *msq;
> struct ipc_ids *msg_ids = &ns->ids[IPC_MSG_IDS];
> struct list_head messages;
> @@ -344,12 +344,26 @@ int restore_ipc_msg(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
>
> down_write(&msg_ids->rw_mutex);
>
> - /* we are the sole owners/users of this ipc_ns, it can't go away */
> - perms = ipc_lock(msg_ids, h->perms.id);
> - BUG_ON(IS_ERR(perms)); /* ipc_ns is private to us */
> + /*
> + * We are the sole owners/users of this brand new ipc-ns, so:
> + *
> + * 1) The msgid could not have been deleted between its creation
> + * and taking the rw_mutex above.
> + * 2) No unauthorized task will attempt to gain access to it,
> + * so it is safe to do away with ipc_lock(). This is useful
> + * because we can call functions that sleep.
> + * 3) Likewise, we only restore the security bits further below,
> + * so it is safe to ignore this (theoretical only!) race.
> + *
> + * If/when we allow to restore the ipc state within the parent's
> + * ipc-ns, we will need to re-examine this.
> + */
> +
> + ipc = idr_find(&msg_ids->ipcs_idr, h->perms.id);
> + BUG_ON(!ipc);
>
> - msq = container_of(perms, struct msg_queue, q_perm);
> - BUG_ON(!list_empty(&msq->q_messages)); /* ipc_ns is private to us */
> + msq = container_of(ipc, struct msg_queue, q_perm);
> + BUG_ON(!list_empty(&msq->q_messages));
>
> /* attach queued messages we read before */
> list_splice_init(&messages, &msq->q_messages);
> @@ -362,13 +376,14 @@ int restore_ipc_msg(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
> msq->q_qnum = h->q_qnum;
>
> ret = load_ipc_msg_hdr(ctx, h, msq);
> -
> if (ret < 0) {
> ckpt_debug("msq: need to remove (%d)\n", ret);
> - freeque(ns, perms);
> - } else
> - ipc_unlock(perms);
> + ipc_lock_by_ptr(&msq->q_perm);
> + freeque(ns, ipc);
> + ipc_unlock(ipc);
> + }
> up_write(&msg_ids->rw_mutex);
> +
> out:
> free_msg_list(&messages); /* no-op if all ok, else cleanup msgs */
> ckpt_hdr_put(ctx, h);
> diff --git a/ipc/checkpoint_sem.c b/ipc/checkpoint_sem.c
> index 395c84d..78c1932 100644
> --- a/ipc/checkpoint_sem.c
> +++ b/ipc/checkpoint_sem.c
> @@ -166,7 +166,7 @@ static struct sem *restore_sem_array(struct ckpt_ctx *ctx, int nsems)
> int restore_ipc_sem(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
> {
> struct ckpt_hdr_ipc_sem *h;
> - struct kern_ipc_perm *perms;
> + struct kern_ipc_perm *ipc;
> struct sem_array *sem;
> struct sem *sma = NULL;
> struct ipc_ids *sem_ids = &ns->ids[IPC_SEM_IDS];
> @@ -201,19 +201,34 @@ int restore_ipc_sem(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
>
> down_write(&sem_ids->rw_mutex);
>
> - /* we are the sole owners/users of this ipc_ns, it can't go away */
> - perms = ipc_lock(sem_ids, h->perms.id);
> - BUG_ON(IS_ERR(perms)); /* ipc_ns is private to us */
> -
> - sem = container_of(perms, struct sem_array, sem_perm);
> + /*
> + * We are the sole owners/users of this brand new ipc-ns, so:
> + *
> + * 1) The semid could not have been deleted between its creation
> + * and taking the rw_mutex above.
> + * 2) No unauthorized task will attempt to gain access to it,
> + * so it is safe to do away with ipc_lock(). This is useful
> + * because we can call functions that sleep.
> + * 3) Likewise, we only restore the security bits further below,
> + * so it is safe to ignore this (theoretical only!) race.
> + *
> + * If/when we allow to restore the ipc state within the parent's
> + * ipc-ns, we will need to re-examine this.
> + */
> +
> + ipc = idr_find(&sem_ids->ipcs_idr, h->perms.id);
> + BUG_ON(!ipc);
> +
> + sem = container_of(ipc, struct sem_array, sem_perm);
> memcpy(sem->sem_base, sma, sem->sem_nsems * sizeof(*sma));
>
> ret = load_ipc_sem_hdr(ctx, h, sem);
> if (ret < 0) {
> + ipc_lock_by_ptr(&sem->sem_perm);
> ckpt_debug("sem: need to remove (%d)\n", ret);
> - freeary(ns, perms);
> - } else
> - ipc_unlock(perms);
> + freeary(ns, ipc);
> + ipc_unlock(ipc);
> + }
> up_write(&sem_ids->rw_mutex);
> out:
> kfree(sma);
> diff --git a/ipc/checkpoint_shm.c b/ipc/checkpoint_shm.c
> index 01091d9..6329245 100644
> --- a/ipc/checkpoint_shm.c
> +++ b/ipc/checkpoint_shm.c
> @@ -144,18 +144,27 @@ struct dq_ipcshm_del {
> int id;
> };
>
> -static int ipc_shm_delete(void *data)
> +static int _ipc_shm_delete(struct ipc_namespace *ns, int id)
> {
> - struct dq_ipcshm_del *dq = (struct dq_ipcshm_del *) data;
> mm_segment_t old_fs;
> int ret;
>
> old_fs = get_fs();
> set_fs(get_ds());
> - ret = shmctl_down(dq->ipcns, dq->id, IPC_RMID, NULL, 0);
> + ret = shmctl_down(ns, id, IPC_RMID, NULL, 0);
> set_fs(old_fs);
>
> + return ret;
> +}
> +
> +static int ipc_shm_delete(void *data)
> +{
> + struct dq_ipcshm_del *dq = (struct dq_ipcshm_del *) data;
> + int ret;
> +
> + ret = _ipc_shm_delete(dq->ipcns, dq->id);
> put_ipc_ns(dq->ipcns);
> +
> return ret;
> }
>
> @@ -175,7 +184,6 @@ static int load_ipc_shm_hdr(struct ckpt_ctx *ctx,
> if (h->shm_cprid < 0 || h->shm_lprid < 0)
> return -EINVAL;
>
> - shp->shm_segsz = h->shm_segsz;
> shp->shm_atim = h->shm_atim;
> shp->shm_dtim = h->shm_dtim;
> shp->shm_ctim = h->shm_ctim;
> @@ -188,7 +196,7 @@ static int load_ipc_shm_hdr(struct ckpt_ctx *ctx,
> int restore_ipc_shm(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
> {
> struct ckpt_hdr_ipc_shm *h;
> - struct kern_ipc_perm *perms;
> + struct kern_ipc_perm *ipc;
> struct shmid_kernel *shp;
> struct ipc_ids *shm_ids = &ns->ids[IPC_SHM_IDS];
> struct file *file;
> @@ -213,6 +221,14 @@ int restore_ipc_shm(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
> if (h->flags & SHM_HUGETLB) /* FIXME: support SHM_HUGETLB */
> goto out;
>
> + shmflag = h->flags | h->perms.mode | IPC_CREAT | IPC_EXCL;
> + ckpt_debug("shm: do_shmget size %lld flag %#x id %d\n",
> + h->shm_segsz, shmflag, h->perms.id);
> + ret = do_shmget(ns, h->perms.key, h->shm_segsz, shmflag, h->perms.id);
> + ckpt_debug("shm: do_shmget ret %d\n", ret);
> + if (ret < 0)
> + goto out;
> +
> /*
> * SHM_DEST means that the shm is to be deleted after creation.
> * However, deleting before it's actually attached is quite silly.
> @@ -228,29 +244,36 @@ int restore_ipc_shm(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
> dq.ipcns = ns;
> get_ipc_ns(dq.ipcns);
>
> - /* XXX can safely use put_ipc_ns() as dtor, see above */
> ret = deferqueue_add(ctx->deferqueue, &dq, sizeof(dq),
> (deferqueue_func_t) ipc_shm_delete,
> - (deferqueue_func_t) put_ipc_ns);
> - if (ret < 0)
> + (deferqueue_func_t) ipc_shm_delete);
> + if (ret < 0) {
> + ipc_shm_delete((void *) &dq);
> goto out;
> + }
> }
>
> - shmflag = h->flags | h->perms.mode | IPC_CREAT | IPC_EXCL;
> - ckpt_debug("shm: do_shmget size %lld flag %#x id %d\n",
> - h->shm_segsz, shmflag, h->perms.id);
> - ret = do_shmget(ns, h->perms.key, h->shm_segsz, shmflag, h->perms.id);
> - ckpt_debug("shm: do_shmget ret %d\n", ret);
> - if (ret < 0)
> - goto out;
> -
> down_write(&shm_ids->rw_mutex);
>
> - /* we are the sole owners/users of this ipc_ns, it can't go away */
> - perms = ipc_lock(shm_ids, h->perms.id);
> - BUG_ON(IS_ERR(perms)); /* ipc_ns is private to us */
> + /*
> + * We are the sole owners/users of this brand new ipc-ns, so:
> + *
> + * 1) The shmid could not have been deleted between its creation
> + * and taking the rw_mutex above.
> + * 2) No unauthorized task will attempt to gain access to it,
> + * so it is safe to do away with ipc_lock(). This is useful
> + * because we can call functions that sleep.
> + * 3) Likewise, we only restore the security bits further below,
> + * so it is safe to ignore this (theoretical only!) race.
> + *
> + * If/when we allow to restore the ipc state within the parent's
> + * ipc-ns, we will need to re-examine this.
> + */
> +
> + ipc = idr_find(&shm_ids->ipcs_idr, h->perms.id);
> + BUG_ON(!ipc);
>
> - shp = container_of(perms, struct shmid_kernel, shm_perm);
> + shp = container_of(ipc, struct shmid_kernel, shm_perm);
> file = shp->shm_file;
> get_file(file);
>
> @@ -265,12 +288,13 @@ int restore_ipc_shm(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
> ret = restore_memory_contents(ctx, file->f_dentry->d_inode);
> mutex:
> fput(file);
> - if (ret < 0) {
> - ckpt_debug("shm: need to remove (%d)\n", ret);
> - do_shm_rmid(ns, perms);
> - } else
> - ipc_unlock(perms);
> up_write(&shm_ids->rw_mutex);
> +
> + /* discard this shmid if error and deferqueue wasn't set */
> + if (ret < 0 && !(h->perms.mode & SHM_DEST)) {
> + ckpt_debug("shm: need to remove (%d)\n", ret);
> + _ipc_shm_delete(ns, h->perms.id);
> + }
> out:
> ckpt_hdr_put(ctx, h);
> return ret;
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
More information about the Devel
mailing list