[Devel] Re: [PATCH 2/2] Add support for the per-task sem_undo list (v2)
Oren Laadan
orenl at cs.columbia.edu
Wed Aug 4 11:49:09 PDT 2010
On 08/04/2010 01:02 PM, Dan Smith wrote:
> The semaphore undo list is a set of adjustments to be made to semaphores
> held by a task on exit. Right now, we do not checkpoint or restore this
> list which could cause undesirable behavior by a restarted process on exit.
>
> Changes in v2:
> - Remove collect operation
> - Add a BUILD_BUG_ON() to ensure sizeof(short) == sizeof(__u16)
> - Use sizeof(__u16) when copying to/from checkpoint header
> - Fix a couple of leaked hdr objects
> - Avoid reading the semadj buffer with rcu_read_lock() held
> - Set the sem_undo pointer on tasks other than the first to restore a list
> - Fix refcounting on restart
> - Pull out the guts of exit_sem() into put_undo_list() and call that
> from our drop() function in case we're the last one.
>
> Signed-off-by: Dan Smith <danms at us.ibm.com>
Looks better. Still some more comments...
> ---
> include/linux/checkpoint.h | 4 +
> include/linux/checkpoint_hdr.h | 18 ++
> ipc/sem.c | 342 +++++++++++++++++++++++++++++++++++++++-
> kernel/checkpoint/process.c | 13 ++
> 4 files changed, 369 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
> index 4e25042..34e81f4 100644
> --- a/include/linux/checkpoint.h
> +++ b/include/linux/checkpoint.h
> @@ -271,6 +271,10 @@ extern int ckpt_collect_fs(struct ckpt_ctx *ctx, struct task_struct *t);
> extern int checkpoint_obj_fs(struct ckpt_ctx *ctx, struct task_struct *t);
> extern int restore_obj_fs(struct ckpt_ctx *ctx, int fs_objref);
>
> +/* per-task semaphore undo */
> +int checkpoint_obj_sem_undo(struct ckpt_ctx *ctx, struct task_struct *t);
> +int restore_obj_sem_undo(struct ckpt_ctx *ctx, int sem_undo_objref);
Please add "extern" here (been burned before)
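i.e., simply:

    extern int checkpoint_obj_sem_undo(struct ckpt_ctx *ctx, struct task_struct *t);
    extern int restore_obj_sem_undo(struct ckpt_ctx *ctx, int sem_undo_objref);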
[...]
> +static int obj_sem_undo_grab(void *ptr)
> +{
> + struct sem_undo_list *ulp = ptr;
> +
> + atomic_inc(&ulp->refcnt);
> + return 0;
> +}
> +
> +static void put_undo_list(struct sem_undo_list *ulp);
nit: to me it makes more sense to move the grab/drop/checkpoint/restart
code, as well as sem_init(), to the end of the file (and avoid those
static forward declarations...).
> +static void obj_sem_undo_drop(void *ptr, int lastref)
> +{
> + struct sem_undo_list *ulp = ptr;
> +
> + put_undo_list(ulp);
> +}
> +
> +static int obj_sem_undo_users(void *ptr)
> +{
> + struct sem_undo_list *ulp = ptr;
> +
> + return atomic_read(&ulp->refcnt);
> +}
Can get rid of ..._users() since we don't collect them.
> +
> +static void *restore_sem_undo(struct ckpt_ctx *ctx);
> +static int checkpoint_sem_undo(struct ckpt_ctx *ctx, void *ptr);
> +
> +static const struct ckpt_obj_ops ckpt_obj_sem_undo_ops = {
> + .obj_name = "IPC_SEM_UNDO",
> + .obj_type = CKPT_OBJ_SEM_UNDO,
> + .ref_drop = obj_sem_undo_drop,
> + .ref_grab = obj_sem_undo_grab,
> + .ref_users = obj_sem_undo_users,
(here too)
> + .checkpoint = checkpoint_sem_undo,
> + .restore = restore_sem_undo,
> +};
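With the collect path gone, obj_sem_undo_users() can be deleted too, and
the ops table shrinks to something like:

    static const struct ckpt_obj_ops ckpt_obj_sem_undo_ops = {
            .obj_name = "IPC_SEM_UNDO",
            .obj_type = CKPT_OBJ_SEM_UNDO,
            .ref_drop = obj_sem_undo_drop,
            .ref_grab = obj_sem_undo_grab,
            .checkpoint = checkpoint_sem_undo,
            .restore = restore_sem_undo,
    };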
> +
> void __init sem_init (void)
> {
> sem_init_ns(&init_ipc_ns);
> ipc_init_proc_interface("sysvipc/sem",
> " key semid perms nsems uid gid cuid cgid otime ctime\n",
> IPC_SEM_IDS, sysvipc_sem_proc_show);
> +
> + /* sem_undo_list uses a short
> + * ckpt_hdr_task_sem_undo uses a __u16
> + */
> + BUILD_BUG_ON(sizeof(short) != sizeof(__u16));
semadj is short, so:
s/__u16/__s16/
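i.e.:

    /* semadj entries are 'short'; the checkpoint header mirrors them as __s16 */
    BUILD_BUG_ON(sizeof(short) != sizeof(__s16));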
> +
> + register_checkpoint_obj(&ckpt_obj_sem_undo_ops);
> }
>
> /*
> @@ -1363,14 +1407,8 @@ int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
> * The current implementation does not do so. The POSIX standard
> * and SVID should be consulted to determine what behavior is mandated.
> */
> -void exit_sem(struct task_struct *tsk)
> +static void put_undo_list(struct sem_undo_list *ulp)
> {
> - struct sem_undo_list *ulp;
> -
> - ulp = tsk->sysvsem.undo_list;
> - if (!ulp)
> - return;
> - tsk->sysvsem.undo_list = NULL;
>
> if (!atomic_dec_and_test(&ulp->refcnt))
> return;
> @@ -1393,7 +1431,7 @@ void exit_sem(struct task_struct *tsk)
> if (semid == -1)
> break;
>
> - sma = sem_lock_check(tsk->nsproxy->ipc_ns, un->semid);
> + sma = sem_lock_check(ulp->ipc_ns, un->semid);
This hunk belongs in the previous patch, no?
[...]
> +int checkpoint_sem_undo_adj(struct ckpt_ctx *ctx, struct sem_undo *un)
> +{
> + int nsems;
> + int ret;
> + short *semadj = NULL;
> + struct sem_array *sma;
> + struct ckpt_hdr_task_sem_undo *h = NULL;
> +
> + sma = sem_lock(ctx->root_nsproxy->ipc_ns, un->semid);
> + if (IS_ERR(sma)) {
> + ckpt_debug("unable to lock semid %i (wrong ns?)\n", un->semid);
> + return PTR_ERR(sma);
> + }
> +
> + nsems = sma->sem_nsems;
> + sem_getref_and_unlock(sma);
> +
> + h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_TASK_SEM_UNDO);
> + if (!h)
> + goto putref_abort;
> +
> + semadj = kzalloc(nsems * sizeof(short), GFP_KERNEL);
> + if (!semadj)
> + goto putref_abort;
> +
> + sem_lock_and_putref(sma);
> +
> + h->semid = un->semid;
> + h->semadj_count = nsems;
> + memcpy(semadj, un->semadj, h->semadj_count * sizeof(__u16));
s/__u16/__s16/
> +
> + sem_unlock(sma);
> +
> + ret = ckpt_write_obj(ctx, (struct ckpt_hdr *)h);
> + if (ret == 0)
> + ret = ckpt_write_buffer(ctx, semadj, nsems * sizeof(short));
.... s/short/__s16/
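i.e., both the memcpy() and the buffer write should use __s16:

    memcpy(semadj, un->semadj, h->semadj_count * sizeof(__s16));
    ...
    ret = ckpt_write_buffer(ctx, semadj, nsems * sizeof(__s16));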
[...]
> +static int restore_task_sem_undo_adj(struct ckpt_ctx *ctx)
> +{
> + struct ckpt_hdr_task_sem_undo *h;
> + int len;
> + int ret = -ENOMEM;
> + struct sem_undo *un;
> + int nsems;
> + short *semadj = NULL;
> +
> + h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_TASK_SEM_UNDO);
> + if (IS_ERR(h))
> + return PTR_ERR(h);
> +
> + len = sizeof(__u16) * h->semadj_count;
s/__u16/__s16/
> + semadj = kzalloc(len, GFP_KERNEL);
> + if (!semadj)
> + goto out;
> +
> + ret = _ckpt_read_buffer(ctx, semadj, len);
> + if (ret < 0)
> + goto out;
Use ckpt_read_payload() - it already allocates the payload buffer.
> +
> + un = find_alloc_undo(current->nsproxy->ipc_ns, h->semid);
> + if (IS_ERR(un)) {
> + ret = PTR_ERR(un);
> + ckpt_debug("unable to find semid %i\n", h->semid);
> + goto out;
> + }
> +
> + nsems = sem_undo_nsems(un, current->nsproxy->ipc_ns);
> + len = sizeof(short) * nsems;
s/short/__s16/
If nsems > h->semadj_count, we may read past the end of the semadj
buffer allocated above, therefore ...
> + memcpy(un->semadj, semadj, len);
> + rcu_read_unlock();
> +
> + if (nsems != h->semadj_count)
... this test should occur before the memcpy() above (see the sketch after the function below).
> + ckpt_err(ctx, -EINVAL,
> + "semid %i has nmsems=%i but %i undo adjustments\n",
> + h->semid, nsems, h->semadj_count);
> + else
> + ckpt_debug("semid %i restored with %i adjustments\n",
> + h->semid, h->semadj_count);
> + out:
> + ckpt_hdr_put(ctx, h);
> + kfree(semadj);
> +
> + return ret;
> +}
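Roughly (untested sketch, assuming a count mismatch should abort the
restart rather than only warn):

    nsems = sem_undo_nsems(un, current->nsproxy->ipc_ns);
    if (nsems != h->semadj_count) {
            rcu_read_unlock();
            ckpt_err(ctx, -EINVAL,
                     "semid %i has nsems=%i but %i undo adjustments\n",
                     h->semid, nsems, h->semadj_count);
            ret = -EINVAL;
            goto out;
    }
    memcpy(un->semadj, semadj, nsems * sizeof(__s16));
    rcu_read_unlock();
    ckpt_debug("semid %i restored with %i adjustments\n",
               h->semid, h->semadj_count);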
> +
> +static void *restore_sem_undo(struct ckpt_ctx *ctx)
> +{
> + struct ckpt_hdr_task_sem_undo_list *h;
> + struct sem_undo_list *ulp = NULL;
> + int i;
> + int ret = 0;
> +
> + h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_TASK_SEM_UNDO_LIST);
> + if (IS_ERR(h))
> + return ERR_PTR(PTR_ERR(h));
> +
> + ulp = alloc_undo_list(current->nsproxy->ipc_ns);
> + if (!ulp) {
> + ret = -ENOMEM;
> + goto out;
> + }
> +
> + for (i = 0; i < h->count; i++) {
> + ret = restore_task_sem_undo_adj(ctx);
> + if (ret < 0)
> + goto out;
> + }
> + out:
> + ckpt_hdr_put(ctx, h);
> + if (ret < 0)
> + return ERR_PTR(ret);
> + else
> + return ulp;
> +}
> +
> +int restore_obj_sem_undo(struct ckpt_ctx *ctx, int sem_undo_objref)
> +{
> + struct sem_undo_list *ulp;
> +
> + if (!sem_undo_objref)
> + return 0; /* Task had no undo list */
> +
> + ulp = ckpt_obj_try_fetch(ctx, sem_undo_objref, CKPT_OBJ_SEM_UNDO);
> + if (IS_ERR(ulp))
> + return PTR_ERR(ulp);
> +
> + /* The first task to restore a shared list should already have this,
> + * but subsequent ones won't, so attach to current in that case
> + */
> + if (!current->sysvsem.undo_list)
> + current->sysvsem.undo_list = ulp;
> +
> + atomic_inc(&ulp->refcnt);
I suspect there is a leak here: atomic_inc is needed only for
tasks other than the first task; the first task already gets the
refcount deep inside find_alloc_undo(), and the hash-table gets
its ref via the obj->grab method.
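IOW, something like (untested):

    /* only tasks after the first need to attach and take a reference;
     * the first task already got its ref inside find_alloc_undo()
     */
    if (!current->sysvsem.undo_list) {
            current->sysvsem.undo_list = ulp;
            atomic_inc(&ulp->refcnt);
    }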
[...]
Oren.
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers