[Devel] Re: [PATCH] Add support for the per-task sem_undo list
Oren Laadan
orenl at cs.columbia.edu
Fri Jul 30 11:42:15 PDT 2010
Dan Smith wrote:
> The semaphore undo list is a set of adjustments to be made to semaphores
> held by a task on exit. Right now, we do not checkpoint or restore this
> list which could cause undesirable behavior by a restarted process on exit.
>
> Signed-off-by: Dan Smith <danms at us.ibm.com>
See comments inline:
> ---
> include/linux/checkpoint.h | 5 +
> include/linux/checkpoint_hdr.h | 18 +++
> ipc/sem.c | 332 +++++++++++++++++++++++++++++++++++++++-
> kernel/checkpoint/process.c | 16 ++
> 4 files changed, 365 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
> index 0437de2..285689a 100644
> --- a/include/linux/checkpoint.h
> +++ b/include/linux/checkpoint.h
> @@ -275,6 +275,11 @@ extern int ckpt_collect_fs(struct ckpt_ctx *ctx, struct task_struct *t);
> extern int checkpoint_obj_fs(struct ckpt_ctx *ctx, struct task_struct *t);
> extern int restore_obj_fs(struct ckpt_ctx *ctx, int fs_objref);
>
> +/* per-task semaphore undo */
> +int checkpoint_obj_sem_undo(struct ckpt_ctx *ctx, struct task_struct *t);
> +int restore_obj_sem_undo(struct ckpt_ctx *ctx, int sem_undo_objref);
> +int ckpt_collect_sem_undo(struct ckpt_ctx *ctx, struct task_struct *t);
I suspect we don't need to collect these, since sem-undo-lists
are already covered by ipc-ns.
[...]
> diff --git a/ipc/sem.c b/ipc/sem.c
> index 37da85e..fdad527 100644
> --- a/ipc/sem.c
> +++ b/ipc/sem.c
> @@ -132,12 +132,49 @@ void sem_exit_ns(struct ipc_namespace *ns)
> }
> #endif
>
> +static int obj_sem_undo_grab(void *ptr)
> +{
> + struct sem_undo_list *ulp = ptr;
> +
> + atomic_inc(&ulp->refcnt);
> + return 0;
> +}
> +
> +static void obj_sem_undo_drop(void *ptr, int lastref)
> +{
> + struct sem_undo_list *ulp = ptr;
> +
> + atomic_dec(&ulp->refcnt);
What happens when the count drops to zero?
> +}
> +
> +static int obj_sem_undo_users(void *ptr)
> +{
> + struct sem_undo_list *ulp = ptr;
> +
> + return atomic_read(&ulp->refcnt);
> +}
> +
> +static void *restore_sem_undo(struct ckpt_ctx *ctx);
> +static int checkpoint_sem_undo(struct ckpt_ctx *ctx, void *ptr);
> +
> +static const struct ckpt_obj_ops ckpt_obj_sem_undo_ops = {
> + .obj_name = "IPC_SEM_UNDO",
> + .obj_type = CKPT_OBJ_SEM_UNDO,
> + .ref_drop = obj_sem_undo_drop,
> + .ref_grab = obj_sem_undo_grab,
> + .ref_users = obj_sem_undo_users,
> + .checkpoint = checkpoint_sem_undo,
> + .restore = restore_sem_undo,
> +};
> +
> void __init sem_init (void)
> {
> sem_init_ns(&init_ipc_ns);
> ipc_init_proc_interface("sysvipc/sem",
> " key semid perms nsems uid gid cuid cgid otime ctime\n",
> IPC_SEM_IDS, sysvipc_sem_proc_show);
> + register_checkpoint_obj(&ckpt_obj_sem_undo_ops);
> +
> }
>
> /*
> @@ -983,6 +1020,20 @@ asmlinkage long SyS_semctl(int semid, int semnum, int cmd, union semun arg)
> SYSCALL_ALIAS(sys_semctl, SyS_semctl);
> #endif
>
> +static struct sem_undo_list *alloc_undo_list(void)
> +{
> + struct sem_undo_list *undo_list;
> +
> + undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL);
> + if (undo_list == NULL)
> + return NULL;
> + spin_lock_init(&undo_list->lock);
> + atomic_set(&undo_list->refcnt, 1);
> + INIT_LIST_HEAD(&undo_list->list_proc);
> +
> + return undo_list;
> +}
> +
> /* If the task doesn't already have a undo_list, then allocate one
> * here. We guarantee there is only one thread using this undo list,
> * and current is THE ONE
> @@ -1000,13 +1051,9 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp)
>
> undo_list = current->sysvsem.undo_list;
> if (!undo_list) {
> - undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL);
> - if (undo_list == NULL)
> + undo_list = alloc_undo_list();
> + if (!undo_list)
> return -ENOMEM;
> - spin_lock_init(&undo_list->lock);
> - atomic_set(&undo_list->refcnt, 1);
> - INIT_LIST_HEAD(&undo_list->list_proc);
> -
> current->sysvsem.undo_list = undo_list;
> }
> *undo_listp = undo_list;
> @@ -1458,3 +1505,276 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
> sma->sem_ctime);
> }
> #endif
> +
> +static int __get_task_semids(struct sem_undo_list *ulp, int *semids, int max)
> +{
> + int count = 0;
> + struct sem_undo *un;
> +
> + if (!ulp)
> + return 0;
> +
> + spin_lock(&ulp->lock);
> + list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) {
> + if (count >= max) {
> + count = -E2BIG;
> + break;
> + }
> + semids[count++] = un->semid;
> + }
> + spin_unlock(&ulp->lock);
> +
> + return count;
> +}
> +
> +static int get_task_semids(struct sem_undo_list *ulp, int **semid_listp)
> +{
> + int ret;
> + int max = 32;
> + int *semid_list = NULL;
> + retry:
> + *semid_listp = krealloc(semid_list, max * sizeof(int), GFP_KERNEL);
> + if (!*semid_listp) {
> + kfree(semid_list);
> + return -ENOMEM;
> + }
> + semid_list = *semid_listp;
> +
> + ret = __get_task_semids(ulp, semid_list, max);
> + if (ret == -E2BIG) {
> + max *= 2;
> + goto retry;
What about @max overflowing and becoming negative? I guess
that's covered by the test of the krealloc() retval.
> + } else if (ret < 0) {
> + kfree(semid_list);
> + *semid_listp = NULL;
> + }
> +
> + return ret;
> +}
> +
> +int checkpoint_sem_undo_adj(struct ckpt_ctx *ctx, struct sem_undo *un)
> +{
> + int nsems;
> + int ret;
> + short *semadj = NULL;
> + struct sem_array *sma;
> + struct ckpt_hdr_task_sem_undo *h = NULL;
> +
> + sma = sem_lock(ctx->root_nsproxy->ipc_ns, un->semid);
> + if (IS_ERR(sma)) {
> + ckpt_debug("unable to lock semid %i (wrong ns?)\n", un->semid);
> + return PTR_ERR(sma);
> + }
> +
> + nsems = sma->sem_nsems;
> + sem_getref_and_unlock(sma);
> +
> + h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_TASK_SEM_UNDO);
> + if (!h)
> + goto putref_abort;
This will oops because ckpt_hdr_put() is not NULL-friendly...
> +
> + semadj = kzalloc(nsems * sizeof(short), GFP_KERNEL);
> + if (!semadj)
> + goto putref_abort;
> +
> + sem_lock_and_putref(sma);
> +
> + h->semid = un->semid;
> + h->semadj_count = nsems;
> + memcpy(semadj, un->semadj, h->semadj_count * sizeof(short));
sizeof(short) --> sizeof(__s16)
And for safety, maybe a compile assertion:
sizeof(__s16) == sizeof(*un->semadj)
> +
> + sem_unlock(sma);
> +
> + ret = ckpt_write_obj(ctx, (struct ckpt_hdr *)h);
> + if (ret == 0)
> + ret = ckpt_write_buffer(ctx, semadj, nsems * sizeof(short));
> +
> + kfree(semadj);
ckpt_hdr_put()...
> +
> + return ret;
> +
> + putref_abort:
> + sem_putref(sma);
> + ckpt_hdr_put(ctx, h);
> +
> + return -ENOMEM;
> +}
> +
> +int write_sem_undo_list(struct ckpt_ctx *ctx, struct sem_undo_list *ulp,
> + int count, int *semids)
> +{
> + int i;
> + int ret;
> +
> + for (i = 0; i < count; i++) {
> + struct sem_undo *un;
> +
> + spin_lock(&ulp->lock);
> + un = lookup_undo(ulp, semids[i]);
> + spin_unlock(&ulp->lock);
> +
> + if (!un) {
> + ckpt_debug("unable to lookup semid %i\n", semids[i]);
> + return -EINVAL;
> + }
> +
> + ret = checkpoint_sem_undo_adj(ctx, un);
> + ckpt_debug("checkpoint_sem_undo: %i\n", ret);
> + if (ret < 0)
> + return ret;
> + }
> +
> + return 0;
> +}
> +
> +static int checkpoint_sem_undo(struct ckpt_ctx *ctx, void *ptr)
> +{
> + int ret;
> + int *semids = NULL;
> + struct sem_undo_list *ulp = ptr;
> + struct ckpt_hdr_task_sem_undo_list *h;
> +
> + h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_TASK_SEM_UNDO_LIST);
> + if (!h)
> + return -ENOMEM;
> +
> + ret = get_task_semids(ulp, &semids);
> + if (ret < 0)
> + goto out;
> + h->count = ret;
> +
> + ret = ckpt_write_obj(ctx, (struct ckpt_hdr *)h);
> + if (ret < 0)
> + goto out;
> +
> + ret = write_sem_undo_list(ctx, ulp, h->count, semids);
> + out:
> + ckpt_hdr_put(ctx, h);
> + kfree(semids);
> +
> + return ret;
> +}
> +
> +int checkpoint_obj_sem_undo(struct ckpt_ctx *ctx, struct task_struct *t)
> +{
> + struct sem_undo_list *ulp;
> + int sem_undo_objref = 0;
> +
> + ulp = t->sysvsem.undo_list;
> + if (ulp)
> + sem_undo_objref = checkpoint_obj(ctx, ulp, CKPT_OBJ_SEM_UNDO);
> +
> + return sem_undo_objref;
:)
> +}
> +
> +/* Count the number of sems for the given sem_undo->semid */
> +static int sem_undo_nsems(struct sem_undo *un, struct ipc_namespace *ns)
> +{
> + struct sem_array *sma;
> + int nsems;
> +
> + sma = sem_lock(ns, un->semid);
> + if (IS_ERR(sma))
> + return PTR_ERR(sma);
> +
> + nsems = sma->sem_nsems;
> + sem_unlock(sma);
> +
> + return nsems;
> +}
> +
> +static int restore_task_sem_undo_adj(struct ckpt_ctx *ctx)
> +{
> + struct ckpt_hdr_task_sem_undo *h;
> + int len;
> + int ret = -ENOMEM;
> + struct sem_undo *un;
> + int nsems;
> +
> + h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_TASK_SEM_UNDO);
> + if (IS_ERR(h))
> + return PTR_ERR(h);
> +
> + un = find_alloc_undo(current->nsproxy->ipc_ns, h->semid);
> + if (IS_ERR(un)) {
> + ret = PTR_ERR(un);
> + ckpt_debug("unable to find semid %i\n", h->semid);
> + goto out_nolock;
> + }
> +
> + nsems = sem_undo_nsems(un, current->nsproxy->ipc_ns);
> + if (nsems != h->semadj_count) {
> + ckpt_err(ctx, -EINVAL,
> + "semid %i has nmsems=%i but %i undo adjustments\n",
> + h->semid, nsems, h->semadj_count);
> + goto out;
> + }
> +
> + len = sizeof(short) * h->semadj_count;
here, too: sizeof(short) --> sizeof(__s16)
And for safety, maybe a compile assertion:
sizeof(__s16) == sizeof(*un->semadj)
Do the values in the buffer require a sanity check?
> + ret = _ckpt_read_buffer(ctx, un->semadj, len);
The rcu_read lock is taken here --> use a temporary buffer
like in checkpoint, or don't keep the lock at all, just a
reference.
> + if (ret < 0)
> + goto out;
> +
> + ckpt_debug("semid %i restored with %i adjustments\n",
> + h->semid, h->semadj_count);
And this debug should also be outside the rcu_read_unlock().
> + out:
> + rcu_read_unlock(); /* was taken by find_alloc_undo() */
> + out_nolock:
> + ckpt_hdr_put(ctx, h);
> +
> + return ret;
> +}
> +
> +static void *restore_sem_undo(struct ckpt_ctx *ctx)
> +{
> + struct ckpt_hdr_task_sem_undo_list *h;
> + struct sem_undo_list *ulp = NULL;
> + int i;
> + int ret = 0;
> +
> + h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_TASK_SEM_UNDO_LIST);
> + if (IS_ERR(h))
> + return ERR_PTR(PTR_ERR(h));
> +
> + ulp = alloc_undo_list();
> + if (!ulp) {
> + ret = -ENOMEM;
> + goto out;
> + }
> +
> + for (i = 0; i < h->count; i++) {
> + ret = restore_task_sem_undo_adj(ctx);
> + if (ret < 0)
> + goto out;
> + }
> + out:
> + ckpt_hdr_put(ctx, h);
> + if (ret < 0)
> + return ERR_PTR(ret);
> + else
> + return ulp;
> +}
> +
> +int restore_obj_sem_undo(struct ckpt_ctx *ctx, int sem_undo_objref)
> +{
> + struct sem_undo_list *ulp;
> +
> + if (!sem_undo_objref)
> + return 0; /* Task had no undo list */
> +
> + ulp = ckpt_obj_try_fetch(ctx, sem_undo_objref, CKPT_OBJ_SEM_UNDO);
> + if (IS_ERR(ulp))
> + return PTR_ERR(ulp);
If the fetch succeeds and we are not the original task in which
context the undo-list was restored, then we need to explicitly
attach the list to ourselves.
> +
> + return 0;
> +}
> +
> +int ckpt_collect_sem_undo(struct ckpt_ctx *ctx, struct task_struct *t)
> +{
> + struct sem_undo_list *ulp = t->sysvsem.undo_list;
> +
> + if (ulp)
> + return ckpt_obj_collect(ctx, ulp, CKPT_OBJ_SEM_UNDO);
> + else
> + return 0;
> +}
I'd assume that we don't need to explicitly collect the undo-list
since it requires that they also share ipc-ns, and that is already
collected.
> diff --git a/kernel/checkpoint/process.c b/kernel/checkpoint/process.c
> index 936675a..2d50ac9 100644
> --- a/kernel/checkpoint/process.c
> +++ b/kernel/checkpoint/process.c
> @@ -236,6 +236,7 @@ static int checkpoint_task_objs(struct ckpt_ctx *ctx, struct task_struct *t)
> int files_objref;
> int mm_objref;
> int fs_objref;
> + int sem_undo_objref;
> int sighand_objref;
> int signal_objref;
> int first, ret;
> @@ -283,6 +284,12 @@ static int checkpoint_task_objs(struct ckpt_ctx *ctx, struct task_struct *t)
> return fs_objref;
> }
>
> + sem_undo_objref = checkpoint_obj_sem_undo(ctx, t);
> + if (sem_undo_objref < 0) {
> + ckpt_err(ctx, sem_undo_objref, "%(T)process sem_undo\n");
> + return sem_undo_objref;
> + }
> +
> sighand_objref = checkpoint_obj_sighand(ctx, t);
> ckpt_debug("sighand: objref %d\n", sighand_objref);
> if (sighand_objref < 0) {
> @@ -311,6 +318,7 @@ static int checkpoint_task_objs(struct ckpt_ctx *ctx, struct task_struct *t)
> h->files_objref = files_objref;
> h->mm_objref = mm_objref;
> h->fs_objref = fs_objref;
> + h->sem_undo_objref = sem_undo_objref;
> h->sighand_objref = sighand_objref;
> h->signal_objref = signal_objref;
> ret = ckpt_write_obj(ctx, &h->h);
> @@ -492,6 +500,9 @@ int ckpt_collect_task(struct ckpt_ctx *ctx, struct task_struct *t)
> ret = ckpt_collect_fs(ctx, t);
> if (ret < 0)
> return ret;
> + ret = ckpt_collect_sem_undo(ctx, t);
> + if (ret < 0)
> + return ret;
> ret = ckpt_collect_sighand(ctx, t);
>
> return ret;
> @@ -679,6 +690,11 @@ static int restore_task_objs(struct ckpt_ctx *ctx)
> if (ret < 0)
> return ret;
>
> + ret = restore_obj_sem_undo(ctx, h->sem_undo_objref);
> + ckpt_debug("sem_undo: ret %d\n", ret);
> + if (ret < 0)
> + return ret;
> +
> ret = restore_obj_sighand(ctx, h->sighand_objref);
> ckpt_debug("sighand: ret %d (%p)\n", ret, current->sighand);
> if (ret < 0)
Oren.
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
More information about the Devel
mailing list