[Devel] Re: [PATCH 3/4] deferqueue: generic queue to defer work

Fri Apr 17 13:33:46 PDT 2009

Thanks, taken.

Serge E. Hallyn wrote:
> Add an interface to postpone an action until the end of the entire
> checkpoint or restart operation. This is useful when during the
> scan of tasks an operation cannot be performed in place, to avoid
> the need for a second scan.
> 
> One use case is when restoring an ipc shared memory region that has
> been deleted (but is still attached), during restart it needs to be
> created, attached and then deleted. However, creation and attachment
> are performed in distinct locations, so deletion cannot be performed
> on the spot. Instead, this work (delete) is deferred until later.
> (This example is in one of the following patches).
> 
> The interface is as follows:
> 
> deferqueue_create(void):
>   Allocate a new deferqueue.
> 
> deferqueue_run(deferqueue):
>   Execute all the pending works in the queue. Returns the number of
>   works executed, or an error.
> 
> deferqueue_add(deferqueue, function, data, size):
>   Enqueue a postponed work. @function is the function to do the work,
>   which will be called with @data as an argument. @size tells the
>   size of data.
> 
> deferqueue_destroy(deferqueue):
>   Free the deferqueue and any queued items.
> 
> Why aren't we using the existing kernel workqueue mechanism?  We need
> to defer the work until the end of the operation: not earlier, since we
> need other things to be in place; not later, to not block waiting for
> it. However, the workqueue schedules the work for 'some time later'.
> Also, the kernel workqueue may run in any task context, but we often
> require that an operation be run in the context of some specific
> restarting task (e.g., restoring IPC state of a certain ipc_ns).
> 
> Instead, this mechanism is a simple way for the c/r operation as a
> whole, and later a task in particular, to defer some action until
> later (but not arbitrarily later) _in the restart_ operation.
> 
> Signed-off-by: Oren Laadan <orenl at cs.columbia.edu>
> Signed-off-by: Serge E. Hallyn <serue at us.ibm.com>
> ---
>  checkpoint/Kconfig         |    5 ++
>  include/linux/deferqueue.h |   31 ++++++++++++++
>  kernel/Makefile            |    1 +
>  kernel/deferqueue.c        |   94 ++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 131 insertions(+), 0 deletions(-)
>  create mode 100644 include/linux/deferqueue.h
>  create mode 100644 kernel/deferqueue.c
> 
> diff --git a/checkpoint/Kconfig b/checkpoint/Kconfig
> index 1761b0a..53ed6fa 100644
> --- a/checkpoint/Kconfig
> +++ b/checkpoint/Kconfig
> @@ -2,9 +2,14 @@
>  # implemented the hooks for processor state etc. needed by the
>  # core checkpoint/restart code.
>  
> +config DEFERQUEUE
> +	bool
> +	default n
> +
>  config CHECKPOINT
>  	bool "Enable checkpoint/restart (EXPERIMENTAL)"
>  	depends on CHECKPOINT_SUPPORT && EXPERIMENTAL
> +	select DEFERQUEUE
>  	help
>  	  Application checkpoint/restart is the ability to save the
>  	  state of a running application so that it can later resume
> diff --git a/include/linux/deferqueue.h b/include/linux/deferqueue.h
> new file mode 100644
> index 0000000..fbdc897
> --- /dev/null
> +++ b/include/linux/deferqueue.h
> @@ -0,0 +1,31 @@
> +/*
> + * deferqueue.h --- deferred work queue handling for Linux.
> + */
> +
> +#ifndef _LINUX_DEFERQUEUE_H
> +#define _LINUX_DEFERQUEUE_H
> +
> +#include <linux/list.h>
> +#include <linux/slab.h>
> +#include <linux/spinlock.h>
> +
> +typedef int (*deferqueue_func_t)(void *);	/* deferred-work callback; called with the entry's data[] */
> +
> +struct deferqueue_entry {
> +	deferqueue_func_t function;	/* callback invoked by deferqueue_run() */
> +	struct list_head list;	/* link used to queue this entry on a deferqueue_head */
> +	char data[0];	/* payload copied in by deferqueue_add(), passed to function */
> +};
> +
> +struct deferqueue_head {
> +	spinlock_t lock;	/* held by deferqueue_add() around the list insertion */
> +	struct list_head list;	/* queued deferqueue_entry items */
> +};
> +
> +struct deferqueue_head *deferqueue_create(void);	/* NULL if allocation fails */
> +void deferqueue_destroy(struct deferqueue_head *h);	/* frees queued entries, then h */
> +int deferqueue_add(struct deferqueue_head *head, deferqueue_func_t function,
> +		void *data, int size);	/* copies size bytes of data into a new entry */
> +int deferqueue_run(struct deferqueue_head *head);	/* calls, then frees, every entry */
> +
> +#endif
> diff --git a/kernel/Makefile b/kernel/Makefile
> index e4791b3..0848374 100644
> --- a/kernel/Makefile
> +++ b/kernel/Makefile
> @@ -22,6 +22,7 @@ CFLAGS_REMOVE_cgroup-debug.o = -pg
>  CFLAGS_REMOVE_sched_clock.o = -pg
>  endif
>  
> +obj-$(CONFIG_DEFERQUEUE) += deferqueue.o
>  obj-$(CONFIG_FREEZER) += freezer.o
>  obj-$(CONFIG_PROFILING) += profile.o
>  obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
> diff --git a/kernel/deferqueue.c b/kernel/deferqueue.c
> new file mode 100644
> index 0000000..35abab0
> --- /dev/null
> +++ b/kernel/deferqueue.c
> @@ -0,0 +1,94 @@
> +/*
> + *  Checkpoint-restart - infrastructure to manage deferred work
> + *
> + *  This differs from a workqueue in that the work must be deferred
> + *  until specifically run by the caller.
> + *
> + *  As the only user currently is checkpoint/restart, which has
> + *  very simple usage, the locking is kept simple.  Adding entries
> + *  is protected by the head->lock.  But deferqueue_run() is only
> + *  called once, after all entries have been added.  So it is not
> + *  protected.  Similarly, _destroy is only called once when the
> + *  cr_ctx is released, so it is not locked or refcounted.  These
> + *  can of course be added if needed by other users.
> + *
> + *  Copyright (C) 2009 Oren Laadan
> + *
> + *  This file is subject to the terms and conditions of the GNU General Public
> + *  License.  See the file COPYING in the main directory of the Linux
> + *  distribution for more details.
> + *
> + */
> +
> +#include <linux/module.h>
> +#include <linux/kernel.h>
> +#include <linux/deferqueue.h>
> +
> +/*
> + * deferqueue_create - allocate and initialise an empty deferqueue
> + *
> + * returns: the new queue head, or NULL if the allocation failed
> + */
> +struct deferqueue_head *deferqueue_create(void)
> +{
> +	struct deferqueue_head *head;
> +
> +	head = kmalloc(sizeof(*head), GFP_KERNEL);
> +	if (!head)
> +		return NULL;
> +
> +	INIT_LIST_HEAD(&head->list);
> +	spin_lock_init(&head->lock);
> +	return head;
> +}
> +
> +/*
> + * deferqueue_destroy - free a deferqueue and anything still queued on it
> + * @h: deferqueue_head to free
> + *
> + * Entries that are still queued are discarded without their function
> + * being called.  No locking is done here.
> + */
> +void deferqueue_destroy(struct deferqueue_head *h)
> +{
> +	struct deferqueue_entry *entry, *next;
> +
> +	if (!list_empty(&h->list))
> +		pr_debug("%s: freeing non-empty queue\n", __func__);
> +
> +	/* on an empty list this loop body never executes */
> +	list_for_each_entry_safe(entry, next, &h->list, list) {
> +		list_del(&entry->list);
> +		kfree(entry);
> +	}
> +	kfree(h);
> +}
> +
> +/*
> + * deferqueue_add - queue a piece of work to be run later
> + * @head: deferqueue_head to add the work to
> + * @function: callback to store in the new entry
> + * @data: argument for @function; @size bytes are copied into the entry
> + * @size: number of bytes at @data
> + *
> + * returns: 0 on success, -EINVAL if @size is negative, -ENOMEM if the
> + * entry cannot be allocated
> + */
> +int deferqueue_add(struct deferqueue_head *head, deferqueue_func_t function,
> +		void *data, int size)
> +{
> +	struct deferqueue_entry *wq;
> +
> +	/* a negative size would wrap to a huge length in kmalloc/memcpy */
> +	if (size < 0)
> +		return -EINVAL;
> +
> +	/* room for the entry itself (not a pointer to it) plus the payload */
> +	wq = kmalloc(sizeof(*wq) + size, GFP_KERNEL);
> +	if (!wq)
> +		return -ENOMEM;
> +
> +	wq->function = function;
> +	memcpy(wq->data, data, size);
> +
> +	pr_debug("%s: adding work %p function %p\n", __func__, wq,
> +			wq->function);
> +	spin_lock(&head->lock);
> +	/* list_add_tail(new, head): append the new entry to the queue */
> +	list_add_tail(&wq->list, &head->list);
> +	spin_unlock(&head->lock);
> +	return 0;
> +}
> +
> +/*
> + * deferqueue_run - perform all work in the work queue
> + * @head: deferqueue_head from which to run
> + *
> + * Every queued function is called in list order and its entry is then
> + * unlinked and freed, even if an earlier function failed.
> + *
> + * returns: number of works performed, or the first negative value
> + * returned by a work function
> + */
> +int deferqueue_run(struct deferqueue_head *head)
> +{
> +	struct deferqueue_entry *wq, *n;
> +	int nr = 0;
> +	int err = 0;
> +	int ret;
> +
> +	list_for_each_entry_safe(wq, n, &head->list, list) {
> +		pr_debug("doing work %p function %p\n", wq, wq->function);
> +		ret = wq->function(wq->data);
> +		if (ret < 0) {
> +			pr_debug("wq function failed %d\n", ret);
> +			/* remember only the first failure for the caller */
> +			if (!err)
> +				err = ret;
> +		}
> +		list_del(&wq->list);
> +		kfree(wq);
> +		nr++;
> +	}
> +
> +	return err ? err : nr;
> +}
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers