[Devel] Re: [PATCH] [RFC] checkpoint/restart timerfd
Oren Laadan
orenl at cs.columbia.edu
Mon Nov 16 10:56:03 PST 2009
Matt Helsley wrote:
> Support checkpoint/restart of timers specified via timerfd. Checkpoint
> essentially does the timerfd_gettime() syscall and saves the expired flags
> and tick count. This ensures there will be no lost expirations/ticks
> between checkpoint restart.
>
> This should largely work as expected since the time returned from gettime
> is always relative.
>
> However, like any time-sensitive state, timerfds set with TFD_TIMER_ABSTIME
> may expire/tick at the "wrong time" because that time has long since passed.
> Short of introducing time namespaces there's almost nothing that can be done
> to checkpoint these uses of timerfd. Would it make more sense to mark timerfds
> which were (re)set with TFD_TIMER_ABSTIME and refuse to checkpoint them rather
> than deliver a "best-effort" expiration/tick?
>
> Signed-off-by: Matt Helsley <matthltc at us.ibm.com>
[...]
> +
> +#ifdef CONFIG_CHECKPOINT
> +static int timerfd_checkpoint(struct ckpt_ctx *ckpt_ctx, struct file *file)
> +{
> + struct itimerspec kotmr;
> + struct timerfd_ctx *ctx;
> + struct ckpt_hdr_file_timerfd *h;
> + int ret = -ENOMEM;
> +
> + h = ckpt_hdr_get_type(ckpt_ctx, sizeof(*h), CKPT_HDR_FILE);
> + if (!h)
> + return -ENOMEM;
> + h->common.f_type = CKPT_FILE_EVENTFD;
> + ret = checkpoint_file_common(ckpt_ctx, file, &h->common);
> + if (ret < 0)
> + goto out;
> +
> + ctx = file->private_data;
> + /*
> + * There is no way to recover the hrtimer mode or flags
> + * used during create or settime. Rely on the timerfd state
> + * to reflect those.
> + */
> + spin_lock_irq(&ctx->wqh.lock);
> + h->expired = ctx->expired; /* must precede __timerfd_gettime() */
> + h->ticks = ctx->ticks; /* must precede __timerfd_gettime() */
> + h->clockid = ctx->clockid;
> + __timerfd_gettime(ctx, &kotmr);
> + spin_unlock_irq(&ctx->wqh.lock);
Can this have a race with the signal c/r code ? E.g. first we
record a timer which is about to expire, then it expires, and
then we record the pending signals which include a signal from
this timer: the restarted task will have the signal pending
and soon after the timer will expire and possibly generate a
second signal ?
> + ckpt_copy_itimerspec(CKPT_CPT, &h->spec, &kotmr);
> + ret = ckpt_write_obj(ckpt_ctx, &h->common.h);
> +out:
> + ckpt_hdr_put(ckpt_ctx, h);
> + return ret;
> +}
> +
> +struct file *timerfd_restore(struct ckpt_ctx *ckpt_ctx,
> + struct ckpt_hdr_file *ptr)
> +{
> + struct itimerspec dotmr; /* dummy */
> + struct itimerspec kitmr;
> + struct ckpt_hdr_file_timerfd *h = (struct ckpt_hdr_file_timerfd *)ptr;
> + struct file *file;
> + struct timerfd_ctx *ctx;
> + int ufd, ret;
> +
> + /* Known: CKPT_HDR_FILE and f_type == CKPT_FILE_TIMERFD */
> + if ((h->common.h.len != sizeof(*h)) ||
> + (h->common.f_flags & ~TFD_SHARED_FCNTL_FLAGS))
> + return ERR_PTR(-EINVAL);
> +
> + /* Fail early if the timespecs are bad */
> + ckpt_copy_itimerspec(CKPT_RST, &h->spec, &kitmr);
> + if (!timespec_valid(&kitmr.it_value) ||
> + !timespec_valid(&kitmr.it_interval))
> + return ERR_PTR(-EINVAL);
> +
> + ufd = sys_timerfd_create(h->clockid,
> + h->common.f_flags & TFD_SHARED_FCNTL_FLAGS);
> + if (ufd < 0)
> + return ERR_PTR(ufd);
> + file = fget(ufd);
> + sys_close(ufd);
> + if (!file)
> + return ERR_PTR(-EBUSY);
> +
> + ret = restore_file_common(ckpt_ctx, file, &h->common);
> + if (ret < 0) {
> + fput(file);
> + return ERR_PTR(ret);
> + }
> +
> + ctx = file->private_data;
> +#define TFD_TIMER_RELTIME 0
> + ret = sys_timerfd_settime(ufd, TFD_TIMER_RELTIME,
> + &kitmr, &dotmr);
> + /* Is there a race here between settime and spin_lock()? */
> + spin_lock_irq(&ctx->wqh.lock);
> + ctx->expired = h->expired;
> + ctx->ticks = h->ticks;
> + spin_unlock_irq(&ctx->wqh.lock);
> + return file;
> +}
> +#else
> +#define timerfd_checkpoint NULL
> +#endif
> +
> static const struct file_operations timerfd_fops = {
> .release = timerfd_release,
> .poll = timerfd_poll,
> .read = timerfd_read,
> + .checkpoint = timerfd_checkpoint,
> };
>
> static struct file *timerfd_fget(int fd)
> @@ -275,19 +377,10 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
> if (IS_ERR(file))
> return PTR_ERR(file);
> ctx = file->private_data;
> -
> spin_lock_irq(&ctx->wqh.lock);
> - if (ctx->expired && ctx->tintv.tv64) {
> - ctx->expired = 0;
> - ctx->ticks +=
> - hrtimer_forward_now(&ctx->tmr, ctx->tintv) - 1;
> - hrtimer_restart(&ctx->tmr);
> - }
> - kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
> - kotmr.it_interval = ktime_to_timespec(ctx->tintv);
> + __timerfd_gettime(ctx, &kotmr);
> spin_unlock_irq(&ctx->wqh.lock);
> fput(file);
> -
> return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0;
> }
>
> diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
> index dfcb59b..f429386 100644
> --- a/include/linux/checkpoint.h
> +++ b/include/linux/checkpoint.h
> @@ -321,6 +321,17 @@ static inline int ckpt_validate_errno(int errno)
> memcpy(LIVE, SAVE, count * sizeof(*SAVE)); \
> } while (0)
>
> +struct ckpt_itimerspec;
> +struct itimerspec;
> +static inline void ckpt_copy_itimerspec(int op, struct ckpt_itimerspec *h,
> + struct itimerspec *tmr)
> +{
> + CKPT_COPY(op, h->interval.tv_sec, tmr->it_interval.tv_sec);
> + CKPT_COPY(op, h->interval.tv_nsec, tmr->it_interval.tv_nsec);
> + CKPT_COPY(op, h->value.tv_sec, tmr->it_value.tv_sec);
> + CKPT_COPY(op, h->value.tv_nsec, tmr->it_value.tv_nsec);
> +}
> +
Is there a reason to place this here and not in timerfd source file ?
> /* debugging flags */
> #define CKPT_DBASE 0x1 /* anything */
> #define CKPT_DSYS 0x2 /* generic (system) */
[...]
Oren.
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
More information about the Devel
mailing list