[Devel] Re: [PATCH] [RFC] checkpoint/restart timerfd
Matt Helsley
matthltc at us.ibm.com
Mon Nov 16 11:55:30 PST 2009
On Mon, Nov 16, 2009 at 01:56:03PM -0500, Oren Laadan wrote:
>
>
> Matt Helsley wrote:
> > Support checkpoint/restart of timers specified via timerfd. Checkpoint
> > essentially does the timerfd_gettime() syscall and saves the expired flags
> > and tick count. This ensures there will be no lost expirations/ticks
> > between checkpoint restart.
> >
> > This should largely work as expected since the time returned from gettime
> > is always relative.
> >
> > However, like any time-sensitive state, timerfds set with TFD_TIMER_ABSTIME
> > may expire/tick at the "wrong time" because that time has long since passed.
> > Short of introducing time namespaces there's almost nothing that can be done
> > to checkpoint these uses of timerfd. Would it make more sense to mark timerfds
> > which were (re)set with TFD_TIMER_ABSTIME and refuse to checkpoint them rather
> > than deliver a "best-effort" expiration/tick?
> >
> > Signed-off-by: Matt Helsley <matthltc at us.ibm.com>
>
> [...]
>
> > +
> > +#ifdef CONFIG_CHECKPOINT
> > +static int timerfd_checkpoint(struct ckpt_ctx *ckpt_ctx, struct file *file)
> > +{
> > + struct itimerspec kotmr;
> > + struct timerfd_ctx *ctx;
> > + struct ckpt_hdr_file_timerfd *h;
> > + int ret = -ENOMEM;
> > +
> > + h = ckpt_hdr_get_type(ckpt_ctx, sizeof(*h), CKPT_HDR_FILE);
> > + if (!h)
> > + return -ENOMEM;
> > + h->common.f_type = CKPT_FILE_EVENTFD;
> > + ret = checkpoint_file_common(ckpt_ctx, file, &h->common);
> > + if (ret < 0)
> > + goto out;
> > +
> > + ctx = file->private_data;
> > + /*
> > + * There is no way to recover the hrtimer mode or flags
> > + * used during create or settime. Rely on the timerfd state
> > + * to reflect those.
> > + */
> > + spin_lock_irq(&ctx->wqh.lock);
> > + h->expired = ctx->expired; /* must precede __timerfd_gettime() */
> > + h->ticks = ctx->ticks; /* must precede __timerfd_gettime() */
> > + h->clockid = ctx->clockid;
> > + __timerfd_gettime(ctx, &kotmr);
> > + spin_unlock_irq(&ctx->wqh.lock);
>
> Can this have a race with the signal c/r code ? E.g. first we
> record a timer which is about to expire, then it expires, and
> then we record the pending signals which include a signal from
> this timer: the restarted task will have the signal pending
> and soon after the timer will expire and possibly generate a
> second signal ?
Except timerfd doesn't use signals -- it uses a wait queue associated
with the fd. If the timer is pending then we checkpoint it and during
restart we will recreate the pending timer. The task will then either
poll() or do work until the timer expires. When the timer expires
timerfd_tmrproc() wakes the restarted task if its poll()'ing. If the task
was not poll()'ing then it can still notice the change by reading "ticks".
etc.
Cheers,
-Matt
>
> > + ckpt_copy_itimerspec(CKPT_CPT, &h->spec, &kotmr);
> > + ret = ckpt_write_obj(ckpt_ctx, &h->common.h);
> > +out:
> > + ckpt_hdr_put(ckpt_ctx, h);
> > + return ret;
> > +}
> > +
> > +struct file *timerfd_restore(struct ckpt_ctx *ckpt_ctx,
> > + struct ckpt_hdr_file *ptr)
> > +{
> > + struct itimerspec dotmr; /* dummy */
> > + struct itimerspec kitmr;
> > + struct ckpt_hdr_file_timerfd *h = (struct ckpt_hdr_file_timerfd *)ptr;
> > + struct file *file;
> > + struct timerfd_ctx *ctx;
> > + int ufd, ret;
> > +
> > + /* Known: CKPT_HDR_FILE and f_type == CKPT_FILE_TIMERFD */
> > + if ((h->common.h.len != sizeof(*h)) ||
> > + (h->common.f_flags & ~TFD_SHARED_FCNTL_FLAGS))
> > + return ERR_PTR(-EINVAL);
> > +
> > + /* Fail early if the timespecs are bad */
> > + ckpt_copy_itimerspec(CKPT_RST, &h->spec, &kitmr);
> > + if (!timespec_valid(&kitmr.it_value) ||
> > + !timespec_valid(&kitmr.it_interval))
> > + return ERR_PTR(-EINVAL);
> > +
> > + ufd = sys_timerfd_create(h->clockid,
> > + h->common.f_flags & TFD_SHARED_FCNTL_FLAGS);
> > + if (ufd < 0)
> > + return ERR_PTR(ufd);
> > + file = fget(ufd);
> > + sys_close(ufd);
> > + if (!file)
> > + return ERR_PTR(-EBUSY);
> > +
> > + ret = restore_file_common(ckpt_ctx, file, &h->common);
> > + if (ret < 0) {
> > + fput(file);
> > + return ERR_PTR(ret);
> > + }
> > +
> > + ctx = file->private_data;
> > +#define TFD_TIMER_RELTIME 0
> > + ret = sys_timerfd_settime(ufd, TFD_TIMER_RELTIME,
> > + &kitmr, &dotmr);
> > + /* Is there a race here between settime and spin_lock()? */
> > + spin_lock_irq(&ctx->wqh.lock);
> > + ctx->expired = h->expired;
> > + ctx->ticks = h->ticks;
> > + spin_unlock_irq(&ctx->wqh.lock);
> > + return file;
> > +}
> > +#else
> > +#define timerfd_checkpoint NULL
> > +#endif
> > +
> > static const struct file_operations timerfd_fops = {
> > .release = timerfd_release,
> > .poll = timerfd_poll,
> > .read = timerfd_read,
> > + .checkpoint = timerfd_checkpoint,
> > };
> >
> > static struct file *timerfd_fget(int fd)
> > @@ -275,19 +377,10 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
> > if (IS_ERR(file))
> > return PTR_ERR(file);
> > ctx = file->private_data;
> > -
> > spin_lock_irq(&ctx->wqh.lock);
> > - if (ctx->expired && ctx->tintv.tv64) {
> > - ctx->expired = 0;
> > - ctx->ticks +=
> > - hrtimer_forward_now(&ctx->tmr, ctx->tintv) - 1;
> > - hrtimer_restart(&ctx->tmr);
> > - }
> > - kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
> > - kotmr.it_interval = ktime_to_timespec(ctx->tintv);
> > + __timerfd_gettime(ctx, &kotmr);
> > spin_unlock_irq(&ctx->wqh.lock);
> > fput(file);
> > -
> > return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0;
> > }
> >
> > diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
> > index dfcb59b..f429386 100644
> > --- a/include/linux/checkpoint.h
> > +++ b/include/linux/checkpoint.h
> > @@ -321,6 +321,17 @@ static inline int ckpt_validate_errno(int errno)
> > memcpy(LIVE, SAVE, count * sizeof(*SAVE)); \
> > } while (0)
> >
> > +struct ckpt_itimerspec;
> > +struct itimerspec;
> > +static inline void ckpt_copy_itimerspec(int op, struct ckpt_itimerspec *h,
> > + struct itimerspec *tmr)
> > +{
> > + CKPT_COPY(op, h->interval.tv_sec, tmr->it_interval.tv_sec);
> > + CKPT_COPY(op, h->interval.tv_nsec, tmr->it_interval.tv_nsec);
> > + CKPT_COPY(op, h->value.tv_sec, tmr->it_value.tv_sec);
> > + CKPT_COPY(op, h->value.tv_nsec, tmr->it_value.tv_nsec);
> > +}
> > +
>
> Is there a reason to place this here and not in timerfd source file ?
>
> > /* debugging flags */
> > #define CKPT_DBASE 0x1 /* anything */
> > #define CKPT_DSYS 0x2 /* generic (system) */
>
> [...]
>
> Oren.
>
_______________________________________________
Containers mailing list
Containers at lists.linux-foundation.org
https://lists.linux-foundation.org/mailman/listinfo/containers
More information about the Devel
mailing list