[PATCH] timerfd: Implement c/r procedure
Cyrill Gorcunov
gorcunov at openvz.org
Tue Jun 10 04:43:56 PDT 2014
Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
Makefile.crtools | 1 +
cr-restore.c | 18 +++++
files.c | 3 +
include/proc_parse.h | 2 +
include/restorer.h | 4 ++
include/timerfd.h | 33 +++++++++
pie/restorer.c | 25 +++++++
proc_parse.c | 60 ++++++++++++++++
timerfd.c | 191 +++++++++++++++++++++++++++++++++++++++++++++++++++
9 files changed, 337 insertions(+)
create mode 100644 include/timerfd.h
create mode 100644 timerfd.c
diff --git a/Makefile.crtools b/Makefile.crtools
index 4e81afe49ac3..6033b2ce04a9 100644
--- a/Makefile.crtools
+++ b/Makefile.crtools
@@ -57,6 +57,7 @@ obj-y += pagemap-cache.o
obj-y += kerndat.o
obj-y += stats.o
obj-y += cgroup.o
+obj-y += timerfd.o
obj-y += string.o
obj-y += sigframe.o
ifeq ($(VDSO),y)
diff --git a/cr-restore.c b/cr-restore.c
index 573b989b4bc1..8ae51dfb4e69 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -69,6 +69,7 @@
#include "rst-malloc.h"
#include "plugin.h"
#include "cgroup.h"
+#include "timerfd.h"
#include "parasite-syscall.h"
@@ -153,6 +154,7 @@ static struct collect_image_info *cinfos[] = {
&tty_cinfo,
&tunfile_cinfo,
&ext_file_cinfo,
+ &timerfd_cinfo,
};
static int root_prepare_shared(void)
@@ -2308,6 +2310,9 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
void *tcp_socks_mem;
unsigned long tcp_socks;
+ void *timerfd_mem;
+ unsigned long timerfd_mem_cpos;
+
#ifdef CONFIG_VDSO
unsigned long vdso_rt_vma_size = 0;
unsigned long vdso_rt_size = 0;
@@ -2362,6 +2367,16 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
memcpy(tcp_socks_mem, rst_tcp_socks, rst_tcp_socks_len());
/*
+ * Copy the timerfd parameters into the restorer args: the timers
+ * must be armed at the very end of the restore.
+ */
+ timerfd_mem_cpos = rst_mem_cpos(RM_PRIVATE);
+ timerfd_mem = rst_mem_alloc(rst_timerfd_len(), RM_PRIVATE);
+ if (!timerfd_mem)
+ goto err_nv;
+ memcpy(timerfd_mem, rst_timerfd, rst_timerfd_len());
+
+ /*
* We're about to search for free VM area and inject the restorer blob
* into it. No irrelevent mmaps/mremaps beyond this point, otherwise
* this unwanted mapping might get overlapped by the restorer.
@@ -2484,6 +2499,9 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
task_args->timer_n = posix_timers_nr;
task_args->posix_timers = rst_mem_remap_ptr(posix_timers_cpos, RM_PRIVATE);
+ task_args->timerfd_n = rst_timerfd_nr;
+ task_args->timerfd = rst_mem_remap_ptr(timerfd_mem_cpos, RM_PRIVATE);
+
task_args->siginfo_nr = siginfo_nr;
task_args->siginfo = rst_mem_remap_ptr(siginfo_cpos, RM_PRIVATE);
diff --git a/files.c b/files.c
index 18d0f9759873..ff2977f74cd0 100644
--- a/files.c
+++ b/files.c
@@ -32,6 +32,7 @@
#include "signalfd.h"
#include "namespaces.h"
#include "tun.h"
+#include "timerfd.h"
#include "fdset.h"
#include "fs-magic.h"
#include "proc_parse.h"
@@ -325,6 +326,8 @@ static int dump_one_file(struct parasite_ctl *ctl, int fd, int lfd, struct fd_op
ops = &fanotify_dump_ops;
else if (is_signalfd_link(link))
ops = &signalfd_dump_ops;
+ else if (is_timerfd_link(link))
+ ops = &timerfd_dump_ops;
else
return dump_unsupp_fd(&p, lfd, fdinfo, "anon", link);
diff --git a/include/proc_parse.h b/include/proc_parse.h
index b153328576a0..f663bbfe2517 100644
--- a/include/proc_parse.h
+++ b/include/proc_parse.h
@@ -10,6 +10,7 @@
#include "protobuf/eventpoll.pb-c.h"
#include "protobuf/signalfd.pb-c.h"
#include "protobuf/fsnotify.pb-c.h"
+#include "protobuf/timerfd.pb-c.h"
#define PROC_TASK_COMM_LEN 32
#define PROC_TASK_COMM_LEN_FMT "(%31s"
@@ -164,6 +165,7 @@ union fdinfo_entries {
SignalfdEntry sfd;
InotifyWdEntry ify;
FanotifyMarkEntry ffy;
+ TimerfdEntry tfy;
};
struct fdinfo_common {
diff --git a/include/restorer.h b/include/restorer.h
index bdb2adc9b5e3..981a08488614 100644
--- a/include/restorer.h
+++ b/include/restorer.h
@@ -16,6 +16,7 @@
#include "config.h"
#include "posix-timer.h"
+#include "timerfd.h"
#include "shmem.h"
#include "sigframe.h"
#include "vdso.h"
@@ -125,6 +126,9 @@ struct task_restore_args {
int timer_n;
struct restore_posix_timer *posix_timers;
+ int timerfd_n;
+ struct restore_timerfd *timerfd;
+
CredsEntry creds;
u32 cap_inh[CR_CAP_SIZE];
u32 cap_prm[CR_CAP_SIZE];
diff --git a/include/timerfd.h b/include/timerfd.h
new file mode 100644
index 000000000000..7d3a56cb4981
--- /dev/null
+++ b/include/timerfd.h
@@ -0,0 +1,33 @@
+#ifndef __CR_TIMERFD_H__
+#define __CR_TIMERFD_H__
+
+#include <time.h>
+
+#include "files.h"
+
+struct pstree_item;
+
+/*
+ * Per-timerfd parameters handed over to the restorer blob, which arms
+ * the timer at the end of the restore (see timerfd_arm() in pie/restorer.c).
+ */
+struct restore_timerfd {
+ int fd; /* descriptor number the timerfd was opened at */
+ int settime_flags; /* flags passed to timerfd_settime() */
+ unsigned long ticks; /* expiration counter, restored via TFD_IOC_SET_TICKS;
+			 * NOTE(review): the ioctl takes a pointer to a 64-bit
+			 * value, confirm this is wide enough on 32-bit targets */
+ struct itimerspec val; /* it_value/it_interval passed to timerfd_settime() */
+};
+
+extern const struct fdtype_ops timerfd_dump_ops;
+extern struct collect_image_info timerfd_cinfo;
+extern struct restore_timerfd *rst_timerfd;
+extern unsigned int rst_timerfd_nr;
+
+/* Size in bytes of rst_timerfd_nr entries of the rst_timerfd array. */
+static inline unsigned long rst_timerfd_len(void)
+{
+ return sizeof(*rst_timerfd) * rst_timerfd_nr;
+}
+
+extern int is_timerfd_link(char *link);
+
+#ifndef TFD_IOC_SET_TICKS
+# define TFD_IOC_SET_TICKS 0x40085400
+#endif
+
+#endif /* __CR_TIMERFD_H__ */
diff --git a/pie/restorer.c b/pie/restorer.c
index 27e69293f784..816039cda47b 100644
--- a/pie/restorer.c
+++ b/pie/restorer.c
@@ -515,6 +515,25 @@ static int vma_remap(unsigned long src, unsigned long dst, unsigned long len)
return 0;
}
+/*
+ * Arm every timerfd listed in the restore args.
+ *
+ * The timer is programmed first and the expiration counter is restored
+ * afterwards: programming a timerfd resets its counter, so setting the
+ * ticks up front would lose them. TFD_IOC_SET_TICKS rejects a zero
+ * count, so the ioctl is skipped when there is nothing to restore.
+ *
+ * Returns 0 on success or the negative error code of the first
+ * syscall that failed.
+ */
+static int timerfd_arm(struct task_restore_args *args)
+{
+	int i, ret;
+
+	for (i = 0; i < args->timerfd_n; i++) {
+		struct restore_timerfd *t = &args->timerfd[i];
+
+		pr_debug("timerfd: arm for fd %d (%d)\n", t->fd, i);
+
+		ret = sys_timerfd_settime(t->fd, t->settime_flags, &t->val, NULL);
+		if (!ret && t->ticks)
+			ret = sys_ioctl(t->fd, TFD_IOC_SET_TICKS, (unsigned long)&t->ticks);
+		if (ret) {
+			/* ret is a single error code, never an OR of two */
+			pr_err("Can't restore ticks/time for timerfd - %d\n", i);
+			return ret;
+		}
+	}
+	return 0;
+}
+
static int create_posix_timers(struct task_restore_args *args)
{
int ret, i;
@@ -967,6 +986,12 @@ long __export_restore_task(struct task_restore_args *args)
goto core_restore_end;
}
+ ret = timerfd_arm(args);
+ if (ret < 0) {
+ pr_err("Can't restore timerfd %ld\n", ret);
+ goto core_restore_end;
+ }
+
pr_info("%ld: Restored\n", sys_getpid());
futex_set(&zombies_inprogress, args->nr_zombies);
diff --git a/proc_parse.c b/proc_parse.c
index f2ea897ff526..34d8b12fcf00 100644
--- a/proc_parse.c
+++ b/proc_parse.c
@@ -1043,6 +1043,51 @@ static void parse_fhandle_encoded(char *tok, FhEntry *fh)
}
}
+/*
+ * Parse the timerfd section of a /proc fdinfo file.
+ *
+ * @buf already holds the "clockid:" line on entry; the remaining four
+ * lines are pulled from @f into @buf one by one. The expected layout is
+ *
+ *	clockid: 0
+ *	ticks: 0
+ *	settime flags: 01
+ *	it_value: (0, 49406829)
+ *	it_interval: (1, 0)
+ *
+ * Returns 0 when all fields were read into @tfy, -1 on a malformed
+ * line or when the file ends early (the latter is also logged).
+ */
+static int parse_timerfd(FILE *f, char *buf, size_t size, TimerfdEntry *tfy)
+{
+	unsigned long long *ticks = (unsigned long long *)&tfy->ticks;
+	unsigned long long *vsec = (unsigned long long *)&tfy->vsec;
+	unsigned long long *vnsec = (unsigned long long *)&tfy->vnsec;
+	unsigned long long *isec = (unsigned long long *)&tfy->isec;
+	unsigned long long *insec = (unsigned long long *)&tfy->insec;
+
+	if (sscanf(buf, "clockid: %d", &tfy->clockid) != 1)
+		return -1;
+
+	if (!fgets(buf, size, f))
+		goto nodata;
+	if (sscanf(buf, "ticks: %llu", ticks) != 1)
+		return -1;
+
+	if (!fgets(buf, size, f))
+		goto nodata;
+	if (sscanf(buf, "settime flags: 0%o", &tfy->settime_flags) != 1)
+		return -1;
+
+	if (!fgets(buf, size, f))
+		goto nodata;
+	if (sscanf(buf, "it_value: (%llu, %llu)", vsec, vnsec) != 2)
+		return -1;
+
+	if (!fgets(buf, size, f))
+		goto nodata;
+	if (sscanf(buf, "it_interval: (%llu, %llu)", isec, insec) != 2)
+		return -1;
+
+	return 0;
+
+nodata:
+	pr_err("No data left in proc file while parsing timerfd\n");
+	return -1;
+}
+
#define fdinfo_field(str, field) !strncmp(str, field":", sizeof(field))
static int parse_fdinfo_pid_s(char *pid, int fd, int type,
@@ -1105,6 +1150,21 @@ static int parse_fdinfo_pid_s(char *pid, int fd, int type,
entry_met = true;
continue;
}
+ if (fdinfo_field(str, "clockid")) {
+ timerfd_entry__init(&entry.tfy);
+
+ if (type != FD_TYPES__TIMERFD)
+ goto parse_err;
+ ret = parse_timerfd(f, str, sizeof(str), &entry.tfy);
+ if (ret)
+ goto parse_err;
+ ret = cb(&entry, arg);
+ if (ret)
+ goto out;
+
+ entry_met = true;
+ continue;
+ }
if (fdinfo_field(str, "tfd")) {
eventpoll_tfd_entry__init(&entry.epl);
diff --git a/timerfd.c b/timerfd.c
new file mode 100644
index 000000000000..8e9576d2a49a
--- /dev/null
+++ b/timerfd.c
@@ -0,0 +1,191 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <sys/timerfd.h>
+#include <sys/ioctl.h>
+
+#include "protobuf.h"
+#include "protobuf/timerfd.pb-c.h"
+
+#include "proc_parse.h"
+#include "rst-malloc.h"
+#include "restorer.h"
+#include "timerfd.h"
+#include "pstree.h"
+#include "files.h"
+#include "fdset.h"
+#include "util.h"
+#include "log.h"
+#include "bug.h"
+
+#undef LOG_PREFIX
+#define LOG_PREFIX "timerfd: "
+
+/* Context passed from dump_one_timerfd() to the fdinfo parsing callback. */
+struct timerfd_dump_arg {
+ u32 id; /* id stored into the dumped entry */
+ const struct fd_parms *p; /* source of the entry's flags and fown */
+};
+
+/* Restore-side state of one timerfd collected from the image. */
+struct timerfd_info {
+ TimerfdEntry *tfe; /* image entry describing the timer */
+ struct file_desc d; /* descriptor registered via file_desc_add() */
+};
+
+/*
+ * Array of timers to be armed by the restorer and the number of valid
+ * entries in it; grown by timerfd_post_open() for each opened timerfd.
+ */
+struct restore_timerfd *rst_timerfd;
+unsigned int rst_timerfd_nr;
+
+/* Check whether @link names an anonymous "[timerfd]" file. */
+int is_timerfd_link(char *link)
+{
+ return is_anon_link_type(link, "[timerfd]");
+}
+
+/*
+ * fdinfo callback: complete the parsed timerfd entry with the id, file
+ * flags and owner taken from the dump context in @arg, then write it
+ * to the timerfd image. Returns the result of pb_write_one().
+ */
+static int dump_timerfd_entry(union fdinfo_entries *e, void *arg)
+{
+	struct timerfd_dump_arg *dump = arg;
+	TimerfdEntry *entry = &e->tfy;
+
+	entry->id = dump->id;
+	entry->flags = dump->p->flags;
+	/* the entry only borrows the owner info; constness is dropped for the protobuf field */
+	entry->fown = (FownEntry *)&dump->p->fown;
+
+	pr_info("Dumping id %#x clockid %d it_value(%llu, %llu) it_interval(%llu, %llu)\n",
+		entry->id, entry->clockid,
+		(unsigned long long)entry->vsec,
+		(unsigned long long)entry->vnsec,
+		(unsigned long long)entry->isec,
+		(unsigned long long)entry->insec);
+
+	return pb_write_one(fdset_fd(glob_fdset, CR_FD_TIMERFD), entry, PB_TIMERFD);
+}
+
+/*
+ * Dump the timerfd open at @lfd: its fdinfo is parsed and each parsed
+ * entry is handed to dump_timerfd_entry() together with @id and @p.
+ */
+static int dump_one_timerfd(int lfd, u32 id, const struct fd_parms *p)
+{
+ struct timerfd_dump_arg da = { .id = id, .p = p, };
+ return parse_fdinfo(lfd, FD_TYPES__TIMERFD, dump_timerfd_entry, &da);
+}
+
+/* Dump operations selected by dump_one_file() for "[timerfd]" links. */
+const struct fdtype_ops timerfd_dump_ops = {
+ .type = FD_TYPES__TIMERFD,
+ .dump = dump_one_timerfd,
+};
+
+/*
+ * Record the timer settings of a freshly opened timerfd so that the
+ * restorer can arm it later.
+ *
+ * Appends one entry to the rst_timerfd array, filled from the image
+ * entry behind @d and the descriptor number @fd.
+ *
+ * Returns 0 on success, -ENOMEM if the array can't be grown; in that
+ * case the array and its counter are left untouched.
+ */
+static int timerfd_post_open(struct file_desc *d, int fd)
+{
+	struct timerfd_info *info = container_of(d, struct timerfd_info, d);
+	TimerfdEntry *tfe = info->tfe;
+	struct restore_timerfd *grown, *t;
+
+	/*
+	 * Grow into a temporary and bump the counter only once the new
+	 * slot exists, so a failed allocation neither leaks the old
+	 * array nor leaves the counter ahead of the storage.
+	 */
+	grown = xrealloc(rst_timerfd, sizeof(*rst_timerfd) * (rst_timerfd_nr + 1));
+	if (!grown)
+		return -ENOMEM;
+	rst_timerfd = grown;
+
+	t = &rst_timerfd[rst_timerfd_nr++];
+	t->fd = fd;
+	t->ticks = (unsigned long)tfe->ticks;
+	t->settime_flags = tfe->settime_flags;
+	t->val.it_interval.tv_sec = (time_t)tfe->isec;
+	t->val.it_interval.tv_nsec = (long)tfe->insec;
+	t->val.it_value.tv_sec = (time_t)tfe->vsec;
+	t->val.it_value.tv_nsec = (long)tfe->vnsec;
+
+	return 0;
+}
+
+/*
+ * Create the timerfd described by the image entry behind @d.
+ *
+ * The descriptor is created disarmed; the timer itself is armed later
+ * from the values timerfd_post_open() copies out of the entry. For
+ * timers set with TFD_TIMER_ABSTIME the expiry seconds in the entry
+ * are shifted by the current clock value here, so that the shifted
+ * value is what gets armed.
+ *
+ * Returns the new descriptor, or -1 on error.
+ */
+static int timerfd_open(struct file_desc *d)
+{
+	struct timerfd_info *info;
+	TimerfdEntry *tfe;
+	int tmp = -1;
+
+	info = container_of(d, struct timerfd_info, d);
+	tfe = info->tfe;
+	pr_info("Restoring id %#x clockid %d settime_flags %x ticks %llu "
+		"it_value(%llu, %llu) it_interval(%llu, %llu)\n",
+		tfe->id, tfe->clockid, tfe->settime_flags, (unsigned long long)tfe->ticks,
+		(unsigned long long)tfe->vsec, (unsigned long long)tfe->vnsec,
+		(unsigned long long)tfe->isec, (unsigned long long)tfe->insec);
+
+	tmp = timerfd_create(tfe->clockid, 0);
+	if (tmp < 0) {
+		/* no trailing newline, matching the other pr_perror() calls here */
+		pr_perror("Can't create for %#x", tfe->id);
+		return -1;
+	}
+
+	if (tfe->settime_flags & TFD_TIMER_ABSTIME) {
+		struct timespec ts = { };
+
+		/*
+		 * We might need to adjust the value because the checkpoint
+		 * and restore procedure takes some time itself. Note we
+		 * don't adjust nanoseconds, since the result may overflow
+		 * the NSEC_PER_SEC limit. FIXME
+		 */
+		if (clock_gettime(tfe->clockid, &ts)) {
+			pr_perror("Can't get current time");
+			goto err_close;
+		}
+
+		/*
+		 * Store the adjusted expiry in the entry itself: keeping it
+		 * in a local would drop it, as nothing is armed here.
+		 */
+		tfe->vsec += ts.tv_sec;
+
+		pr_debug("Ajust id %#x it_value(%llu, %llu) -> it_value(%llu, %llu)\n",
+			 tfe->id, (unsigned long long)ts.tv_sec,
+			 (unsigned long long)ts.tv_nsec,
+			 (unsigned long long)tfe->vsec,
+			 (unsigned long long)tfe->vnsec);
+	}
+
+	if (rst_file_params(tmp, tfe->fown, tfe->flags)) {
+		pr_perror("Can't restore params for %#x", tfe->id);
+		goto err_close;
+	}
+
+	return tmp;
+
+err_close:
+	close_safe(&tmp);
+	return -1;
+}
+
+/* Restore callbacks attached to every collected timerfd descriptor. */
+static struct file_desc_ops timerfd_desc_ops = {
+ .type = FD_TYPES__TIMERFD,
+ .open = timerfd_open,
+ .post_open = timerfd_post_open,
+};
+
+/*
+ * Sanity-check an image entry: only CLOCK_REALTIME and CLOCK_MONOTONIC
+ * timers are accepted. Returns 0 if the entry is fine, -1 otherwise.
+ */
+static int verify_timerfd(TimerfdEntry *tfe)
+{
+	switch (tfe->clockid) {
+	case CLOCK_REALTIME:
+	case CLOCK_MONOTONIC:
+		return 0;
+	default:
+		break;
+	}
+
+	pr_err("Unknown clock type %d for %#x\n", tfe->clockid, tfe->id);
+	return -1;
+}
+
+/*
+ * Image collection callback: bind the decoded entry in @msg to the
+ * private timerfd_info area @o, verify it and register the file
+ * descriptor under the entry's id. Returns -1 if verification fails,
+ * otherwise the result of file_desc_add().
+ */
+static int collect_one_timerfd(void *o, ProtobufCMessage *msg)
+{
+	struct timerfd_info *tinfo = o;
+	TimerfdEntry *entry = pb_msg(msg, TimerfdEntry);
+
+	tinfo->tfe = entry;
+	if (verify_timerfd(entry) != 0) {
+		pr_err("Verification failed for %#x\n", entry->id);
+		return -1;
+	}
+
+	return file_desc_add(&tinfo->d, entry->id, &timerfd_desc_ops);
+}
+
+/*
+ * Describes how timerfd image entries are collected on restore; the
+ * image is flagged COLLECT_OPTIONAL.
+ */
+struct collect_image_info timerfd_cinfo = {
+ .fd_type = CR_FD_TIMERFD,
+ .pb_type = PB_TIMERFD,
+ .priv_size = sizeof(struct timerfd_info),
+ .collect = collect_one_timerfd,
+ .flags = COLLECT_OPTIONAL,
+};
--
1.9.3
--8vCeF2GUdMpe9ZbK--
More information about the CRIU
mailing list