[PATCH] timerfd: Implement c/r procedure

Cyrill Gorcunov gorcunov at openvz.org
Tue Jun 10 04:43:56 PDT 2014


Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
 Makefile.crtools     |   1 +
 cr-restore.c         |  18 +++++
 files.c              |   3 +
 include/proc_parse.h |   2 +
 include/restorer.h   |   4 ++
 include/timerfd.h    |  33 +++++++++
 pie/restorer.c       |  25 +++++++
 proc_parse.c         |  60 ++++++++++++++++
 timerfd.c            | 191 +++++++++++++++++++++++++++++++++++++++++++++++++++
 9 files changed, 337 insertions(+)
 create mode 100644 include/timerfd.h
 create mode 100644 timerfd.c

diff --git a/Makefile.crtools b/Makefile.crtools
index 4e81afe49ac3..6033b2ce04a9 100644
--- a/Makefile.crtools
+++ b/Makefile.crtools
@@ -57,6 +57,7 @@ obj-y	+= pagemap-cache.o
 obj-y	+= kerndat.o
 obj-y	+= stats.o
 obj-y	+= cgroup.o
+obj-y	+= timerfd.o
 obj-y	+= string.o
 obj-y	+= sigframe.o
 ifeq ($(VDSO),y)
diff --git a/cr-restore.c b/cr-restore.c
index 573b989b4bc1..8ae51dfb4e69 100644
--- a/cr-restore.c
+++ b/cr-restore.c
@@ -69,6 +69,7 @@
 #include "rst-malloc.h"
 #include "plugin.h"
 #include "cgroup.h"
+#include "timerfd.h"
 
 #include "parasite-syscall.h"
 
@@ -153,6 +154,7 @@ static struct collect_image_info *cinfos[] = {
 	&tty_cinfo,
 	&tunfile_cinfo,
 	&ext_file_cinfo,
+	&timerfd_cinfo,
 };
 
 static int root_prepare_shared(void)
@@ -2308,6 +2310,9 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
 	void *tcp_socks_mem;
 	unsigned long tcp_socks;
 
+	void *timerfd_mem;
+	unsigned long timerfd_mem_cpos;
+
 #ifdef CONFIG_VDSO
 	unsigned long vdso_rt_vma_size = 0;
 	unsigned long vdso_rt_size = 0;
@@ -2362,6 +2367,16 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
 	memcpy(tcp_socks_mem, rst_tcp_socks, rst_tcp_socks_len());
 
 	/*
+	 * Copy timerfd params into the restorer args: the timers must
+	 * be armed at the very end of the restore.
+	 */
+	timerfd_mem_cpos = rst_mem_cpos(RM_PRIVATE);
+	timerfd_mem = rst_mem_alloc(rst_timerfd_len(), RM_PRIVATE);
+	if (!timerfd_mem)
+		goto err_nv;
+	memcpy(timerfd_mem, rst_timerfd, rst_timerfd_len());
+
+	/*
 	 * We're about to search for free VM area and inject the restorer blob
 	 * into it. No irrelevent mmaps/mremaps beyond this point, otherwise
 	 * this unwanted mapping might get overlapped by the restorer.
@@ -2484,6 +2499,9 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
 	task_args->timer_n = posix_timers_nr;
 	task_args->posix_timers = rst_mem_remap_ptr(posix_timers_cpos, RM_PRIVATE);
 
+	task_args->timerfd_n = rst_timerfd_nr;
+	task_args->timerfd = rst_mem_remap_ptr(timerfd_mem_cpos, RM_PRIVATE);
+
 	task_args->siginfo_nr = siginfo_nr;
 	task_args->siginfo = rst_mem_remap_ptr(siginfo_cpos, RM_PRIVATE);
 
diff --git a/files.c b/files.c
index 18d0f9759873..ff2977f74cd0 100644
--- a/files.c
+++ b/files.c
@@ -32,6 +32,7 @@
 #include "signalfd.h"
 #include "namespaces.h"
 #include "tun.h"
+#include "timerfd.h"
 #include "fdset.h"
 #include "fs-magic.h"
 #include "proc_parse.h"
@@ -325,6 +326,8 @@ static int dump_one_file(struct parasite_ctl *ctl, int fd, int lfd, struct fd_op
 			ops = &fanotify_dump_ops;
 		else if (is_signalfd_link(link))
 			ops = &signalfd_dump_ops;
+		else if (is_timerfd_link(link))
+			ops = &timerfd_dump_ops;
 		else
 			return dump_unsupp_fd(&p, lfd, fdinfo, "anon", link);
 
diff --git a/include/proc_parse.h b/include/proc_parse.h
index b153328576a0..f663bbfe2517 100644
--- a/include/proc_parse.h
+++ b/include/proc_parse.h
@@ -10,6 +10,7 @@
 #include "protobuf/eventpoll.pb-c.h"
 #include "protobuf/signalfd.pb-c.h"
 #include "protobuf/fsnotify.pb-c.h"
+#include "protobuf/timerfd.pb-c.h"
 
 #define PROC_TASK_COMM_LEN	32
 #define PROC_TASK_COMM_LEN_FMT	"(%31s"
@@ -164,6 +165,7 @@ union fdinfo_entries {
 	SignalfdEntry sfd;
 	InotifyWdEntry ify;
 	FanotifyMarkEntry ffy;
+	TimerfdEntry tfy;
 };
 
 struct fdinfo_common {
diff --git a/include/restorer.h b/include/restorer.h
index bdb2adc9b5e3..981a08488614 100644
--- a/include/restorer.h
+++ b/include/restorer.h
@@ -16,6 +16,7 @@
 #include "config.h"
 
 #include "posix-timer.h"
+#include "timerfd.h"
 #include "shmem.h"
 #include "sigframe.h"
 #include "vdso.h"
@@ -125,6 +126,9 @@ struct task_restore_args {
 	int				timer_n;
 	struct restore_posix_timer	*posix_timers;
 
+	int				timerfd_n;
+	struct restore_timerfd		*timerfd;
+
 	CredsEntry			creds;
 	u32				cap_inh[CR_CAP_SIZE];
 	u32				cap_prm[CR_CAP_SIZE];
diff --git a/include/timerfd.h b/include/timerfd.h
new file mode 100644
index 000000000000..7d3a56cb4981
--- /dev/null
+++ b/include/timerfd.h
@@ -0,0 +1,33 @@
+#ifndef __CR_TIMERFD_H__
+#define __CR_TIMERFD_H__
+
+#include <time.h>
+
+#include "files.h"
+
+struct pstree_item;
+
+struct restore_timerfd {
+	int			fd;
+	int			settime_flags;
+	unsigned long long	ticks;	/* TFD_IOC_SET_TICKS takes a pointer to 8 bytes */
+	struct itimerspec	val;
+};
+
+extern const struct fdtype_ops timerfd_dump_ops;
+extern struct collect_image_info timerfd_cinfo;
+extern struct restore_timerfd *rst_timerfd;
+extern unsigned int rst_timerfd_nr;
+
+static inline unsigned long rst_timerfd_len(void)
+{
+	return sizeof(*rst_timerfd) * rst_timerfd_nr;
+}
+
+extern int is_timerfd_link(char *link);
+
+#ifndef TFD_IOC_SET_TICKS
+# define TFD_IOC_SET_TICKS	0x40085400
+#endif
+
+#endif /* __CR_TIMERFD_H__ */
diff --git a/pie/restorer.c b/pie/restorer.c
index 27e69293f784..816039cda47b 100644
--- a/pie/restorer.c
+++ b/pie/restorer.c
@@ -515,6 +515,25 @@ static int vma_remap(unsigned long src, unsigned long dst, unsigned long len)
 	return 0;
 }
 
+/* Re-arm every timerfd listed in @args; returns 0 or the first failing syscall's error. */
+static int timerfd_arm(struct task_restore_args *args)
+{
+	struct restore_timerfd *t = args->timerfd;
+	int i, ret;
+
+	for (i = 0; i < args->timerfd_n; i++, t++) {
+		pr_debug("timerfd: arm for fd %d (%d)\n", t->fd, i);
+		ret = sys_timerfd_settime(t->fd, t->settime_flags, &t->val, NULL);
+		if (!ret && t->ticks) /* settime cleared the count; SET_TICKS rejects 0 */
+			ret = sys_ioctl(t->fd, TFD_IOC_SET_TICKS, (unsigned long)&t->ticks);
+		if (ret) {
+			pr_err("Can't restore ticks/time for timerfd - %d\n", i);
+			return ret;
+		}
+	}
+	return 0;
+}
+
 static int create_posix_timers(struct task_restore_args *args)
 {
 	int ret, i;
@@ -967,6 +986,12 @@ long __export_restore_task(struct task_restore_args *args)
 		goto core_restore_end;
 	}
 
+	ret = timerfd_arm(args);
+	if (ret < 0) {
+		pr_err("Can't restore timerfd %ld\n", ret);
+		goto core_restore_end;
+	}
+
 	pr_info("%ld: Restored\n", sys_getpid());
 
 	futex_set(&zombies_inprogress, args->nr_zombies);
diff --git a/proc_parse.c b/proc_parse.c
index f2ea897ff526..34d8b12fcf00 100644
--- a/proc_parse.c
+++ b/proc_parse.c
@@ -1043,6 +1043,51 @@ static void parse_fhandle_encoded(char *tok, FhEntry *fh)
 	}
 }
 
+/*
+ * Parse the timerfd part of an fdinfo file into @tfy. @buf already holds
+ * the "clockid" line; the remaining four lines are read from @f:
+ *	clockid: 0
+ *	ticks: 0
+ *	settime flags: 01
+ *	it_value: (0, 49406829)
+ *	it_interval: (1, 0)
+ * Returns 0 on success, -1 on a malformed or truncated record.
+ */
+static int parse_timerfd(FILE *f, char *buf, size_t size, TimerfdEntry *tfy)
+{
+	if (sscanf(buf, "clockid: %d", &tfy->clockid) != 1)
+		return -1;
+
+	if (!fgets(buf, size, f))
+		goto nodata;
+	if (sscanf(buf, "ticks: %llu", (unsigned long long *)&tfy->ticks) != 1)
+		return -1;
+
+	if (!fgets(buf, size, f))
+		goto nodata;
+	if (sscanf(buf, "settime flags: 0%o", &tfy->settime_flags) != 1)
+		return -1;
+
+	if (!fgets(buf, size, f))
+		goto nodata;
+	if (sscanf(buf, "it_value: (%llu, %llu)",
+		   (unsigned long long *)&tfy->vsec,
+		   (unsigned long long *)&tfy->vnsec) != 2)
+		return -1;
+
+	if (!fgets(buf, size, f))
+		goto nodata;
+	if (sscanf(buf, "it_interval: (%llu, %llu)",
+		   (unsigned long long *)&tfy->isec,
+		   (unsigned long long *)&tfy->insec) != 2)
+		return -1;
+	return 0;
+
+nodata:
+	pr_err("No data left in proc file while parsing timerfd\n");
+	return -1;
+}
+
 #define fdinfo_field(str, field)	!strncmp(str, field":", sizeof(field))
 
 static int parse_fdinfo_pid_s(char *pid, int fd, int type,
@@ -1105,6 +1150,21 @@ static int parse_fdinfo_pid_s(char *pid, int fd, int type,
 			entry_met = true;
 			continue;
 		}
+		if (fdinfo_field(str, "clockid")) {
+			timerfd_entry__init(&entry.tfy);
+
+			if (type != FD_TYPES__TIMERFD)
+				goto parse_err;
+			ret = parse_timerfd(f, str, sizeof(str), &entry.tfy);
+			if (ret)
+				goto parse_err;
+			ret = cb(&entry, arg);
+			if (ret)
+				goto out;
+
+			entry_met = true;
+			continue;
+		}
 		if (fdinfo_field(str, "tfd")) {
 			eventpoll_tfd_entry__init(&entry.epl);
 
diff --git a/timerfd.c b/timerfd.c
new file mode 100644
index 000000000000..8e9576d2a49a
--- /dev/null
+++ b/timerfd.c
@@ -0,0 +1,191 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <sys/timerfd.h>
+#include <sys/ioctl.h>
+
+#include "protobuf.h"
+#include "protobuf/timerfd.pb-c.h"
+
+#include "proc_parse.h"
+#include "rst-malloc.h"
+#include "restorer.h"
+#include "timerfd.h"
+#include "pstree.h"
+#include "files.h"
+#include "fdset.h"
+#include "util.h"
+#include "log.h"
+#include "bug.h"
+
+#undef	LOG_PREFIX
+#define LOG_PREFIX "timerfd: "
+
+struct timerfd_dump_arg {
+	u32			id;
+	const struct fd_parms	*p;
+};
+
+struct timerfd_info {
+	TimerfdEntry		*tfe;
+	struct file_desc	d;
+};
+
+struct restore_timerfd *rst_timerfd;
+unsigned int rst_timerfd_nr;
+
+int is_timerfd_link(char *link)
+{
+	return is_anon_link_type(link, "[timerfd]");
+}
+
+static int dump_timerfd_entry(union fdinfo_entries *e, void *arg)
+{
+	struct timerfd_dump_arg *da = arg;
+	TimerfdEntry *tfy = &e->tfy;
+
+	tfy->id		= da->id;
+	tfy->flags	= da->p->flags;
+	tfy->fown	= (FownEntry *)&da->p->fown;
+
+	pr_info("Dumping id %#x clockid %d it_value(%llu, %llu) it_interval(%llu, %llu)\n",
+		tfy->id, tfy->clockid, (unsigned long long)tfy->vsec, (unsigned long long)tfy->vnsec,
+		(unsigned long long)tfy->isec, (unsigned long long)tfy->insec);
+
+	return pb_write_one(fdset_fd(glob_fdset, CR_FD_TIMERFD), &e->tfy, PB_TIMERFD);
+}
+
+static int dump_one_timerfd(int lfd, u32 id, const struct fd_parms *p)
+{
+	struct timerfd_dump_arg da = { .id = id, .p = p, };
+	return parse_fdinfo(lfd, FD_TYPES__TIMERFD, dump_timerfd_entry, &da);
+}
+
+const struct fdtype_ops timerfd_dump_ops = {
+	.type		= FD_TYPES__TIMERFD,
+	.dump		= dump_one_timerfd,
+};
+
+/* Queue @fd's timer settings in rst_timerfd[] for the restorer; returns 0 or -ENOMEM. */
+static int timerfd_post_open(struct file_desc *d, int fd)
+{
+	struct timerfd_info *info = container_of(d, struct timerfd_info, d);
+	TimerfdEntry *tfe = info->tfe;
+	struct restore_timerfd *t;
+
+	/* Grow via a temporary so a failed xrealloc() leaves the array and its count intact. */
+	t = xrealloc(rst_timerfd, sizeof(*t) * (rst_timerfd_nr + 1));
+	if (!t)
+		return -ENOMEM;
+	rst_timerfd = t;
+	t = &rst_timerfd[rst_timerfd_nr++];
+	t->fd				= fd;
+	t->ticks			= tfe->ticks;
+	t->settime_flags		= tfe->settime_flags;
+	t->val.it_interval.tv_sec	= (time_t)tfe->isec;
+	t->val.it_interval.tv_nsec	= (long)tfe->insec;
+	t->val.it_value.tv_sec		= (time_t)tfe->vsec;
+	t->val.it_value.tv_nsec		= (long)tfe->vnsec;
+	return 0;
+}
+
+/* Create the timerfd for @d and restore its file params; returns the fd, or -1 on error. */
+static int timerfd_open(struct file_desc *d)
+{
+	struct itimerspec v = { };
+	struct timerfd_info *info;
+	TimerfdEntry *tfe;
+	int tmp = -1;
+
+	info = container_of(d, struct timerfd_info, d);
+	tfe = info->tfe;
+	pr_info("Restoring id %#x clockid %d settime_flags %x ticks %llu "
+		"it_value(%llu, %llu) it_interval(%llu, %llu)\n",
+		tfe->id, tfe->clockid, tfe->settime_flags, (unsigned long long)tfe->ticks,
+		(unsigned long long)tfe->vsec, (unsigned long long)tfe->vnsec,
+		(unsigned long long)tfe->isec, (unsigned long long)tfe->insec);
+
+	tmp = timerfd_create(tfe->clockid, 0);
+	if (tmp < 0) {
+		pr_perror("Can't create for %#x", tfe->id);
+		return -1;
+	}
+
+	v.it_interval.tv_sec	= (time_t)tfe->isec;
+	v.it_interval.tv_nsec	= (long)tfe->insec;
+	v.it_value.tv_sec	= (time_t)tfe->vsec;
+	v.it_value.tv_nsec	= (long)tfe->vnsec;
+
+	if (tfe->settime_flags & TFD_TIMER_ABSTIME) {
+		struct timespec ts = { };
+
+		/*
+		 * Checkpoint/restore takes time, so an absolute value may
+		 * need adjusting; nanoseconds are skipped as the sum could
+		 * overflow NSEC_PER_SEC (FIXME). NOTE(review): v is only
+		 * logged below, timerfd_post_open() copies raw tfe values.
+		 */
+		if (clock_gettime(tfe->clockid, &ts)) {
+			pr_perror("Can't get current time");
+			goto err_close;
+		}
+
+		v.it_value.tv_sec += (time_t)ts.tv_sec;
+
+		pr_debug("Ajust id %#x it_value(%llu, %llu) -> it_value(%llu, %llu)\n",
+			 tfe->id, (unsigned long long)ts.tv_sec,
+			 (unsigned long long)ts.tv_nsec,
+			 (unsigned long long)v.it_value.tv_sec,
+			 (unsigned long long)v.it_value.tv_nsec);
+	}
+
+	if (rst_file_params(tmp, tfe->fown, tfe->flags)) {
+		pr_perror("Can't restore params for %#x", tfe->id);
+		goto err_close;
+	}
+
+	return tmp;
+
+err_close:
+	close_safe(&tmp);
+	return -1;
+}
+
+static struct file_desc_ops timerfd_desc_ops = {
+	.type		= FD_TYPES__TIMERFD,
+	.open		= timerfd_open,
+	.post_open	= timerfd_post_open,
+};
+
+/* Accept only CLOCK_REALTIME and CLOCK_MONOTONIC entries: 0 if valid, -1 (logged) if not. */
+static int verify_timerfd(TimerfdEntry *tfe)
+{
+	if (tfe->clockid == CLOCK_REALTIME ||
+	    tfe->clockid == CLOCK_MONOTONIC)
+		return 0;
+
+	pr_err("Unknown clock type %d for %#x\n", tfe->clockid, tfe->id);
+	return -1;
+}
+
+static int collect_one_timerfd(void *o, ProtobufCMessage *msg)
+{
+	struct timerfd_info *info = o;
+
+	info->tfe = pb_msg(msg, TimerfdEntry);
+	if (verify_timerfd(info->tfe)) {
+		pr_err("Verification failed for %#x\n", info->tfe->id);
+		return -1;
+	}
+
+	return file_desc_add(&info->d, info->tfe->id, &timerfd_desc_ops);
+}
+
+struct collect_image_info timerfd_cinfo = {
+	.fd_type	= CR_FD_TIMERFD,
+	.pb_type	= PB_TIMERFD,
+	.priv_size	= sizeof(struct timerfd_info),
+	.collect	= collect_one_timerfd,
+	.flags		= COLLECT_OPTIONAL,
+};
-- 
1.9.3


--8vCeF2GUdMpe9ZbK--


More information about the CRIU mailing list