[CRIU] [PATCH 4/4] vdso: Make it arch specific

Cyrill Gorcunov gorcunov at openvz.org
Mon May 26 00:50:15 PDT 2014


Currently we build the vDSO handling code for all archs provided
in the source code, keeping some "common" parts inside pie/vdso.c,
pie/vdso-stub.c, vdso-stub.c and vdso.c. This worked more or
less well, but in new Linux kernels (presumably starting from 3.16)
the vDSO has been significantly reworked, so every architecture
must have its own vDSO handling engine (just like the kernel does).

So in this patch we move the vDSO code into the arch-specific
directories, and because aarch64 doesn't actually implement
proxification yet due to kernel restrictions, we drop it. Once
there is kernel support we will bring it back as a proper
arch/aarch64 implementation.

Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
 Makefile                        |  16 +--
 Makefile.crtools                |   3 +-
 arch/aarch64/include/asm/vdso.h |  47 --------
 arch/aarch64/vdso-pie.c         |  34 ------
 arch/aarch64/vdso.c             |  36 ------
 arch/arm/include/asm/vdso.h     |  18 ---
 arch/arm/vdso-pie.c             |  17 ---
 arch/arm/vdso.c                 |  20 ----
 arch/x86/include/asm/vdso.h     |  90 +++++++++++++-
 arch/x86/vdso-pie.c             | 229 +++++++++++++++++++++++++++++++++++-
 arch/x86/vdso.c                 |  47 ++++++++
 include/vdso.h                  |  97 ++-------------
 pie/Makefile                    |   3 +-
 pie/vdso-stub.c                 |  31 -----
 pie/vdso.c                      | 255 ----------------------------------------
 proc_parse.c                    |   5 +
 vdso-stub.c                     |  23 ----
 vdso.c                          |  64 ----------
 18 files changed, 384 insertions(+), 651 deletions(-)
 delete mode 100644 arch/aarch64/include/asm/vdso.h
 delete mode 100644 arch/aarch64/vdso-pie.c
 delete mode 100644 arch/aarch64/vdso.c
 delete mode 100644 arch/arm/include/asm/vdso.h
 delete mode 100644 arch/arm/vdso-pie.c
 delete mode 100644 arch/arm/vdso.c
 delete mode 100644 pie/vdso-stub.c
 delete mode 100644 pie/vdso.c
 delete mode 100644 vdso-stub.c
 delete mode 100644 vdso.c

diff --git a/Makefile b/Makefile
index ba077e451552..c3fa5475f2c7 100644
--- a/Makefile
+++ b/Makefile
@@ -35,8 +35,6 @@ OBJCOPY		:= $(CROSS_COMPILE)objcopy
 
 CFLAGS		+= $(USERCFLAGS)
 
-VDSO_O		:= vdso.o
-
 #
 # Fetch ARCH from the uname if not yet set
 #
@@ -73,8 +71,6 @@ ifeq ($(shell echo $(ARCH) | sed -e 's/arm.*/arm/'),arm)
 	ifeq ($(ARMV),7)
 		USERCFLAGS += -march=armv7-a
 	endif
-
-	VDSO_O       := vdso-stub.o
 endif
 
 SRCARCH		?= $(ARCH)
@@ -123,7 +119,6 @@ CRIU-INC	:= lib/criu.h include/criu-plugin.h include/criu-log.h protobuf/rpc.pro
 export CC MAKE CFLAGS LIBS SRCARCH DEFINES MAKEFLAGS CRIU-SO
 export SRC_DIR SYSCALL-LIB SH RM ARCH_DIR OBJCOPY LDARCH LD
 export cflags-y
-export VDSO_O
 export VDSO
 
 include Makefile.inc
@@ -175,19 +170,14 @@ lib/%:: $(VERSION_HEADER) config built-in.o
 lib: $(VERSION_HEADER) config built-in.o
 	$(Q) $(MAKE) $(build)=lib all
 
+ifeq ($(VDSO),y)
+PROGRAM-BUILTINS	+= $(ARCH_DIR)/vdso-pie.o
+endif
 PROGRAM-BUILTINS	+= pie/util-fd.o
 PROGRAM-BUILTINS	+= pie/util.o
 PROGRAM-BUILTINS	+= protobuf/built-in.o
 PROGRAM-BUILTINS	+= built-in.o
 
-$(ARCH_DIR)/vdso-pie.o: pie
-	$(Q) $(MAKE) $(build)=pie $(ARCH_DIR)/vdso-pie.o
-PROGRAM-BUILTINS	+= $(ARCH_DIR)/vdso-pie.o
-pie/$(VDSO_O): pie
-	$(Q) $(MAKE) $(build)=pie pie/$(VDSO_O)
-PROGRAM-BUILTINS	+= pie/$(VDSO_O)
-
-
 $(PROGRAM): $(SYSCALL-LIB) $(ARCH-LIB) $(PROGRAM-BUILTINS)
 	$(E) "  LINK    " $@
 	$(Q) $(CC) $(CFLAGS) $^ $(LIBS) $(LDFLAGS) $(GMONLDOPT) -rdynamic -o $@
diff --git a/Makefile.crtools b/Makefile.crtools
index 619e9a04c27e..71ffb8bc618c 100644
--- a/Makefile.crtools
+++ b/Makefile.crtools
@@ -58,11 +58,12 @@ obj-y	+= kerndat.o
 obj-y	+= stats.o
 obj-y	+= string.o
 obj-y	+= sigframe.o
+ifeq ($(VDSO),y)
 obj-y	+= $(ARCH_DIR)/vdso.o
+endif
 obj-y	+= cr-service.o
 obj-y	+= sd-daemon.o
 obj-y	+= plugin.o
-obj-y	+= $(VDSO_O)
 
 ifneq ($(MAKECMDGOALS),clean)
 incdeps := y
diff --git a/arch/aarch64/include/asm/vdso.h b/arch/aarch64/include/asm/vdso.h
deleted file mode 100644
index 7efbd0a1b242..000000000000
--- a/arch/aarch64/include/asm/vdso.h
+++ /dev/null
@@ -1,47 +0,0 @@
-#ifndef __CR_ASM_VDSO_H__
-#define __CR_ASM_VDSO_H__
-
-#include <sys/types.h>
-
-#include "protobuf/vma.pb-c.h"
-
-struct vdso_symtable;
-struct parasite_ctl;
-struct vm_area_list;
-
-
-enum {
-	VDSO_SYMBOL_CLOCK_GETRES,
-	VDSO_SYMBOL_CLOCK_GETTIME,
-	VDSO_SYMBOL_GETTIMEOFDAY,
-	VDSO_SYMBOL_RT_SIGRETURN,
-
-	VDSO_SYMBOL_MAX
-};
-
-#define VDSO_SYMBOL_CLOCK_GETRES_NAME	"__kernel_clock_getres"
-#define VDSO_SYMBOL_CLOCK_GETTIME_NAME	"__kernel_clock_gettime"
-#define VDSO_SYMBOL_GETTIMEOFDAY_NAME	"__kernel_gettimeofday"
-#define VDSO_SYMBOL_RT_SIGRETURN_NAME	"__kernel_rt_sigreturn"
-
-
-#define DECLARE_VDSO(ident_name, symtab_name)				\
-									\
-char ident_name[] = {							\
-	0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00,			\
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,			\
-};									\
-									\
-char *symtab_name[VDSO_SYMBOL_MAX] = {					\
-	[VDSO_SYMBOL_CLOCK_GETRES] = VDSO_SYMBOL_CLOCK_GETRES_NAME,	\
-	[VDSO_SYMBOL_RT_SIGRETURN] = VDSO_SYMBOL_RT_SIGRETURN_NAME,	\
-	[VDSO_SYMBOL_GETTIMEOFDAY] = VDSO_SYMBOL_GETTIMEOFDAY_NAME,	\
-	[VDSO_SYMBOL_CLOCK_GETTIME] = VDSO_SYMBOL_CLOCK_GETTIME_NAME	\
-};
-
-
-extern int vdso_redirect_calls(void *base_to, void *base_from, struct vdso_symtable *to, struct vdso_symtable *from);
-extern int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
-			       struct vm_area_list *vma_area_list);
-
-#endif /* __CR_ASM_VDSO_H__ */
diff --git a/arch/aarch64/vdso-pie.c b/arch/aarch64/vdso-pie.c
deleted file mode 100644
index c96fc25aa022..000000000000
--- a/arch/aarch64/vdso-pie.c
+++ /dev/null
@@ -1,34 +0,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <string.h>
-#include <elf.h>
-#include <fcntl.h>
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-
-#include "asm/string.h"
-#include "asm/types.h"
-
-#include "compiler.h"
-#include "syscall.h"
-#include "crtools.h"
-#include "vdso.h"
-#include "vma.h"
-#include "log.h"
-
-#ifdef LOG_PREFIX
-# undef LOG_PREFIX
-#endif
-#define LOG_PREFIX "vdso: "
-
-
-int vdso_redirect_calls(void *base_to, void *base_from,
-			struct vdso_symtable *to,
-			struct vdso_symtable *from)
-{
-	pr_err("vDSO proxy isn't implemented yet");
-	return -1;
-}
diff --git a/arch/aarch64/vdso.c b/arch/aarch64/vdso.c
deleted file mode 100644
index 5053acfc389c..000000000000
--- a/arch/aarch64/vdso.c
+++ /dev/null
@@ -1,36 +0,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <string.h>
-#include <elf.h>
-#include <fcntl.h>
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-
-#include "parasite-syscall.h"
-#include "parasite.h"
-#include "compiler.h"
-#include "kerndat.h"
-#include "vdso.h"
-#include "util.h"
-#include "log.h"
-#include "mem.h"
-#include "vma.h"
-
-#include "asm/types.h"
-#include "asm/parasite-syscall.h"
-
-
-#ifdef LOG_PREFIX
-# undef LOG_PREFIX
-#endif
-#define LOG_PREFIX "vdso: "
-
-
-int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
-			struct vm_area_list *vma_area_list)
-{
-	return 0;
-}
diff --git a/arch/arm/include/asm/vdso.h b/arch/arm/include/asm/vdso.h
deleted file mode 100644
index b78a22c064b3..000000000000
--- a/arch/arm/include/asm/vdso.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef __CR_ASM_VDSO_H__
-#define __CR_ASM_VDSO_H__
-
-#include <sys/types.h>
-
-#include "protobuf/vma.pb-c.h"
-
-struct vdso_symtable;
-struct parasite_ctl;
-struct vm_area_list;
-
-#define VDSO_SYMBOL_MAX 1
-
-extern int vdso_redirect_calls(void *base_to, void *base_from, struct vdso_symtable *to, struct vdso_symtable *from);
-extern int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
-			       struct vm_area_list *vma_area_list);
-
-#endif /* __CR_ASM_VDSO_H__ */
diff --git a/arch/arm/vdso-pie.c b/arch/arm/vdso-pie.c
deleted file mode 100644
index 5253d6e1a40c..000000000000
--- a/arch/arm/vdso-pie.c
+++ /dev/null
@@ -1,17 +0,0 @@
-#include <sys/types.h>
-
-#include "vdso.h"
-#include "vma.h"
-#include "log.h"
-
-#ifdef LOG_PREFIX
-# undef LOG_PREFIX
-#endif
-#define LOG_PREFIX "vdso: "
-
-int vdso_redirect_calls(void *base_to, void *base_from,
-			struct vdso_symtable *to,
-			struct vdso_symtable *from)
-{
-	return 0;
-}
diff --git a/arch/arm/vdso.c b/arch/arm/vdso.c
deleted file mode 100644
index 4d4e28a1a1de..000000000000
--- a/arch/arm/vdso.c
+++ /dev/null
@@ -1,20 +0,0 @@
-#include <sys/types.h>
-#include <unistd.h>
-
-#include "compiler.h"
-#include "asm/types.h"
-#include "parasite-syscall.h"
-#include "asm/parasite-syscall.h"
-#include "vdso.h"
-#include "log.h"
-
-#ifdef LOG_PREFIX
-# undef LOG_PREFIX
-#endif
-#define LOG_PREFIX "vdso: "
-
-int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
-			struct vm_area_list *vma_area_list)
-{
-	return 0;
-}
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 267f46cd1933..d91215c0ed71 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -3,8 +3,29 @@
 
 #include <sys/types.h>
 
+#include "asm/int.h"
 #include "protobuf/vma.pb-c.h"
 
+struct parasite_ctl;
+struct vm_area_list;
+
+#define VDSO_PROT		(PROT_READ | PROT_EXEC)
+
+#define VDSO_BAD_ADDR		(-1ul)
+#define VDSO_BAD_PFN		(-1ull)
+
+struct vdso_symbol {
+	char			name[32];
+	unsigned long		offset;
+};
+
+#define VDSO_SYMBOL_INIT	{ .offset = VDSO_BAD_ADDR, }
+
+/* Check if symbol present in symtable */
+static inline bool vdso_symbol_empty(struct vdso_symbol *s)
+{
+	return s->offset == VDSO_BAD_ADDR && s->name[0] == '\0';
+}
 
 /*
  * This is a minimal amount of symbols
@@ -19,6 +40,66 @@ enum {
 	VDSO_SYMBOL_MAX
 };
 
+struct vdso_symtable {
+	unsigned long		vma_start;
+	unsigned long		vma_end;
+	struct vdso_symbol	symbols[VDSO_SYMBOL_MAX];
+};
+
+#define VDSO_SYMTABLE_INIT						\
+	{								\
+		.vma_start	= VDSO_BAD_ADDR,			\
+		.vma_end	= VDSO_BAD_ADDR,			\
+		.symbols		= {				\
+			[0 ... VDSO_SYMBOL_MAX - 1] =			\
+				(struct vdso_symbol)VDSO_SYMBOL_INIT,	\
+			},						\
+	}
+
+/* Size of VMA associated with vdso */
+static inline unsigned long vdso_vma_size(struct vdso_symtable *t)
+{
+	return t->vma_end - t->vma_start;
+}
+
+/*
+ * Special mark which allows to identify runtime vdso where
+ * calls from proxy vdso are redirected. This mark usually
+ * placed at the start of vdso area where Elf header lives.
+ * Since such runtime vdso is solevey used by proxy and
+ * nobody else is supposed to access it, it's more-less
+ * safe to screw the Elf header with @signature and
+ * @proxy_addr.
+ *
+ * The @proxy_addr deserves a few comments. When we redirect
+ * the calls from proxy to runtime vdso, on next checkpoint
+ * it won't be possible to find which VMA is proxy, thus
+ * we save its address in the member.
+ */
+struct vdso_mark {
+	u64			signature;
+	unsigned long		proxy_addr;
+};
+
+/* Magic number (criuvdso) */
+#define VDSO_MARK_SIGNATURE	(0x6f73647675697263ULL)
+
+static inline bool is_vdso_mark(void *addr)
+{
+	struct vdso_mark *m = addr;
+
+	return m->signature == VDSO_MARK_SIGNATURE &&
+		m->proxy_addr != VDSO_BAD_ADDR;
+}
+
+static inline void vdso_put_mark(void *where, unsigned long proxy_addr)
+{
+	struct vdso_mark *m = where;
+
+	m->signature = VDSO_MARK_SIGNATURE;
+	m->proxy_addr = proxy_addr;
+}
+
 #define VDSO_SYMBOL_CLOCK_GETTIME_NAME	"__vdso_clock_gettime"
 #define VDSO_SYMBOL_GETCPU_NAME		"__vdso_getcpu"
 #define VDSO_SYMBOL_GETTIMEOFDAY_NAME	"__vdso_gettimeofday"
@@ -39,10 +120,13 @@ char *symtab_name[VDSO_SYMBOL_MAX] = {						\
 	[VDSO_SYMBOL_TIME]		= VDSO_SYMBOL_TIME_NAME,		\
 };
 
+extern struct vdso_symtable vdso_sym_rt;
+extern u64 vdso_pfn;
 
-struct vdso_symtable;
-struct parasite_ctl;
-struct vm_area_list;
+extern int vdso_init(void);
+extern int vdso_remap(char *who, unsigned long from, unsigned long to, size_t size);
+extern int vdso_fill_symtable(char *mem, size_t size, struct vdso_symtable *t);
+extern int vdso_proxify(char *who, struct vdso_symtable *sym_rt, VmaEntry *vma_entry, unsigned long vdso_rt_parked_at);
 
 extern int vdso_redirect_calls(void *base_to, void *base_from, struct vdso_symtable *to, struct vdso_symtable *from);
 extern int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
diff --git a/arch/x86/vdso-pie.c b/arch/x86/vdso-pie.c
index e3664f5fc56d..d04791916727 100644
--- a/arch/x86/vdso-pie.c
+++ b/arch/x86/vdso-pie.c
@@ -14,7 +14,6 @@
 
 #include "compiler.h"
 #include "syscall.h"
-#include "crtools.h"
 #include "vdso.h"
 #include "vma.h"
 #include "log.h"
@@ -56,3 +55,231 @@ int vdso_redirect_calls(void *base_to, void *base_from,
 
 	return 0;
 }
+
+static unsigned int get_symbol_index(char *symbol, char *symbols[], size_t size)
+{
+	unsigned int i;
+
+	for (i = 0; symbol && i < size; i++) {
+		if (!builtin_strcmp(symbol, symbols[i]))
+			return i;
+	}
+
+	return VDSO_SYMBOL_MAX;
+}
+
+/* Check if pointer is out-of-bound */
+static bool __ptr_oob(void *ptr, void *start, size_t size)
+{
+	void *end = (void *)((unsigned long)start + size);
+	return ptr > end || ptr < start;
+}
+
+int vdso_fill_symtable(char *mem, size_t size, struct vdso_symtable *t)
+{
+	Elf64_Ehdr *ehdr = (void *)mem;
+	Elf64_Shdr *shdr, *shdr_strtab;
+	Elf64_Shdr *shdr_dynsym;
+	Elf64_Shdr *shdr_dynstr;
+	Elf64_Phdr *phdr;
+	Elf64_Shdr *text;
+	Elf64_Sym *sym;
+
+	char *section_names, *dynsymbol_names;
+
+	unsigned long base = VDSO_BAD_ADDR;
+	unsigned int i, j, k;
+
+	DECLARE_VDSO(vdso_ident, vdso_symbols);
+
+	BUILD_BUG_ON(sizeof(vdso_ident) != sizeof(ehdr->e_ident));
+
+	pr_debug("Parsing at %lx %lx\n",
+		 (long)mem, (long)mem + (long)size);
+
+	/*
+	 * Make sure it's a file we support.
+	 */
+	if (builtin_memcmp(ehdr->e_ident, vdso_ident, sizeof(vdso_ident))) {
+		pr_debug("Elf header magic mismatch\n");
+		goto err;
+	}
+
+	/*
+	 * Figure out base virtual address.
+	 */
+	phdr = (void *)&mem[ehdr->e_phoff];
+	for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
+		if (__ptr_oob(phdr, mem, size))
+			goto err;
+		if (phdr->p_type == PT_LOAD) {
+			base = phdr->p_vaddr;
+			break;
+		}
+	}
+	if (base != VDSO_BAD_ADDR) {
+		pr_debug("Base address %lx\n", base);
+	} else {
+		pr_debug("No base address found\n");
+		goto err;
+	}
+
+	/*
+	 * Where the section names lays.
+	 */
+	if (ehdr->e_shstrndx == SHN_UNDEF) {
+		pr_err("Section names are not found\n");
+		goto err;
+	}
+
+	shdr = (void *)&mem[ehdr->e_shoff];
+	shdr_strtab = &shdr[ehdr->e_shstrndx];
+	if (__ptr_oob(shdr_strtab, mem, size))
+		goto err;
+
+	section_names = (void *)&mem[shdr_strtab->sh_offset];
+	shdr_dynsym = shdr_dynstr = text = NULL;
+
+	for (i = 0; i < ehdr->e_shnum; i++, shdr++) {
+		if (__ptr_oob(shdr, mem, size))
+			goto err;
+		if (__ptr_oob(&section_names[shdr->sh_name], mem, size))
+			goto err;
+
+		if (shdr->sh_type == SHT_DYNSYM &&
+		    builtin_strcmp(&section_names[shdr->sh_name],
+				   ".dynsym") == 0) {
+			shdr_dynsym = shdr;
+		} else if (shdr->sh_type == SHT_STRTAB &&
+			   builtin_strcmp(&section_names[shdr->sh_name],
+					  ".dynstr") == 0) {
+			shdr_dynstr = shdr;
+		} else if (shdr->sh_type == SHT_PROGBITS &&
+			   builtin_strcmp(&section_names[shdr->sh_name],
+					  ".text") == 0) {
+			text = shdr;
+		}
+	}
+
+	if (!shdr_dynsym || !shdr_dynstr || !text) {
+		pr_debug("No required sections found\n");
+		goto err;
+	}
+
+	dynsymbol_names = (void *)&mem[shdr_dynstr->sh_offset];
+	if (__ptr_oob(dynsymbol_names, mem, size)	||
+	    __ptr_oob(shdr_dynsym, mem, size)		||
+	    __ptr_oob(text, mem, size))
+		goto err;
+
+	/*
+	 * Walk over global symbols and choose ones we need.
+	 */
+	j = shdr_dynsym->sh_size / sizeof(*sym);
+	sym = (void *)&mem[shdr_dynsym->sh_offset];
+
+	for (i = 0; i < j; i++, sym++) {
+		if (__ptr_oob(sym, mem, size))
+			goto err;
+
+		if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL ||
+		    ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
+			continue;
+
+		if (__ptr_oob(&dynsymbol_names[sym->st_name], mem, size))
+			goto err;
+
+		k = get_symbol_index(&dynsymbol_names[sym->st_name],
+				     vdso_symbols,
+				     ARRAY_SIZE(vdso_symbols));
+		if (k != VDSO_SYMBOL_MAX) {
+			builtin_memcpy(t->symbols[k].name, vdso_symbols[k],
+				       sizeof(t->symbols[k].name));
+			t->symbols[k].offset = (unsigned long)sym->st_value - base;
+		}
+	}
+	return 0;
+err:
+	return -1;
+}
+
+int vdso_remap(char *who, unsigned long from, unsigned long to, size_t size)
+{
+	unsigned long addr;
+
+	pr_debug("Remap %s %lx -> %lx\n", who, from, to);
+
+	addr = sys_mremap(from, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, to);
+	if (addr != to) {
+		pr_err("Unable to remap %lx -> %lx %lx\n",
+		       from, to, addr);
+		return -1;
+	}
+
+	return 0;
+}
+
+int vdso_proxify(char *who, struct vdso_symtable *sym_rt, VmaEntry *vma, unsigned long vdso_rt_parked_at)
+{
+	struct vdso_symtable s = VDSO_SYMTABLE_INIT;
+	size_t size = vma_entry_len(vma);
+	bool remap_rt = true;
+
+	/*
+	 * Find symbols in dumpee vdso.
+	 */
+	if (vdso_fill_symtable((void *)vma->start, size, &s))
+		return -1;
+
+	if (size == vdso_vma_size(sym_rt)) {
+		int i;
+
+		for (i = 0; i < ARRAY_SIZE(s.symbols); i++) {
+			if (s.symbols[i].offset != sym_rt->symbols[i].offset) {
+				remap_rt = false;
+				break;
+			}
+		}
+	} else
+		remap_rt = false;
+
+	/*
+	 * Easy case -- the vdso from image has same offsets and size
+	 * as runtime, so we simply remap runtime vdso to dumpee position
+	 * without generating any proxy.
+	 */
+	if (remap_rt) {
+		pr_info("Runtime vdso matches dumpee, remap inplace\n");
+
+		if (sys_munmap((void *)vma->start, size)) {
+			pr_err("Failed to unmap %s\n", who);
+			return -1;
+		}
+
+		return vdso_remap(who, vdso_rt_parked_at, vma->start, size);
+	}
+
+	/*
+	 * Now complex case -- we need to proxify calls. We redirect
+	 * calls from dumpee vdso to runtime vdso, making dumpee
+	 * to operate as proxy vdso.
+	 */
+	pr_info("Runtime vdso mismatches dumpee, generate proxy\n");
+
+	if (vdso_redirect_calls((void *)vdso_rt_parked_at,
+				(void *)vma->start,
+				sym_rt, &s)) {
+		pr_err("Failed to proxify dumpee contents\n");
+		return -1;
+	}
+
+	/*
+	 * Put a special mark into runtime vdso, thus at next checkpoint
+	 * routine we could detect this vdso and do not dump it, since
+	 * it's auto-generated every new session if proxy required.
+	 */
+	sys_mprotect((void *)vdso_rt_parked_at,  vdso_vma_size(sym_rt), PROT_WRITE);
+	vdso_put_mark((void *)vdso_rt_parked_at, vma->start);
+	sys_mprotect((void *)vdso_rt_parked_at,  vdso_vma_size(sym_rt), VDSO_PROT);
+	return 0;
+}
diff --git a/arch/x86/vdso.c b/arch/x86/vdso.c
index 80f1c806e9af..6e24d38118c3 100644
--- a/arch/x86/vdso.c
+++ b/arch/x86/vdso.c
@@ -27,6 +27,8 @@
 #endif
 #define LOG_PREFIX "vdso: "
 
+struct vdso_symtable vdso_sym_rt = VDSO_SYMTABLE_INIT;
+u64 vdso_pfn = VDSO_BAD_PFN;
 /*
  * Find out proxy vdso vma and drop it from the list. Also
  * fix vdso status on vmas if wrong status found.
@@ -149,3 +151,48 @@ err:
 	close(fd);
 	return ret;
 }
+
+static int vdso_fill_self_symtable(struct vdso_symtable *s)
+{
+	char buf[512];
+	int ret = -1;
+	FILE *maps;
+
+	*s = (struct vdso_symtable)VDSO_SYMTABLE_INIT;
+
+	maps = fopen("/proc/self/maps", "r");
+	if (!maps) {
+		pr_perror("Can't open self-vma");
+		return -1;
+	}
+
+	while (fgets(buf, sizeof(buf), maps)) {
+		unsigned long start, end;
+
+		if (strstr(buf, "[vdso]") == NULL)
+			continue;
+
+		ret = sscanf(buf, "%lx-%lx", &start, &end);
+		if (ret != 2) {
+			ret = -1;
+			pr_err("Can't find vDSO bounds\n");
+			break;
+		}
+
+		s->vma_start = start;
+		s->vma_end = end;
+
+		ret = vdso_fill_symtable((void *)start, end - start, s);
+		break;
+	}
+
+	fclose(maps);
+	return ret;
+}
+
+int vdso_init(void)
+{
+	if (vdso_fill_self_symtable(&vdso_sym_rt))
+		return -1;
+	return vaddr_to_pfn(vdso_sym_rt.vma_start, &vdso_pfn);
+}
diff --git a/include/vdso.h b/include/vdso.h
index 9685078f9c19..c0725a3bfd43 100644
--- a/include/vdso.h
+++ b/include/vdso.h
@@ -4,97 +4,20 @@
 #include <sys/mman.h>
 #include <stdbool.h>
 
-#include "asm/vdso.h"
-#include "asm/int.h"
-
-#define VDSO_PROT		(PROT_READ | PROT_EXEC)
-
-
-#define VDSO_BAD_ADDR		(-1ul)
-#define VDSO_BAD_PFN		(-1ull)
-
-struct vdso_symbol {
-	char		name[32];
-	unsigned long	offset;
-};
-
-#define VDSO_SYMBOL_INIT						\
-	{ .offset = VDSO_BAD_ADDR, }
-
-/* Check if symbol present in symtable */
-static inline bool vdso_symbol_empty(struct vdso_symbol *s)
-{
-	return s->offset == VDSO_BAD_ADDR && s->name[0] == '\0';
-}
+#include "config.h"
 
-struct vdso_symtable {
-	unsigned long		vma_start;
-	unsigned long		vma_end;
-	struct vdso_symbol	symbols[VDSO_SYMBOL_MAX];
-};
+#ifdef CONFIG_VDSO
 
-#define VDSO_SYMTABLE_INIT						\
-	{								\
-		.vma_start	= VDSO_BAD_ADDR,			\
-		.vma_end	= VDSO_BAD_ADDR,			\
-		.symbols		= {				\
-			[0 ... VDSO_SYMBOL_MAX - 1] =			\
-				(struct vdso_symbol)VDSO_SYMBOL_INIT,	\
-			},						\
-	}
-
-#define VDSO_INIT_SYMTABLE(symtable)					\
-	*(symtable) = (struct vdso_symtable)VDSO_SYMTABLE_INIT
-
-/* Size of VMA associated with vdso */
-static inline unsigned long vdso_vma_size(struct vdso_symtable *t)
-{
-	return t->vma_end - t->vma_start;
-}
-
-/*
- * Special mark which allows to identify runtime vdso where
- * calls from proxy vdso are redirected. This mark usually
- * placed at the start of vdso area where Elf header lives.
- * Since such runtime vdso is solevey used by proxy and
- * nobody else is supposed to access it, it's more-less
- * safe to screw the Elf header with @signature and
- * @proxy_addr.
- *
- * The @proxy_addr deserves a few comments. When we redirect
- * the calls from proxy to runtime vdso, on next checkpoint
- * it won't be possible to find which VMA is proxy, thus
- * we save its address in the member.
- */
-struct vdso_mark {
-	u64			signature;
-	unsigned long		proxy_addr;
-};
-
-/* Magic number (criuvdso) */
-#define VDSO_MARK_SIGNATURE	(0x6f73647675697263ULL)
-
-static inline bool is_vdso_mark(void *addr)
-{
-	struct vdso_mark *m = addr;
-
-	return m->signature == VDSO_MARK_SIGNATURE &&
-		m->proxy_addr != VDSO_BAD_ADDR;
-}
+#include "asm/vdso.h"
 
-static inline void vdso_put_mark(void *where, unsigned long proxy_addr)
-{
-	struct vdso_mark *m = where;
+#else /* CONFIG_VDSO */
 
-	m->signature = VDSO_MARK_SIGNATURE;
-	m->proxy_addr = proxy_addr;
-}
+#define vdso_init()						(0)
+#define parasite_fixup_vdso(ctl, pid, vma_area_list)		(0)
+#define vdso_vma_size(t)					(0)
+#define vdso_remap(who, from, to, size)				(0)
+#define vdso_proxify(who, sym_rt, vma, vdso_rt_parked_at)	(0)
 
-extern struct vdso_symtable vdso_sym_rt;
-extern u64 vdso_pfn;
-extern int vdso_init(void);
-extern int vdso_remap(char *who, unsigned long from, unsigned long to, size_t size);
-extern int vdso_fill_symtable(char *mem, size_t size, struct vdso_symtable *t);
-extern int vdso_proxify(char *who, struct vdso_symtable *sym_rt, VmaEntry *vma_entry, unsigned long vdso_rt_parked_at);
+#endif /* CONFIG_VDSO */
 
 #endif /* __CR_VDSO_H__ */
diff --git a/pie/Makefile b/pie/Makefile
index c75520ecbff2..f2bb030cead1 100644
--- a/pie/Makefile
+++ b/pie/Makefile
@@ -4,9 +4,10 @@ targets			+= restorer
 obj-y			+= log-simple.o
 obj-y			+= util.o
 obj-y			+= util-fd.o
-obj-y			+= $(VDSO_O)
 
+ifeq ($(VDSO),y)
 obj-e			+= $(ARCH_DIR)/vdso-pie.o
+endif
 
 parasite-obj-y		+= parasite.o
 parasite-asm-e		+= $(ARCH_DIR)/parasite-head.o
diff --git a/pie/vdso-stub.c b/pie/vdso-stub.c
deleted file mode 100644
index 37393719ac53..000000000000
--- a/pie/vdso-stub.c
+++ /dev/null
@@ -1,31 +0,0 @@
-#include <elf.h>
-
-#include <sys/mman.h>
-
-#include "compiler.h"
-#include "vdso.h"
-#include "syscall.h"
-#include "log.h"
-
-#include "asm/string.h"
-
-
-#ifdef LOG_PREFIX
-# undef LOG_PREFIX
-#endif
-#define LOG_PREFIX "vdso: "
-
-int vdso_remap(char *who, unsigned long from, unsigned long to, size_t size)
-{
-	return 0;
-}
-
-int vdso_fill_symtable(char *mem, size_t size, struct vdso_symtable *t)
-{
-	return 0;
-}
-
-int vdso_proxify(char *who, struct vdso_symtable *sym_rt, VmaEntry *vma, unsigned long vdso_rt_parked_at)
-{
-	return 0;
-}
diff --git a/pie/vdso.c b/pie/vdso.c
deleted file mode 100644
index 70bcf407cc2a..000000000000
--- a/pie/vdso.c
+++ /dev/null
@@ -1,255 +0,0 @@
-#include <elf.h>
-
-#include <sys/mman.h>
-
-#include "compiler.h"
-#include "vdso.h"
-#include "syscall.h"
-#include "log.h"
-#include "vma.h"
-
-#include "asm/string.h"
-
-
-#ifdef LOG_PREFIX
-# undef LOG_PREFIX
-#endif
-#define LOG_PREFIX "vdso: "
-
-
-static unsigned int get_symbol_index(char *symbol, char *symbols[], size_t size)
-{
-	unsigned int i;
-
-	for (i = 0; symbol && i < size; i++) {
-		if (!builtin_strcmp(symbol, symbols[i]))
-			return i;
-	}
-
-	return VDSO_SYMBOL_MAX;
-}
-
-/* Check if pointer is out-of-bound */
-static bool __ptr_oob(void *ptr, void *start, size_t size)
-{
-	void *end = (void *)((unsigned long)start + size);
-	return ptr > end || ptr < start;
-}
-
-int vdso_fill_symtable(char *mem, size_t size, struct vdso_symtable *t)
-{
-	Elf64_Ehdr *ehdr = (void *)mem;
-	Elf64_Shdr *shdr, *shdr_strtab;
-	Elf64_Shdr *shdr_dynsym;
-	Elf64_Shdr *shdr_dynstr;
-	Elf64_Phdr *phdr;
-	Elf64_Shdr *text;
-	Elf64_Sym *sym;
-
-	char *section_names, *dynsymbol_names;
-
-	unsigned long base = VDSO_BAD_ADDR;
-	unsigned int i, j, k;
-
-	DECLARE_VDSO(vdso_ident, vdso_symbols);
-
-	BUILD_BUG_ON(sizeof(vdso_ident) != sizeof(ehdr->e_ident));
-
-	pr_debug("Parsing at %lx %lx\n",
-		 (long)mem, (long)mem + (long)size);
-
-	/*
-	 * Make sure it's a file we support.
-	 */
-	if (builtin_memcmp(ehdr->e_ident, vdso_ident, sizeof(vdso_ident))) {
-		pr_debug("Elf header magic mismatch\n");
-		goto err;
-	}
-
-	/*
-	 * Figure out base virtual address.
-	 */
-	phdr = (void *)&mem[ehdr->e_phoff];
-	for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
-		if (__ptr_oob(phdr, mem, size))
-			goto err;
-		if (phdr->p_type == PT_LOAD) {
-			base = phdr->p_vaddr;
-			break;
-		}
-	}
-	if (base != VDSO_BAD_ADDR) {
-		pr_debug("Base address %lx\n", base);
-	} else {
-		pr_debug("No base address found\n");
-		goto err;
-	}
-
-	/*
-	 * Where the section names lays.
-	 */
-	if (ehdr->e_shstrndx == SHN_UNDEF) {
-		pr_err("Section names are not found\n");
-		goto err;
-	}
-
-	shdr = (void *)&mem[ehdr->e_shoff];
-	shdr_strtab = &shdr[ehdr->e_shstrndx];
-	if (__ptr_oob(shdr_strtab, mem, size))
-		goto err;
-
-	section_names = (void *)&mem[shdr_strtab->sh_offset];
-	shdr_dynsym = shdr_dynstr = text = NULL;
-
-	for (i = 0; i < ehdr->e_shnum; i++, shdr++) {
-		if (__ptr_oob(shdr, mem, size))
-			goto err;
-		if (__ptr_oob(&section_names[shdr->sh_name], mem, size))
-			goto err;
-
-#if 0
-		pr_debug("section: %2d -> %s\n",
-			 i, &section_names[shdr->sh_name]);
-#endif
-
-		if (shdr->sh_type == SHT_DYNSYM &&
-		    builtin_strcmp(&section_names[shdr->sh_name],
-				   ".dynsym") == 0) {
-			shdr_dynsym = shdr;
-		} else if (shdr->sh_type == SHT_STRTAB &&
-			   builtin_strcmp(&section_names[shdr->sh_name],
-					  ".dynstr") == 0) {
-			shdr_dynstr = shdr;
-		} else if (shdr->sh_type == SHT_PROGBITS &&
-			   builtin_strcmp(&section_names[shdr->sh_name],
-					  ".text") == 0) {
-			text = shdr;
-		}
-	}
-
-	if (!shdr_dynsym || !shdr_dynstr || !text) {
-		pr_debug("No required sections found\n");
-		goto err;
-	}
-
-	dynsymbol_names = (void *)&mem[shdr_dynstr->sh_offset];
-	if (__ptr_oob(dynsymbol_names, mem, size)	||
-	    __ptr_oob(shdr_dynsym, mem, size)		||
-	    __ptr_oob(text, mem, size))
-		goto err;
-
-	/*
-	 * Walk over global symbols and choose ones we need.
-	 */
-	j = shdr_dynsym->sh_size / sizeof(*sym);
-	sym = (void *)&mem[shdr_dynsym->sh_offset];
-
-	for (i = 0; i < j; i++, sym++) {
-		if (__ptr_oob(sym, mem, size))
-			goto err;
-
-		if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL ||
-		    ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
-			continue;
-
-		if (__ptr_oob(&dynsymbol_names[sym->st_name], mem, size))
-			goto err;
-
-		k = get_symbol_index(&dynsymbol_names[sym->st_name],
-				     vdso_symbols,
-				     ARRAY_SIZE(vdso_symbols));
-		if (k != VDSO_SYMBOL_MAX) {
-			builtin_memcpy(t->symbols[k].name, vdso_symbols[k],
-				       sizeof(t->symbols[k].name));
-			t->symbols[k].offset = (unsigned long)sym->st_value - base;
-#if 0
-			pr_debug("symbol: %#-16lx %2d %s\n",
-				 t->symbols[k].offset, sym->st_shndx, t->symbols[k].name);
-#endif
-		}
-	}
-	return 0;
-err:
-	return -1;
-}
-
-int vdso_remap(char *who, unsigned long from, unsigned long to, size_t size)
-{
-	unsigned long addr;
-
-	pr_debug("Remap %s %lx -> %lx\n", who, from, to);
-
-	addr = sys_mremap(from, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, to);
-	if (addr != to) {
-		pr_err("Unable to remap %lx -> %lx %lx\n",
-		       from, to, addr);
-		return -1;
-	}
-
-	return 0;
-}
-
-int vdso_proxify(char *who, struct vdso_symtable *sym_rt, VmaEntry *vma, unsigned long vdso_rt_parked_at)
-{
-	struct vdso_symtable s = VDSO_SYMTABLE_INIT;
-	size_t size = vma_entry_len(vma);
-	bool remap_rt = true;
-
-	/*
-	 * Find symbols in dumpee vdso.
-	 */
-	if (vdso_fill_symtable((void *)vma->start, size, &s))
-		return -1;
-
-	if (size == vdso_vma_size(sym_rt)) {
-		int i;
-
-		for (i = 0; i < ARRAY_SIZE(s.symbols); i++) {
-			if (s.symbols[i].offset != sym_rt->symbols[i].offset) {
-				remap_rt = false;
-				break;
-			}
-		}
-	} else
-		remap_rt = false;
-
-	/*
-	 * Easy case -- the vdso from image has same offsets and size
-	 * as runtime, so we simply remap runtime vdso to dumpee position
-	 * without generating any proxy.
-	 */
-	if (remap_rt) {
-		pr_info("Runtime vdso matches dumpee, remap inplace\n");
-
-		if (sys_munmap((void *)vma->start, size)) {
-			pr_err("Failed to unmap %s\n", who);
-			return -1;
-		}
-
-		return vdso_remap(who, vdso_rt_parked_at, vma->start, size);
-	}
-
-	/*
-	 * Now complex case -- we need to proxify calls. We redirect
-	 * calls from dumpee vdso to runtime vdso, making dumpee
-	 * to operate as proxy vdso.
-	 */
-	pr_info("Runtime vdso mismatches dumpee, generate proxy\n");
-
-	if (vdso_redirect_calls((void *)vdso_rt_parked_at,
-				(void *)vma->start,
-				sym_rt, &s)) {
-		pr_err("Failed to proxify dumpee contents\n");
-		return -1;
-	}
-
-	/*
-	 * Put a special mark into runtime vdso, thus at next checkpoint
-	 * routine we could detect this vdso and do not dump it, since
-	 * it's auto-generated every new session if proxy required.
-	 */
-	sys_mprotect((void *)vdso_rt_parked_at,  vdso_vma_size(sym_rt), PROT_WRITE);
-	vdso_put_mark((void *)vdso_rt_parked_at, vma->start);
-	sys_mprotect((void *)vdso_rt_parked_at,  vdso_vma_size(sym_rt), VDSO_PROT);
-	return 0;
-}
diff --git a/proc_parse.c b/proc_parse.c
index d0a1bfa79c87..5ad789171051 100644
--- a/proc_parse.c
+++ b/proc_parse.c
@@ -401,9 +401,14 @@ int parse_smaps(pid_t pid, struct vm_area_list *vma_area_list, bool use_map_file
 		} else if (strstr(buf, "[vsyscall]") || strstr(buf, "[vectors]")) {
 			vma_area->e->status |= VMA_AREA_VSYSCALL;
 		} else if (strstr(buf, "[vdso]")) {
+#ifdef CONFIG_VDSO
 			vma_area->e->status |= VMA_AREA_REGULAR;
 			if ((vma_area->e->prot & VDSO_PROT) == VDSO_PROT)
 				vma_area->e->status |= VMA_AREA_VDSO;
+#else
+			pr_warn_once("Found vDSO area without support\n");
+			goto err;
+#endif
 		} else if (strstr(buf, "[heap]")) {
 			vma_area->e->status |= VMA_AREA_REGULAR | VMA_AREA_HEAP;
 		} else {
diff --git a/vdso-stub.c b/vdso-stub.c
deleted file mode 100644
index 60aa72d75cd1..000000000000
--- a/vdso-stub.c
+++ /dev/null
@@ -1,23 +0,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-
-#include "vdso.h"
-#include "log.h"
-#include "util.h"
-
-#ifdef LOG_PREFIX
-# undef LOG_PREFIX
-#endif
-#define LOG_PREFIX "vdso: "
-
-
-struct vdso_symtable vdso_sym_rt = VDSO_SYMTABLE_INIT;
-u64 vdso_pfn = VDSO_BAD_PFN;
-
-
-int vdso_init(void)
-{
-	return 0;
-}
diff --git a/vdso.c b/vdso.c
deleted file mode 100644
index d887e786e8dd..000000000000
--- a/vdso.c
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-
-#include "vdso.h"
-#include "log.h"
-#include "util.h"
-
-#ifdef LOG_PREFIX
-# undef LOG_PREFIX
-#endif
-#define LOG_PREFIX "vdso: "
-
-
-struct vdso_symtable vdso_sym_rt = VDSO_SYMTABLE_INIT;
-u64 vdso_pfn = VDSO_BAD_PFN;
-
-static int vdso_fill_self_symtable(struct vdso_symtable *s)
-{
-	char buf[512];
-	int ret = -1;
-	FILE *maps;
-
-	VDSO_INIT_SYMTABLE(s);
-
-	maps = fopen("/proc/self/maps", "r");
-	if (!maps) {
-		pr_perror("Can't open self-vma");
-		return -1;
-	}
-
-	while (fgets(buf, sizeof(buf), maps)) {
-		unsigned long start, end;
-
-		if (strstr(buf, "[vdso]") == NULL)
-			continue;
-
-		ret = sscanf(buf, "%lx-%lx", &start, &end);
-		if (ret != 2) {
-			ret = -1;
-			pr_err("Can't find vDSO bounds\n");
-			break;
-		}
-
-		s->vma_start = start;
-		s->vma_end = end;
-
-		ret = vdso_fill_symtable((void *)start, end - start, s);
-		break;
-	}
-
-	fclose(maps);
-	return ret;
-}
-
-int vdso_init(void)
-{
-	if (vdso_fill_self_symtable(&vdso_sym_rt))
-		return -1;
-
-	return vaddr_to_pfn(vdso_sym_rt.vma_start, &vdso_pfn);
-}
-
-- 
1.9.3



More information about the CRIU mailing list