[PATCH 3/4] arch: x86 -- Add dumping of vDSO layout

Cyrill Gorcunov gorcunov at openvz.org
Sat Apr 13 17:36:17 EDT 2013


Here we introduce vDSO dumping. Because the vDSO is generated by the kernel
and all processes in a system host the same vDSO content, we simply dump
crtools' own vDSO without touching the dumpee's memory at all.

Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
---
 arch/x86/Makefile  |   1 +
 arch/x86/crtools.c |  68 ++++++++++++++++++++++
 arch/x86/vdso.c    | 165 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 cr-dump.c          |   4 ++
 include/vdso.h     |  98 +++++++++++++++++++++++++++++++
 5 files changed, 336 insertions(+)
 create mode 100644 arch/x86/vdso.c
 create mode 100644 include/vdso.h

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 8e950a3..227b18a 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -6,6 +6,7 @@ SYS-ASM		:= syscalls.S
 syscalls-asm-y	+= $(SYS-ASM:.S=).o
 crtools-obj-y	+= crtools.o
 crtools-obj-y	+= cpu.o
+crtools-obj-y	+= vdso.o
 
 SYS-DEF		:= syscall-x86-64.def
 SYS-ASM-COMMON	:= syscall-common-x86-64.S
diff --git a/arch/x86/crtools.c b/arch/x86/crtools.c
index e7e9d63..e061816 100644
--- a/arch/x86/crtools.c
+++ b/arch/x86/crtools.c
@@ -2,6 +2,8 @@
 #include <unistd.h>
 #include <elf.h>
 
+#include <sys/mman.h>
+
 #include "asm/processor-flags.h"
 #include "asm/types.h"
 #include "asm/fpu.h"
@@ -14,10 +16,12 @@
 #include "log.h"
 #include "util.h"
 #include "cpu.h"
+#include "vdso.h"
 
 #include "protobuf.h"
 #include "protobuf/core.pb-c.h"
 #include "protobuf/creds.pb-c.h"
+#include "protobuf/vdso.pb-c.h"
 
 /*
  * Injected syscall instruction
@@ -106,6 +110,70 @@ int syscall_seized(struct parasite_ctl *ctl, int nr, unsigned long *ret,
 	return 0;
 }
 
+/*
+ * Find the [vdso] area of the current process in /proc/self/maps, store
+ * its bounds in @t->vma_start / @t->vma_end and hand the mapped image to
+ * arch_parse_vdso() to fill in the symbols.
+ *
+ * Only the first line mentioning "[vdso]" is examined.
+ *
+ * Returns the result of arch_parse_vdso() when the area is found, or -1
+ * if the maps file cannot be opened, no [vdso] line is present, or the
+ * bounds on that line cannot be parsed.
+ */
+int arch_fill_self_vdso(symtable_t *t)
+{
+	unsigned long start, end;
+	bool found = false;
+	char buf[512];
+	int ret = -1;
+	FILE *maps;
+
+	maps = fopen("/proc/self/maps", "r");
+	if (!maps) {
+		pr_perror("Can't open self-vma");
+		return -1;
+	}
+
+	while (fgets(buf, sizeof(buf), maps)) {
+		if (strstr(buf, "[vdso]") == NULL)
+			continue;
+
+		found = true;
+
+		/* Each maps line starts with "<start>-<end>" in hex. */
+		if (sscanf(buf, "%lx-%lx", &start, &end) != 2) {
+			pr_err("Can't find vDSO bounds\n");
+			break;
+		}
+
+		pr_debug("vdso: Got area %lx-%lx\n", start, end);
+
+		t->vma_start = start;
+		t->vma_end = end;
+		ret = arch_parse_vdso((void *)start, end - start, t);
+		break;
+	}
+
+	/* Do not fail silently when the process has no vDSO mapped. */
+	if (!found)
+		pr_err("No vDSO area found in self-vma\n");
+
+	fclose(maps);
+	return ret;
+}
+
+/*
+ * Write the layout of our own vDSO into the CR_FD_VDSO image: one
+ * VdsoSymbolEntry per VDSO_SYMBOL_* slot, in index order.
+ *
+ * The table starts out as SYMTABLE_INIT, so a symbol the vDSO does not
+ * export is written with an empty name and a VDSO_BAD_ADDR offset, i.e.
+ * in the form arch_is_vdso_symbol_empty() recognizes, rather than as a
+ * bogus symbol at offset 0.
+ *
+ * Returns 0 on success; a non-zero value if the vDSO cannot be parsed,
+ * the image cannot be opened or an entry cannot be written.
+ */
+int arch_dump_vdso_layout(void)
+{
+	VdsoSymbolEntry symbol = VDSO_SYMBOL_ENTRY__INIT;
+	symtable_t t = SYMTABLE_INIT;
+	int ret, fd, i;
+
+	ret = arch_fill_self_vdso(&t);
+	if (ret)
+		goto err;
+
+	ret = -1;
+	fd = open_image(CR_FD_VDSO, O_DUMP);
+	if (fd < 0)
+		goto err;
+
+	/* Stop at the first entry that fails to be written. */
+	ret = 0;
+	for (i = 0; ret == 0 && i < VDSO_SYMBOL_MAX; i++) {
+		symbol.name = t.sym[i].name;
+		symbol.offset = t.sym[i].offset;
+		ret = pb_write_one(fd, &symbol, PB_VDSO);
+	}
+
+	close(fd);
+err:
+	return ret;
+}
+
 int get_task_regs(pid_t pid, CoreEntry *core, const struct parasite_ctl *ctl)
 {
 	struct xsave_struct xsave	= {  };
diff --git a/arch/x86/vdso.c b/arch/x86/vdso.c
new file mode 100644
index 0000000..2d7c715
--- /dev/null
+++ b/arch/x86/vdso.c
@@ -0,0 +1,165 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+
+#include <sys/types.h>
+
+#include "asm/elf.h"
+
+#include "compiler.h"
+#include "xmalloc.h"
+#include "vdso.h"
+
+#ifdef LOG_PREFIX
+#undef LOG_PREFIX
+#endif
+#define LOG_PREFIX "vdso: "
+
+/*
+ * Names of the vDSO symbols we look for, indexed by the VDSO_SYMBOL_*
+ * constants. Every slot below VDSO_SYMBOL_MAX has an entry, which the
+ * lookup in arch_vdso_get_symbol_index() relies on when it passes each
+ * element to strcmp().
+ */
+static const char *vdso_x86_symbols[VDSO_SYMBOL_MAX] = {
+	[VDSO_SYMBOL_GETTIMEOFDAY]	= "__vdso_gettimeofday",
+	[VDSO_SYMBOL_GETCPU]		= "__vdso_getcpu",
+	[VDSO_SYMBOL_CLOCK_GETTIME]	= "__vdso_clock_gettime",
+	[VDSO_SYMBOL_TIME]		= "__vdso_time",
+};
+
+/*
+ * Translate a VDSO_SYMBOL_* index into the name stored in
+ * vdso_x86_symbols. An index outside of the table yields "Unknown".
+ */
+const char *arch_vdso_get_symbol_name(unsigned int index)
+{
+	if (index >= ARRAY_SIZE(vdso_x86_symbols))
+		return "Unknown";
+
+	return vdso_x86_symbols[index];
+}
+
+/*
+ * Look @symbol up in vdso_x86_symbols and return its VDSO_SYMBOL_* index.
+ * Returns VDSO_SYMBOL_MAX when @symbol is NULL or is not in the table.
+ */
+unsigned int arch_vdso_get_symbol_index(char *symbol)
+{
+	unsigned int idx;
+
+	if (!symbol)
+		return VDSO_SYMBOL_MAX;
+
+	/*
+	 * A linear scan is good enough while the table is this small;
+	 * switch to something faster if it ever grows.
+	 */
+	for (idx = 0; idx < ARRAY_SIZE(vdso_x86_symbols); idx++) {
+		if (strcmp(symbol, vdso_x86_symbols[idx]) == 0)
+			return idx;
+	}
+
+	return VDSO_SYMBOL_MAX;
+}
+
+/*
+ * The e_ident bytes we expect at the start of the vDSO image: the
+ * "\x7fELF" magic, then class 0x02 (64-bit), data encoding 0x01
+ * (little-endian), version 0x01 and zeroes for the remaining bytes.
+ * arch_parse_vdso() refuses any image whose e_ident differs.
+ */
+static const char vdso_ident[] = {
+	0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+/*
+ * Tell whether @len bytes starting at offset @off fit into an image of
+ * @size bytes. Written so that the check itself cannot overflow.
+ */
+static bool vdso_range_ok(unsigned long off, unsigned long len, size_t size)
+{
+	return off <= size && len <= size - off;
+}
+
+/*
+ * Parse an in-memory x86-64 vDSO image and collect the symbols we need.
+ *
+ * @mem:  start of the ELF image
+ * @size: number of bytes available at @mem
+ * @t:    symbol table to fill. For every name of vdso_x86_symbols found
+ *        among the global function symbols of .dynsym, the matching slot
+ *        gets the name and the symbol's offset from the virtual address
+ *        of the first PT_LOAD segment. Other slots are left untouched.
+ *
+ * The ELF header, the program and section header tables and the .dynsym
+ * contents are checked against @size before they are walked. For string
+ * tables only the starting offset is checked, so the strings themselves
+ * are assumed to be NUL-terminated inside the image.
+ *
+ * Returns 0 on success, -1 if the image is malformed or lacks one of the
+ * .dynsym, .dynstr or .text sections.
+ */
+int arch_parse_vdso(char *mem, size_t size, symtable_t *t)
+{
+	Elf64_Ehdr *ehdr = (void *)mem;
+	Elf64_Shdr *shdr, *shdr_strtab;
+	Elf64_Shdr *shdr_dynsym, *shdr_dynstr;
+	Elf64_Phdr *phdr;
+	Elf64_Shdr *text;
+	Elf64_Sym *sym;
+
+	char *section_names, *dynsymbol_names;
+
+	unsigned long base = VDSO_BAD_ADDR;
+	unsigned int i, k, nr_syms;
+
+	BUILD_BUG_ON(sizeof(vdso_ident) != sizeof(ehdr->e_ident));
+
+	if (size < sizeof(*ehdr)) {
+		pr_err("Image is too small for Elf header\n");
+		goto err;
+	}
+
+	/*
+	 * Make sure it's a file we support.
+	 */
+	if (memcmp(ehdr->e_ident, vdso_ident, sizeof(vdso_ident))) {
+		pr_err("Elf header magic mismatch\n");
+		goto err;
+	}
+
+	/*
+	 * Figure out base virtual address: it is taken from
+	 * the first loadable segment.
+	 */
+	if (!vdso_range_ok(ehdr->e_phoff,
+			   (unsigned long)ehdr->e_phnum * sizeof(*phdr),
+			   size)) {
+		pr_err("Program headers are out of image bounds\n");
+		goto err;
+	}
+
+	phdr = (void *)&mem[ehdr->e_phoff];
+	for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
+		if (phdr->p_type == PT_LOAD) {
+			base = phdr->p_vaddr;
+			break;
+		}
+	}
+	if (base != VDSO_BAD_ADDR) {
+		pr_debug("Base address %lx\n", base);
+	} else {
+		pr_err("No base address found\n");
+		goto err;
+	}
+
+	/*
+	 * Where the section names lie.
+	 */
+	if (ehdr->e_shstrndx == SHN_UNDEF) {
+		pr_err("Section names are not found\n");
+		goto err;
+	}
+
+	if (!vdso_range_ok(ehdr->e_shoff,
+			   (unsigned long)ehdr->e_shnum * sizeof(*shdr),
+			   size) ||
+	    ehdr->e_shstrndx >= ehdr->e_shnum) {
+		pr_err("Section headers are out of image bounds\n");
+		goto err;
+	}
+
+	shdr = (void *)&mem[ehdr->e_shoff];
+	shdr_strtab = &shdr[ehdr->e_shstrndx];
+	if (shdr_strtab->sh_offset >= size) {
+		pr_err("Section names are out of image bounds\n");
+		goto err;
+	}
+	section_names = (void *)&mem[shdr_strtab->sh_offset];
+
+	shdr_dynsym = shdr_dynstr = text = NULL;
+
+	for (i = 0; i < ehdr->e_shnum; i++, shdr++) {
+
+		pr_debug("section: %2u -> %s\n",
+			 i, &section_names[shdr->sh_name]);
+
+		if (shdr->sh_type == SHT_DYNSYM &&
+		    strcmp(&section_names[shdr->sh_name],
+			   ".dynsym") == 0) {
+			shdr_dynsym = shdr;
+		} else if (shdr->sh_type == SHT_STRTAB &&
+		    strcmp(&section_names[shdr->sh_name],
+			   ".dynstr") == 0) {
+			shdr_dynstr = shdr;
+		} else if (shdr->sh_type == SHT_PROGBITS &&
+		    strcmp(&section_names[shdr->sh_name],
+			   ".text") == 0) {
+			text = shdr;
+		}
+	}
+
+	if (!shdr_dynsym || !shdr_dynstr || !text) {
+		pr_err("No required sections found\n");
+		goto err;
+	}
+
+	if (shdr_dynstr->sh_offset >= size ||
+	    !vdso_range_ok(shdr_dynsym->sh_offset,
+			   shdr_dynsym->sh_size, size)) {
+		pr_err("Dynamic symbols are out of image bounds\n");
+		goto err;
+	}
+
+	dynsymbol_names = (void *)&mem[shdr_dynstr->sh_offset];
+
+	/*
+	 * Walk over global symbols and choose ones we need.
+	 */
+	nr_syms = shdr_dynsym->sh_size / sizeof(*sym);
+	sym = (void *)&mem[shdr_dynsym->sh_offset];
+
+	for (i = 0; i < nr_syms; i++, sym++) {
+		unsigned long offset;
+		char *name;
+
+		if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL ||
+		    ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
+			continue;
+
+		name = &dynsymbol_names[sym->st_name];
+		offset = (unsigned long)sym->st_value - base;
+
+		k = arch_vdso_get_symbol_index(name);
+		if (k != VDSO_SYMBOL_MAX) {
+			/*
+			 * The table entry is a string literal shorter
+			 * than the name field, so copy it as a string
+			 * instead of reading sizeof(name) bytes from it.
+			 */
+			strncpy(t->sym[k].name, vdso_x86_symbols[k],
+				sizeof(t->sym[k].name) - 1);
+			t->sym[k].name[sizeof(t->sym[k].name) - 1] = '\0';
+			t->sym[k].offset = offset;
+		}
+
+		/*
+		 * Report the symbol from the ELF data itself: for an
+		 * unknown symbol k is VDSO_SYMBOL_MAX and must not be
+		 * used to index t->sym[].
+		 */
+		pr_debug("%csymbol: %#-16lx %2d %s\n",
+			 k != VDSO_SYMBOL_MAX ? '+' : '-',
+			 offset, sym->st_shndx, name);
+	}
+
+	return 0;
+err:
+	return -1;
+}
diff --git a/cr-dump.c b/cr-dump.c
index 5743551..3e9d4bf 100644
--- a/cr-dump.c
+++ b/cr-dump.c
@@ -59,6 +59,7 @@
 #include "file-lock.h"
 #include "page-xfer.h"
 #include "kerndat.h"
+#include "vdso.h"
 
 #include "asm/dump.h"
 
@@ -1583,6 +1584,9 @@ int cr_dump_tasks(pid_t pid, const struct cr_options *opts)
 	if (collect_sockets(pid))
 		goto err;
 
+	if (arch_dump_vdso_layout())
+		goto err;
+
 	glob_fdset = cr_glob_fdset_open(O_DUMP);
 	if (!glob_fdset)
 		goto err;
diff --git a/include/vdso.h b/include/vdso.h
new file mode 100644
index 0000000..b41451b
--- /dev/null
+++ b/include/vdso.h
@@ -0,0 +1,98 @@
+#ifndef __CR_VDSO_H__
+#define __CR_VDSO_H__
+
+#include <stdbool.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "asm/int.h"
+#include "compiler.h"
+
+/*
+ * This is a minimal set of symbols we should support at the moment.
+ *
+ * The constants index the sym[] array of symtable_t. VDSO_SYMBOL_MAX is
+ * the number of supported symbols and is also what
+ * arch_vdso_get_symbol_index() returns for a name it does not know.
+ */
+enum {
+	VDSO_SYMBOL_GETTIMEOFDAY	= 0,
+	VDSO_SYMBOL_GETCPU,
+	VDSO_SYMBOL_CLOCK_GETTIME,
+	VDSO_SYMBOL_TIME,
+
+	VDSO_SYMBOL_MAX
+};
+
+#define VDSO_BAD_ADDR	(-1ul)
+
+/*
+ * A single vDSO symbol. SYMBOL_INIT yields the "empty" state: offset set
+ * to VDSO_BAD_ADDR and a zeroed name.
+ */
+typedef struct symbol_s {
+	char		name[32];	/* NUL-terminated symbol name */
+	unsigned long	offset;		/* offset of the symbol, or VDSO_BAD_ADDR */
+} symbol_t;
+
+#define SYMBOL_INIT						\
+	{ .offset = VDSO_BAD_ADDR, }
+
+/*
+ * Description of a vDSO area: the bounds of its mapping and one symbol
+ * slot per VDSO_SYMBOL_* index.
+ */
+typedef struct symtable_s {
+	unsigned long	vma_start;	/* first address of the vDSO area */
+	unsigned long	vma_end;	/* end address of the vDSO area */
+	symbol_t	sym[VDSO_SYMBOL_MAX];
+} symtable_t;
+
+#define symtable_vma_size(s)					\
+	(unsigned long)((s)->vma_end - (s)->vma_start)
+
+#define SYMTABLE_INIT						\
+	{							\
+		.vma_start	= VDSO_BAD_ADDR,		\
+		.vma_end	= VDSO_BAD_ADDR,		\
+		.sym		= {				\
+			[0 ... VDSO_SYMBOL_MAX - 1] =		\
+				(symbol_t) SYMBOL_INIT,		\
+			},					\
+	}
+
+#define INIT_SYMTABLE(symtable)					\
+	*(symtable) = (symtable_t) SYMTABLE_INIT
+
+/*
+ * A symbol is empty when it has neither an offset (VDSO_BAD_ADDR)
+ * nor a name.
+ */
+static inline bool arch_is_vdso_symbol_empty(symbol_t *s)
+{
+	if (s->offset != VDSO_BAD_ADDR)
+		return false;
+
+	return s->name[0] == '\0';
+}
+
+/*
+ * A symbol is valid when it is either completely empty or carries
+ * both a name and an offset; a half-filled symbol is not valid.
+ */
+static inline bool arch_is_vdso_symbol_valid(symbol_t *s)
+{
+	if (arch_is_vdso_symbol_empty(s))
+		return true;
+
+	return s->offset != VDSO_BAD_ADDR && s->name[0] != '\0';
+}
+
+/*
+ * Tell whether every symbol slot of @t is empty.
+ */
+static inline bool arch_is_vdso_symbols_empty(symtable_t *t)
+{
+	symbol_t *s;
+
+	for (s = t->sym; s < t->sym + ARRAY_SIZE(t->sym); s++) {
+		if (!arch_is_vdso_symbol_empty(s))
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Architecture hooks. They are implemented for x86-64 only; on other
+ * architectures the stubs below find no symbols and report success.
+ *
+ * The stubs are static inline so that this header can be included from
+ * several translation units, and each of them returns a value because
+ * callers test the result.
+ */
+#if CONFIG_X86_64
+extern const char *arch_vdso_get_symbol_name(unsigned int index);
+extern unsigned int arch_vdso_get_symbol_index(char *symbol);
+extern int arch_fill_self_vdso(symtable_t *t);
+extern int arch_dump_vdso_layout(void);
+extern int arch_parse_vdso(char *mem, size_t size, symtable_t *t);
+#else
+static inline const char *arch_vdso_get_symbol_name(unsigned int index) { return NULL; }
+static inline unsigned int arch_vdso_get_symbol_index(char *symbol) { return VDSO_SYMBOL_MAX; }
+static inline int arch_fill_self_vdso(symtable_t *t) { return 0; }
+static inline int arch_dump_vdso_layout(void) { return 0; }
+static inline int arch_parse_vdso(char *mem, size_t size, symtable_t *t) { return 0; }
+#endif
+
+#endif /* __CR_VDSO_H__ */
-- 
1.8.1.4


--xo44VMWPx7vlQ2+2--


More information about the CRIU mailing list