[CRIU] [PATCH 3/4] arch: x86 -- Add dumping of vDSO layout

Andrew Vagin avagin at parallels.com
Tue Apr 16 06:12:11 EDT 2013


On Tue, Apr 16, 2013 at 01:26:10PM +0400, Cyrill Gorcunov wrote:
> 
> Here we introduce vDSO dumping. Because the vDSO is generated by the kernel
> and all processes in a system host the same vDSO content, we simply
> dump crtools' own vDSO without touching the dumpee's memory at all.
> 
> Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
> ---
>  arch/x86/Makefile  |   1 +
>  arch/x86/crtools.c | 123 ++++++++++++++++++++++++++++++++
>  arch/x86/vdso.c    | 206 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  cr-dump.c          |   4 ++
>  include/vdso.h     | 102 ++++++++++++++++++++++++++
>  5 files changed, 436 insertions(+)
>  create mode 100644 arch/x86/vdso.c
>  create mode 100644 include/vdso.h
> 

> diff --git a/arch/x86/Makefile b/arch/x86/Makefile
> index 8e950a3..227b18a 100644
> --- a/arch/x86/Makefile
> +++ b/arch/x86/Makefile
> @@ -6,6 +6,7 @@ SYS-ASM		:= syscalls.S
>  syscalls-asm-y	+= $(SYS-ASM:.S=).o
>  crtools-obj-y	+= crtools.o
>  crtools-obj-y	+= cpu.o
> +crtools-obj-y	+= vdso.o
>  
>  SYS-DEF		:= syscall-x86-64.def
>  SYS-ASM-COMMON	:= syscall-common-x86-64.S
> diff --git a/arch/x86/crtools.c b/arch/x86/crtools.c
> index e7e9d63..06d6631 100644
> --- a/arch/x86/crtools.c
> +++ b/arch/x86/crtools.c
> @@ -2,6 +2,8 @@
>  #include <unistd.h>
>  #include <elf.h>
>  
> +#include <sys/mman.h>
> +
>  #include "asm/processor-flags.h"
>  #include "asm/types.h"
>  #include "asm/fpu.h"
> @@ -14,10 +16,12 @@
>  #include "log.h"
>  #include "util.h"
>  #include "cpu.h"
> +#include "vdso.h"
>  
>  #include "protobuf.h"
>  #include "protobuf/core.pb-c.h"
>  #include "protobuf/creds.pb-c.h"
> +#include "protobuf/vdso.pb-c.h"
>  
>  /*
>   * Injected syscall instruction
> @@ -106,6 +110,125 @@ int syscall_seized(struct parasite_ctl *ctl, int nr, unsigned long *ret,
>  	return 0;
>  }
>  
> +int arch_fill_self_vdso(symtable_t *t)
> +{
> +	char buf[512];
> +	int ret = -1;
> +	FILE *maps;
> +
> +	maps = fopen("/proc/self/maps", "r");
> +	if (!maps) {
> +		pr_perror("Can't open self-vma");
> +		return -1;
> +	}
> +
> +	while (fgets(buf, sizeof(buf), maps)) {
> +		unsigned long start, end;
> +
> +		if (strstr(buf, "[vdso]") == NULL)
> +			continue;
> +
> +		ret = sscanf(buf, "%lx-%lx", &start, &end);
> +		if (ret != 2) {
> +			ret = -1;
> +			pr_err("Can't find vDSO bounds\n");
> +			break;
> +		}
> +
> +		pr_debug("vdso: Got area %lx-%lx\n", start, end);
> +
> +		t->vma_start = start;
> +		t->vma_end = end;
> +		ret = arch_parse_vdso((void *)start, end - start, t);
> +		break;
> +	}
> +
> +	fclose(maps);
> +	return ret;
> +}
> +
> +int arch_read_vdso_layout(symtable_t *t)
> +{
> +	unsigned int i, nr_entries = 0;
> +	VdsoSymbolEntry *symbol;
> +	int ret = -1, fd;
> +
> +	fd = open_image(CR_FD_VDSO, O_RSTR);
> +	if (fd < 0)
> +		goto err;
> +
> +	INIT_SYMTABLE(t);
> +
> +	while (1) {
> +		ret = pb_read_one_eof(fd, &symbol, PB_VDSO);
> +		if (ret < 0)
> +			goto err;
> +		else if (ret == 0)
> +			break;
> +
> +		pr_debug("vdso: read name %s offset %lx\n",
> +			 symbol->name, symbol->offset);
> +
> +		i = arch_vdso_get_symbol_index(symbol->name);
> +		if (i == VDSO_SYMBOL_MAX) {
> +			pr_err("vDSO symbol %s is not reconized\n",
> +			       symbol->name);
> +			goto err;
> +		}
> +
> +		strncpy(t->sym[i].name, symbol->name, sizeof(t->sym[i].name));
> +		t->sym[i].name[sizeof(t->sym[i].name) - 1] = '\0';
> +		t->sym[i].offset = symbol->offset;
> +
> +		vdso_symbol_entry__free_unpacked(symbol, NULL);
> +
> +		nr_entries++;
> +	}
> +
> +	/* Verify read data */
> +	if (nr_entries) {
> +		for (i = 0; i < ARRAY_SIZE(t->sym); i++) {
> +			if (!arch_is_vdso_symbol_valid(&t->sym[i])) {
> +				pr_err("Invalid vDSO data for symbol %s\n",
> +				       arch_vdso_get_symbol_name(i));
> +				goto err;
> +			}
> +		}
> +	}
> +	ret = 0;
> +
> +err:
> +	close(fd);
> +	return ret;
> +}
> +
> +int arch_dump_vdso_layout(void)
> +{
> +	VdsoSymbolEntry symbol = VDSO_SYMBOL_ENTRY__INIT;
> +	symtable_t t = { };
> +	int ret, fd, i;
> +
> +	ret = arch_fill_self_vdso(&t);
> +	if (ret)
> +		goto err;
> +
> +	ret = -1;
> +	fd = open_image(CR_FD_VDSO, O_DUMP);
> +	if (fd < 0)
> +		goto err;
> +
> +	ret = 0;
> +	for (i = 0; ret == 0 && i < VDSO_SYMBOL_MAX; i++) {
> +		symbol.name = t.sym[i].name;
> +		symbol.offset = t.sym[i].offset;
> +		ret = pb_write_one(fd, &symbol, PB_VDSO);
> +	}
> +
> +	close(fd);
> +err:
> +	return ret;
> +}
> +
>  int get_task_regs(pid_t pid, CoreEntry *core, const struct parasite_ctl *ctl)
>  {
>  	struct xsave_struct xsave	= {  };
> diff --git a/arch/x86/vdso.c b/arch/x86/vdso.c
> new file mode 100644
> index 0000000..6c088fd
> --- /dev/null
> +++ b/arch/x86/vdso.c
> @@ -0,0 +1,206 @@
> +/*
> + * WARN: This file is used in several places across the project.
> + *       Please don't add any non-PIE functions.
> + */
> +#include <stdlib.h>
> +#include <stdio.h>
> +#include <unistd.h>
> +#include <string.h>
> +
> +#include <sys/types.h>
> +
> +#include "asm/elf.h"
> +
> +#include "compiler.h"
> +#include "xmalloc.h"
> +#include "vdso.h"
> +
> +#ifdef LOG_PREFIX
> +#undef LOG_PREFIX
> +#endif
> +#define LOG_PREFIX "vdso: "
> +
> +static const char *vdso_x86_symbols[VDSO_SYMBOL_MAX] = {
> +	[VDSO_SYMBOL_GETTIMEOFDAY]	= "__vdso_gettimeofday",
> +	[VDSO_SYMBOL_GETCPU]		= "__vdso_getcpu",
> +	[VDSO_SYMBOL_CLOCK_GETTIME]	= "__vdso_clock_gettime",
> +	[VDSO_SYMBOL_TIME]		= "__vdso_time",
> +};
> +
> +const char *arch_vdso_get_symbol_name(unsigned int index)
> +{
> +	if (index < ARRAY_SIZE(vdso_x86_symbols))
> +		return vdso_x86_symbols[index];
> +
> +	return "Unknown";
> +}
> +
> +unsigned int arch_vdso_get_symbol_index(char *symbol)
> +{
> +	unsigned int i;
> +
> +	/*
> +	 * A linear scan is not a problem for such a small array,
> +	 * but be ready to replace it with a faster algorithm.
> +	 */
> +	for (i = 0; symbol && i < ARRAY_SIZE(vdso_x86_symbols); i++) {
> +		if (!strcmp(symbol, vdso_x86_symbols[i]))
> +			return i;
> +	}
> +
> +	return VDSO_SYMBOL_MAX;
> +}
> +
> +static const char vdso_ident[] = {
> +	0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00,
> +	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> +};
> +
> +typedef struct {
> +	u16	movabs;
> +	u64	imm64;
> +	u16	jmp_rax;
> +	u32	guards;
> +} __packed jmp_t;
> +
> +int arch_proxify_vdso(void *base_to, void *base_from, symtable_t *to, symtable_t *from)

I think this function should be moved to the next patch: nothing in this patch calls it yet.

> +{
> +	jmp_t jmp = {
> +		.movabs		= 0xb848,
> +		.jmp_rax	= 0xe0ff,
> +		.guards		= 0xcccccccc,
> +	};
> +	unsigned int i;
> +
> +	/*
> +	 * For simplicity we support forward jumps only,
> +	 * so the caller must provide us with validated
> +	 * data only.
> +	 */
> +	for (i = 0; i < ARRAY_SIZE(to->sym); i++) {
> +		if (arch_is_vdso_symbol_empty(&from->sym[i]))
> +			continue;
> +
> +		pr_debug("jmp: %lx/%lx -> %lx/%lx\n",
> +			 (unsigned long)base_from, from->sym[i].offset,
> +			 (unsigned long)base_to, to->sym[i].offset);
> +
> +		jmp.imm64 = (unsigned long)base_to + to->sym[i].offset;
> +
> +		memcpy((void *)(base_from + from->sym[i].offset), &jmp, sizeof(jmp));
> +	}
> +
> +	return 0;
> +}
> +
> +int arch_parse_vdso(char *mem, size_t size, symtable_t *t)
> +{
> +	Elf64_Ehdr *ehdr = (void *)mem;
> +	Elf64_Shdr *shdr, *shdr_strtab;
> +	Elf64_Shdr *shdr_dynsym, *shdr_dynstr;
> +	Elf64_Phdr *phdr;
> +	Elf64_Shdr *text;
> +	Elf64_Sym *sym;
> +
> +	char *section_names, *dynsymbol_names;
> +
> +	unsigned long base = VDSO_BAD_ADDR;
> +	unsigned int i, j, k;
> +
> +	BUILD_BUG_ON(sizeof(vdso_ident) != sizeof(ehdr->e_ident));
> +
> +	/*
> +	 * Make sure it's a file we support.
> +	 */
> +	for (i = 0; i < sizeof(vdso_ident); i++) {
> +		if (ehdr->e_ident[i] != vdso_ident[i]) {
> +			pr_err("Elf header magic mismatch\n");
> +			goto err;
> +		}
> +	}
> +
> +	/*
> +	 * Figure out base virtual address.
> +	 */
> +	phdr = (void *)&mem[ehdr->e_phoff];
> +	for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
> +		if (phdr->p_type == PT_LOAD) {
> +			base = phdr->p_vaddr;
> +			break;
> +		}
> +	}
> +	if (base != VDSO_BAD_ADDR) {
> +		pr_debug("Base address %lx\n", base);
> +	} else {
> +		pr_err("No base address found\n");
> +		goto err;
> +	}
> +
> +	/*
> +	 * Where the section names lie.
> +	 */
> +	if (ehdr->e_shstrndx == SHN_UNDEF) {
> +		pr_err("Section names are not found\n");
> +		goto err;
> +	}
> +
> +	shdr = (void *)&mem[ehdr->e_shoff];
> +	shdr_strtab = &shdr[ehdr->e_shstrndx];
> +	section_names = (void *)&mem[shdr_strtab->sh_offset];
> +
> +	shdr_dynsym = shdr_dynstr = text = NULL;
> +
> +	shdr = (void *)&mem[ehdr->e_shoff];
> +	for (i = 0; i < ehdr->e_shnum; i++, shdr++) {
> +
> +		pr_debug("section: %2d -> %s\n",
> +			 i, &section_names[shdr->sh_name]);
> +
> +		if (shdr->sh_type == SHT_DYNSYM &&
> +		    strcmp(&section_names[shdr->sh_name],
> +			   ".dynsym") == 0) {
> +			shdr_dynsym = shdr;
> +		} else if (shdr->sh_type == SHT_STRTAB &&
> +		    strcmp(&section_names[shdr->sh_name],
> +			   ".dynstr") == 0) {
> +			shdr_dynstr = shdr;
> +		} else if (shdr->sh_type == SHT_PROGBITS &&
> +		    strcmp(&section_names[shdr->sh_name],
> +			   ".text") == 0) {
> +			text = shdr;
> +		}
> +	}
> +
> +	if (!shdr_dynsym || !shdr_dynstr || !text) {
> +		pr_err("No required sections found\n");
> +		goto err;
> +	}
> +
> +	dynsymbol_names = (void *)&mem[shdr_dynstr->sh_offset];
> +
> +	/*
> +	 * Walk over global symbols and choose ones we need.
> +	 */
> +	j = shdr_dynsym->sh_size / sizeof(*sym);
> +	sym = (void *)&mem[shdr_dynsym->sh_offset];
> +
> +	for (i = 0; i < j; i++, sym++) {
> +		if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL ||
> +		    ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
> +			continue;
> +
> +		k = arch_vdso_get_symbol_index(&dynsymbol_names[sym->st_name]);
> +		if (k != VDSO_SYMBOL_MAX) {
> +			memcpy(t->sym[k].name, vdso_x86_symbols[k],
> +			       sizeof(t->sym[k].name));
> +			t->sym[k].offset = (unsigned long)sym->st_value - base;
> +		}
> +		pr_debug("%csymbol: %#-16lx %2d %s\n",
> +			 k != VDSO_SYMBOL_MAX ? '+' : '-',
> +			 t->sym[k].offset, sym->st_shndx, t->sym[k].name);
> +	}
> +
> +	return 0;
> +err:
> +	return -1;
> +}
> diff --git a/cr-dump.c b/cr-dump.c
> index 5743551..3e9d4bf 100644
> --- a/cr-dump.c
> +++ b/cr-dump.c
> @@ -59,6 +59,7 @@
>  #include "file-lock.h"
>  #include "page-xfer.h"
>  #include "kerndat.h"
> +#include "vdso.h"
>  
>  #include "asm/dump.h"
>  
> @@ -1583,6 +1584,9 @@ int cr_dump_tasks(pid_t pid, const struct cr_options *opts)
>  	if (collect_sockets(pid))
>  		goto err;
>  
> +	if (arch_dump_vdso_layout())
> +		goto err;
> +
>  	glob_fdset = cr_glob_fdset_open(O_DUMP);
>  	if (!glob_fdset)
>  		goto err;
> diff --git a/include/vdso.h b/include/vdso.h
> new file mode 100644
> index 0000000..02d60c1
> --- /dev/null
> +++ b/include/vdso.h
> @@ -0,0 +1,102 @@
> +#ifndef __CR_VDSO_H__
> +#define __CR_VDSO_H__
> +
> +#include <stdbool.h>
> +#include <string.h>
> +#include <sys/types.h>
> +
> +#include "asm/int.h"
> +#include "compiler.h"
> +
> +/*
> + * This is the minimal set of symbols
> + * we should support at the moment.
> + */
> +enum {
> +	VDSO_SYMBOL_GETTIMEOFDAY	= 0,
> +	VDSO_SYMBOL_GETCPU,
> +	VDSO_SYMBOL_CLOCK_GETTIME,
> +	VDSO_SYMBOL_TIME,
> +
> +	VDSO_SYMBOL_MAX
> +};
> +
> +#define VDSO_BAD_ADDR	(-1ul)
> +
> +typedef struct symbol_s {
> +	char		name[32];
> +	unsigned long	offset;
> +} symbol_t;
> +
> +#define SYMBOL_INIT						\
> +	{ .offset = VDSO_BAD_ADDR, }
> +
> +typedef struct symtable_s {
> +	unsigned long	vma_start;
> +	unsigned long	vma_end;
> +	symbol_t	sym[VDSO_SYMBOL_MAX];
> +} symtable_t;
> +
> +#define symtable_vma_size(s)					\
> +	(unsigned long)((s)->vma_end - (s)->vma_start)
> +
> +#define SYMTABLE_INIT						\
> +	{							\
> +		.vma_start	= VDSO_BAD_ADDR,		\
> +		.vma_end	= VDSO_BAD_ADDR,		\
> +		.sym		= {				\
> +			[0 ... VDSO_SYMBOL_MAX - 1] =		\
> +				(symbol_t) SYMBOL_INIT,		\
> +			},					\
> +	}
> +
> +#define INIT_SYMTABLE(symtable)					\
> +	*(symtable) = (symtable_t) SYMTABLE_INIT
> +
> +static inline bool arch_is_vdso_symbol_empty(symbol_t *s)
> +{
> +	return s->offset == VDSO_BAD_ADDR && s->name[0] == '\0';
> +}
> +
> +static inline bool arch_is_vdso_symbol_valid(symbol_t *s)
> +{
> +	if (!arch_is_vdso_symbol_empty(s)) {
> +		if (s->offset == VDSO_BAD_ADDR ||
> +		    s->name[0] == '\0')
> +			return false;
> +	}
> +
> +	return true;
> +}
> +
> +static inline bool arch_is_vdso_symbols_empty(symtable_t *t)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(t->sym); i++) {
> +		if (!arch_is_vdso_symbol_empty(&t->sym[i]))
> +			return false;
> +	}
> +
> +	return true;
> +}
> +
> +#if CONFIG_X86_64
> +extern const char *arch_vdso_get_symbol_name(unsigned int index);
> +extern unsigned int arch_vdso_get_symbol_index(char *symbol);
> +extern int arch_fill_self_vdso(symtable_t *t);
> +extern int arch_read_vdso_layout(symtable_t *t);
> +extern int arch_dump_vdso_layout(void);
> +extern int arch_parse_vdso(char *mem, size_t size, symtable_t *t);
> +extern int arch_proxify_vdso(void *base_to, void *base_from, symtable_t *to, symtable_t *from);
> +#else
> +extern const char *arch_vdso_get_symbol_name(unsigned int index) { return NULL; }
> +static inline unsigned int arch_vdso_get_symbol_index(char *symbol) { return VDSO_SYMBOL_MAX; };
> +static inline int arch_fill_self_vdso(symtable_t *t) { return 0; }
> +static inline int arch_read_vdso_layout(symtable_t *t) { return 0; }
> +static inline int arch_dump_vdso_layout(void) { }
> +static inline int arch_parse_vdso(char *mem, size_t size, symtable_t *t) { return 0; }
> +static inline int arch_proxify_vdso(void *base_to, void *base_from, symtable_t *to, symtable_t *from) { return 0; }
> +#endif
> +
> +#endif /* __CR_VDSO_H__ */



More information about the CRIU mailing list