[CRIU] [PATCH 3/4] arch: x86 -- Add dumping of vDSO layout
Andrew Vagin
avagin at parallels.com
Tue Apr 16 06:12:11 EDT 2013
On Tue, Apr 16, 2013 at 01:26:10PM +0400, Cyrill Gorcunov wrote:
>
> Here we introduce vDSO dumping. Because vDSO is generated by a kernel
> and all processes in a system do host the same vDSO content, we simply
> dump own crtools vDSO not touching dumpee memory at all.
>
> Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
> ---
> arch/x86/Makefile | 1 +
> arch/x86/crtools.c | 123 ++++++++++++++++++++++++++++++++
> arch/x86/vdso.c | 206 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> cr-dump.c | 4 ++
> include/vdso.h | 102 ++++++++++++++++++++++++++
> 5 files changed, 436 insertions(+)
> create mode 100644 arch/x86/vdso.c
> create mode 100644 include/vdso.h
>
> diff --git a/arch/x86/Makefile b/arch/x86/Makefile
> index 8e950a3..227b18a 100644
> --- a/arch/x86/Makefile
> +++ b/arch/x86/Makefile
> @@ -6,6 +6,7 @@ SYS-ASM := syscalls.S
> syscalls-asm-y += $(SYS-ASM:.S=).o
> crtools-obj-y += crtools.o
> crtools-obj-y += cpu.o
> +crtools-obj-y += vdso.o
>
> SYS-DEF := syscall-x86-64.def
> SYS-ASM-COMMON := syscall-common-x86-64.S
> diff --git a/arch/x86/crtools.c b/arch/x86/crtools.c
> index e7e9d63..06d6631 100644
> --- a/arch/x86/crtools.c
> +++ b/arch/x86/crtools.c
> @@ -2,6 +2,8 @@
> #include <unistd.h>
> #include <elf.h>
>
> +#include <sys/mman.h>
> +
> #include "asm/processor-flags.h"
> #include "asm/types.h"
> #include "asm/fpu.h"
> @@ -14,10 +16,12 @@
> #include "log.h"
> #include "util.h"
> #include "cpu.h"
> +#include "vdso.h"
>
> #include "protobuf.h"
> #include "protobuf/core.pb-c.h"
> #include "protobuf/creds.pb-c.h"
> +#include "protobuf/vdso.pb-c.h"
>
> /*
> * Injected syscall instruction
> @@ -106,6 +110,125 @@ int syscall_seized(struct parasite_ctl *ctl, int nr, unsigned long *ret,
> return 0;
> }
>
> +int arch_fill_self_vdso(symtable_t *t)
> +{
> + char buf[512];
> + int ret = -1;
> + FILE *maps;
> +
> + maps = fopen("/proc/self/maps", "r");
> + if (!maps) {
> + pr_perror("Can't open self-vma");
> + return -1;
> + }
> +
> + while (fgets(buf, sizeof(buf), maps)) {
> + unsigned long start, end;
> +
> + if (strstr(buf, "[vdso]") == NULL)
> + continue;
> +
> + ret = sscanf(buf, "%lx-%lx", &start, &end);
> + if (ret != 2) {
> + ret = -1;
> + pr_err("Can't find vDSO bounds\n");
> + break;
> + }
> +
> + pr_debug("vdso: Got area %lx-%lx\n", start, end);
> +
> + t->vma_start = start;
> + t->vma_end = end;
> + ret = arch_parse_vdso((void *)start, end - start, t);
> + break;
> + }
> +
> + fclose(maps);
> + return ret;
> +}
> +
> +int arch_read_vdso_layout(symtable_t *t)
> +{
> + unsigned int i, nr_entries = 0;
> + VdsoSymbolEntry *symbol;
> + int ret = -1, fd;
> +
> + fd = open_image(CR_FD_VDSO, O_RSTR);
> + if (fd < 0)
> + goto err;
> +
> + INIT_SYMTABLE(t);
> +
> + while (1) {
> + ret = pb_read_one_eof(fd, &symbol, PB_VDSO);
> + if (ret < 0)
> + goto err;
> + else if (ret == 0)
> + break;
> +
> + pr_debug("vdso: read name %s offset %lx\n",
> + symbol->name, symbol->offset);
> +
> + i = arch_vdso_get_symbol_index(symbol->name);
> + if (i == VDSO_SYMBOL_MAX) {
> + pr_err("vDSO symbol %s is not reconized\n",
> + symbol->name);
> + goto err;
> + }
> +
> + strncpy(t->sym[i].name, symbol->name, sizeof(t->sym[i].name));
> + t->sym[i].name[sizeof(t->sym[i].name) - 1] = '\0';
> + t->sym[i].offset = symbol->offset;
> +
> + vdso_symbol_entry__free_unpacked(symbol, NULL);
> +
> + nr_entries++;
> + }
> +
> + /* Verify read data */
> + if (nr_entries) {
> + for (i = 0; i < ARRAY_SIZE(t->sym); i++) {
> + if (!arch_is_vdso_symbol_valid(&t->sym[i])) {
> + pr_err("Invalid vDSO data for symbol %s\n",
> + arch_vdso_get_symbol_name(i));
> + goto err;
> + }
> + }
> + }
> + ret = 0;
> +
> +err:
> + close(fd);
> + return ret;
> +}
> +
> +int arch_dump_vdso_layout(void)
> +{
> + VdsoSymbolEntry symbol = VDSO_SYMBOL_ENTRY__INIT;
> + symtable_t t = { };
> + int ret, fd, i;
> +
> + ret = arch_fill_self_vdso(&t);
> + if (ret)
> + goto err;
> +
> + ret = -1;
> + fd = open_image(CR_FD_VDSO, O_DUMP);
> + if (fd < 0)
> + goto err;
> +
> + ret = 0;
> + for (i = 0; ret == 0 && i < VDSO_SYMBOL_MAX; i++) {
> + symbol.name = t.sym[i].name;
> + symbol.offset = t.sym[i].offset;
> + ret = pb_write_one(fd, &symbol, PB_VDSO);
> + }
> +
> + close(fd);
> +err:
> + return ret;
> +}
> +
> int get_task_regs(pid_t pid, CoreEntry *core, const struct parasite_ctl *ctl)
> {
> struct xsave_struct xsave = { };
> diff --git a/arch/x86/vdso.c b/arch/x86/vdso.c
> new file mode 100644
> index 0000000..6c088fd
> --- /dev/null
> +++ b/arch/x86/vdso.c
> @@ -0,0 +1,206 @@
> +/*
> + * WARN This file is used in several places over the project.
> + * Please don't add any non PIE function.
> + */
> +#include <stdlib.h>
> +#include <stdio.h>
> +#include <unistd.h>
> +#include <string.h>
> +
> +#include <sys/types.h>
> +
> +#include "asm/elf.h"
> +
> +#include "compiler.h"
> +#include "xmalloc.h"
> +#include "vdso.h"
> +
> +#ifdef LOG_PREFIX
> +#undef LOG_PREFIX
> +#endif
> +#define LOG_PREFIX "vdso: "
> +
> +static const char *vdso_x86_symbols[VDSO_SYMBOL_MAX] = {
> + [VDSO_SYMBOL_GETTIMEOFDAY] = "__vdso_gettimeofday",
> + [VDSO_SYMBOL_GETCPU] = "__vdso_getcpu",
> + [VDSO_SYMBOL_CLOCK_GETTIME] = "__vdso_clock_gettime",
> + [VDSO_SYMBOL_TIME] = "__vdso_time",
> +};
> +
> +const char *arch_vdso_get_symbol_name(unsigned int index)
> +{
> + if (index < ARRAY_SIZE(vdso_x86_symbols))
> + return vdso_x86_symbols[index];
> +
> + return "Unknown";
> +}
> +
> +unsigned int arch_vdso_get_symbol_index(char *symbol)
> +{
> + unsigned int i;
> +
> + /*
> + * It's not a problem for small size of array, but
> + * be ready to change it for some faster algo.
> + */
> + for (i = 0; symbol && i < ARRAY_SIZE(vdso_x86_symbols); i++) {
> + if (!strcmp(symbol, vdso_x86_symbols[i]))
> + return i;
> + }
> +
> + return VDSO_SYMBOL_MAX;
> +}
> +
> +static const char vdso_ident[] = {
> + 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00,
> + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
> +};
> +
> +typedef struct {
> + u16 movabs;
> + u64 imm64;
> + u16 jmp_rax;
> + u32 guards;
> +} __packed jmp_t;
> +
> +int arch_proxify_vdso(void *base_to, void *base_from, symtable_t *to, symtable_t *from)
I think this function should be in the next patch
> +{
> + jmp_t jmp = {
> + .movabs = 0xb848,
> + .jmp_rax = 0xe0ff,
> + .guards = 0xcccccccc,
> + };
> + unsigned int i;
> +
> + /*
> + * We support forward jumps only, for simplicity
> + * reason, thus the caller must provide us validated
> + * data only.
> + */
> + for (i = 0; i < ARRAY_SIZE(to->sym); i++) {
> + if (arch_is_vdso_symbol_empty(&from->sym[i]))
> + continue;
> +
> + pr_debug("jmp: %lx/%lx -> %lx/%lx\n",
> + (unsigned long)base_from, from->sym[i].offset,
> + (unsigned long)base_to, to->sym[i].offset);
> +
> + jmp.imm64 = (unsigned long)base_to + to->sym[i].offset;
> +
> + memcpy((void *)(base_from + from->sym[i].offset), &jmp, sizeof(jmp));
> + }
> +
> + return 0;
> +}
> +
> +int arch_parse_vdso(char *mem, size_t size, symtable_t *t)
> +{
> + Elf64_Ehdr *ehdr = (void *)mem;
> + Elf64_Shdr *shdr, *shdr_strtab;
> + Elf64_Shdr *shdr_dynsym, *shdr_dynstr;
> + Elf64_Phdr *phdr;
> + Elf64_Shdr *text;
> + Elf64_Sym *sym;
> +
> + char *section_names, *dynsymbol_names;
> +
> + unsigned long base = VDSO_BAD_ADDR;
> + unsigned int i, j, k;
> +
> + BUILD_BUG_ON(sizeof(vdso_ident) != sizeof(ehdr->e_ident));
> +
> + /*
> + * Make sure it's a file we support.
> + */
> + for (i = 0; i < sizeof(vdso_ident); i++) {
> + if (ehdr->e_ident[i] != vdso_ident[i]) {
> + pr_err("Elf header magic mismatch\n");
> + goto err;
> + }
> + }
> +
> + /*
> + * Figure out base virtual address.
> + */
> + phdr = (void *)&mem[ehdr->e_phoff];
> + for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
> + if (phdr->p_type == PT_LOAD) {
> + base = phdr->p_vaddr;
> + break;
> + }
> + }
> + if (base != VDSO_BAD_ADDR) {
> + pr_debug("Base address %lx\n", base);
> + } else {
> + pr_err("No base address found\n");
> + goto err;
> + }
> +
> + /*
> + * Where the section names lays.
> + */
> + if (ehdr->e_shstrndx == SHN_UNDEF) {
> + pr_err("Section names are not found\n");
> + goto err;
> + }
> +
> + shdr = (void *)&mem[ehdr->e_shoff];
> + shdr_strtab = &shdr[ehdr->e_shstrndx];
> + section_names = (void *)&mem[shdr_strtab->sh_offset];
> +
> + shdr_dynsym = shdr_dynstr = text = NULL;
> +
> + shdr = (void *)&mem[ehdr->e_shoff];
> + for (i = 0; i < ehdr->e_shnum; i++, shdr++) {
> +
> + pr_debug("section: %2d -> %s\n",
> + i, &section_names[shdr->sh_name]);
> +
> + if (shdr->sh_type == SHT_DYNSYM &&
> + strcmp(&section_names[shdr->sh_name],
> + ".dynsym") == 0) {
> + shdr_dynsym = shdr;
> + } else if (shdr->sh_type == SHT_STRTAB &&
> + strcmp(&section_names[shdr->sh_name],
> + ".dynstr") == 0) {
> + shdr_dynstr = shdr;
> + } else if (shdr->sh_type == SHT_PROGBITS &&
> + strcmp(&section_names[shdr->sh_name],
> + ".text") == 0) {
> + text = shdr;
> + }
> + }
> +
> + if (!shdr_dynsym || !shdr_dynstr || !text) {
> + pr_err("No required sections found\n");
> + goto err;
> + }
> +
> + dynsymbol_names = (void *)&mem[shdr_dynstr->sh_offset];
> +
> + /*
> + * Walk over global symbols and choose ones we need.
> + */
> + j = shdr_dynsym->sh_size / sizeof(*sym);
> + sym = (void *)&mem[shdr_dynsym->sh_offset];
> +
> + for (i = 0; i < j; i++, sym++) {
> + if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL ||
> + ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
> + continue;
> +
> + k = arch_vdso_get_symbol_index(&dynsymbol_names[sym->st_name]);
> + if (k != VDSO_SYMBOL_MAX) {
> + memcpy(t->sym[k].name, vdso_x86_symbols[k],
> + sizeof(t->sym[k].name));
> + t->sym[k].offset = (unsigned long)sym->st_value - base;
> + }
> + pr_debug("%csymbol: %#-16lx %2d %s\n",
> + k != VDSO_SYMBOL_MAX ? '+' : '-',
> + t->sym[k].offset, sym->st_shndx, t->sym[k].name);
> + }
> +
> + return 0;
> +err:
> + return -1;
> +}
> diff --git a/cr-dump.c b/cr-dump.c
> index 5743551..3e9d4bf 100644
> --- a/cr-dump.c
> +++ b/cr-dump.c
> @@ -59,6 +59,7 @@
> #include "file-lock.h"
> #include "page-xfer.h"
> #include "kerndat.h"
> +#include "vdso.h"
>
> #include "asm/dump.h"
>
> @@ -1583,6 +1584,9 @@ int cr_dump_tasks(pid_t pid, const struct cr_options *opts)
> if (collect_sockets(pid))
> goto err;
>
> + if (arch_dump_vdso_layout())
> + goto err;
> +
> glob_fdset = cr_glob_fdset_open(O_DUMP);
> if (!glob_fdset)
> goto err;
> diff --git a/include/vdso.h b/include/vdso.h
> new file mode 100644
> index 0000000..02d60c1
> --- /dev/null
> +++ b/include/vdso.h
> @@ -0,0 +1,102 @@
> +#ifndef __CR_VDSO_H__
> +#define __CR_VDSO_H__
> +
> +#include <stdbool.h>
> +#include <string.h>
> +#include <sys/types.h>
> +
> +#include "asm/int.h"
> +#include "compiler.h"
> +
> +/*
> + * This is a minimal amount of symbols
> + * we should support at the moment.
> + */
> +enum {
> + VDSO_SYMBOL_GETTIMEOFDAY = 0,
> + VDSO_SYMBOL_GETCPU,
> + VDSO_SYMBOL_CLOCK_GETTIME,
> + VDSO_SYMBOL_TIME,
> +
> + VDSO_SYMBOL_MAX
> +};
> +
> +#define VDSO_BAD_ADDR (-1ul)
> +
> +typedef struct symbol_s {
> + char name[32];
> + unsigned long offset;
> +} symbol_t;
> +
> +#define SYMBOL_INIT \
> + { .offset = VDSO_BAD_ADDR, }
> +
> +typedef struct symtable_s {
> + unsigned long vma_start;
> + unsigned long vma_end;
> + symbol_t sym[VDSO_SYMBOL_MAX];
> +} symtable_t;
> +
> +#define symtable_vma_size(s) \
> + (unsigned long)((s)->vma_end - (s)->vma_start)
> +
> +#define SYMTABLE_INIT \
> + { \
> + .vma_start = VDSO_BAD_ADDR, \
> + .vma_end = VDSO_BAD_ADDR, \
> + .sym = { \
> + [0 ... VDSO_SYMBOL_MAX - 1] = \
> + (symbol_t) SYMBOL_INIT, \
> + }, \
> + }
> +
> +#define INIT_SYMTABLE(symtable) \
> + *(symtable) = (symtable_t) SYMTABLE_INIT
> +
> +static inline bool arch_is_vdso_symbol_empty(symbol_t *s)
> +{
> + return s->offset == VDSO_BAD_ADDR && s->name[0] == '\0';
> +}
> +
> +static inline bool arch_is_vdso_symbol_valid(symbol_t *s)
> +{
> + if (!arch_is_vdso_symbol_empty(s)) {
> + if (s->offset == VDSO_BAD_ADDR ||
> + s->name[0] == '\0')
> + return false;
> + }
> +
> + return true;
> +}
> +
> +static inline bool arch_is_vdso_symbols_empty(symtable_t *t)
> +{
> + unsigned int i;
> +
> + for (i = 0; i < ARRAY_SIZE(t->sym); i++) {
> + if (!arch_is_vdso_symbol_empty(&t->sym[i]))
> + return false;
> + }
> +
> + return true;
> +}
> +
> +#if CONFIG_X86_64
> +extern const char *arch_vdso_get_symbol_name(unsigned int index);
> +extern unsigned int arch_vdso_get_symbol_index(char *symbol);
> +extern int arch_fill_self_vdso(symtable_t *t);
> +extern int arch_read_vdso_layout(symtable_t *t);
> +extern int arch_dump_vdso_layout(void);
> +extern int arch_parse_vdso(char *mem, size_t size, symtable_t *t);
> +extern int arch_proxify_vdso(void *base_to, void *base_from, symtable_t *to, symtable_t *from);
> +#else
> +extern const char *arch_vdso_get_symbol_name(unsigned int index) { return NULL; }
> +static inline unsigned int arch_vdso_get_symbol_index(char *symbol) { return VDSO_SYMBOL_MAX; };
> +static inline int arch_fill_self_vdso(symtable_t *t) { return 0; }
> +static inline int arch_read_vdso_layout(symtable_t *t) { return 0; }
> +static inline int arch_dump_vdso_layout(void) { }
> +static inline int arch_parse_vdso(char *mem, size_t size, symtable_t *t) { return 0; }
> +static inline int arch_proxify_vdso(void *base_to, void *base_from, symtable_t *to, symtable_t *from) { return 0; }
> +#endif
> +
> +#endif /* __CR_VDSO_H__ */
More information about the CRIU
mailing list