[CRIU] [PATCH 3/4] vdso: Rework vdso processing files
Laurent Dufour
ldufour at linux.vnet.ibm.com
Thu Sep 3 07:26:30 PDT 2015
There were multiple copies of the same code spread over the different
architectures handling the vDSO.
This patch merges the duplicated code from arch/*/vdso-pie.c and
arch/*/include/asm/vdso.h into common files, leaving only the architecture
specific part in the arch/*/* files.
The files are now organized this way:
include/asm-generic/vdso.h
contains basic definition which could be overwritten by
architectures.
arch/*/include/asm/vdso.h
contains per architecture definitions.
It may include include/asm-generic/vdso.h
pie/util-vdso.c
include/util-vdso.h
These files contain code and definitions common to both criu and
the parasite code.
The file include/util-vdso.h includes arch/*/include/asm/vdso.h.
pie/parasite-vdso.c
include/parasite-vdso.h
contains code and definitions specific to the parasite code handling
the vDSO.
The file include/parasite-vdso.h includes include/util-vdso.h.
arch/*/vdso-pie.c
contains the architecture specific code installing the vDSO
trampoline.
vdso.c
include/vdso.h
contains code and definitions specific to the criu code handling the
vDSO.
The file include/vdso.h includes include/util-vdso.h.
CC: Christopher Covington <cov at codeaurora.org>
CC: Pavel Emelyanov <xemul at parallels.com>
Signed-off-by: Laurent Dufour <ldufour at linux.vnet.ibm.com>
---
Makefile | 10 +-
arch/aarch64/include/asm/vdso.h | 161 ++-------------
arch/aarch64/vdso-pie.c | 401 +-------------------------------------
arch/ppc64/include/asm/vdso.h | 184 +++---------------
arch/ppc64/vdso-pie.c | 409 +-------------------------------------
arch/x86/crtools.c | 1 +
arch/x86/include/asm/vdso.h | 159 ++-------------
arch/x86/vdso-pie.c | 422 +---------------------------------------
include/asm-generic/vdso.h | 12 ++
include/parasite-syscall.h | 5 -
include/parasite-vdso.h | 96 +++++++++
include/restorer.h | 2 +-
include/util-vdso.h | 69 +++++++
include/vdso.h | 14 +-
pie/Makefile | 2 +
pie/parasite-vdso.c | 218 +++++++++++++++++++++
pie/parasite.c | 2 +-
pie/util-vdso.c | 211 ++++++++++++++++++++
18 files changed, 687 insertions(+), 1691 deletions(-)
create mode 100644 include/asm-generic/vdso.h
create mode 100644 include/parasite-vdso.h
create mode 100644 include/util-vdso.h
create mode 100644 pie/parasite-vdso.c
create mode 100644 pie/util-vdso.c
diff --git a/Makefile b/Makefile
index 7f5c89091a74..a926b64795de 100644
--- a/Makefile
+++ b/Makefile
@@ -234,15 +234,7 @@ lib: $(VERSION_HEADER) config built-in.o
$(Q) $(MAKE) $(build)=lib all
ifeq ($(VDSO),y)
-$(ARCH_DIR)/vdso-pie.o: pie
- $(Q) $(MAKE) $(build)=pie $(ARCH_DIR)/vdso-pie.o
-PROGRAM-BUILTINS += $(ARCH_DIR)/vdso-pie.o
-ifeq ($(SRCARCH),aarch64)
-PROGRAM-BUILTINS += $(ARCH_DIR)/intraprocedure.o
-endif
-ifeq ($(SRCARCH),ppc64)
-PROGRAM-BUILTINS += $(ARCH_DIR)/vdso-trampoline.o
-endif
+PROGRAM-BUILTINS += pie/util-vdso.o
endif
PROGRAM-BUILTINS += pie/util-fd.o
diff --git a/arch/aarch64/include/asm/vdso.h b/arch/aarch64/include/asm/vdso.h
index f8d1556de0db..920cebd22095 100644
--- a/arch/aarch64/include/asm/vdso.h
+++ b/arch/aarch64/include/asm/vdso.h
@@ -1,158 +1,25 @@
#ifndef __CR_ASM_VDSO_H__
#define __CR_ASM_VDSO_H__
-#include <sys/types.h>
-
#include "asm/int.h"
-#include "protobuf/vma.pb-c.h"
-
-struct parasite_ctl;
-struct vm_area_list;
-
-#define VDSO_PROT (PROT_READ | PROT_EXEC)
-#define VVAR_PROT (PROT_READ)
-
-#define VDSO_BAD_ADDR (-1ul)
-#define VVAR_BAD_ADDR VDSO_BAD_ADDR
-#define VDSO_BAD_PFN (-1ull)
-#define VVAR_BAD_PFN VDSO_BAD_PFN
-
-struct vdso_symbol {
- char name[32];
- unsigned long offset;
-};
-
-#define VDSO_SYMBOL_INIT { .offset = VDSO_BAD_ADDR, }
-
-/* Check if symbol present in symtable */
-static inline bool vdso_symbol_empty(struct vdso_symbol *s)
-{
- return s->offset == VDSO_BAD_ADDR && s->name[0] == '\0';
-}
+#include "asm-generic/vdso.h"
/*
* This is a minimal amount of symbols
* we should support at the moment.
*/
-enum {
- VDSO_SYMBOL_CLOCK_GETRES,
- VDSO_SYMBOL_CLOCK_GETTIME,
- VDSO_SYMBOL_GETTIMEOFDAY,
- VDSO_SYMBOL_RT_SIGRETURN,
-
- VDSO_SYMBOL_MAX
-};
-
-struct vdso_symtable {
- unsigned long vma_start;
- unsigned long vma_end;
- unsigned long vvar_start;
- unsigned long vvar_end;
- struct vdso_symbol symbols[VDSO_SYMBOL_MAX];
-};
-
-#define VDSO_SYMTABLE_INIT \
- { \
- .vma_start = VDSO_BAD_ADDR, \
- .vma_end = VDSO_BAD_ADDR, \
- .vvar_start = VVAR_BAD_ADDR, \
- .vvar_end = VVAR_BAD_ADDR, \
- .symbols = { \
- [0 ... VDSO_SYMBOL_MAX - 1] = \
- (struct vdso_symbol)VDSO_SYMBOL_INIT, \
- }, \
- }
-
-/* Size of VMA associated with vdso */
-static inline unsigned long vdso_vma_size(struct vdso_symtable *t)
-{
- return t->vma_end - t->vma_start;
-}
-
-static inline unsigned long vvar_vma_size(struct vdso_symtable *t)
-{
- return t->vvar_end - t->vvar_start;
-}
-/*
- * Special mark which allows to identify runtime vdso where
- * calls from proxy vdso are redirected. This mark usually
- * placed at the start of vdso area where Elf header lives.
- * Since such runtime vdso is solevey used by proxy and
- * nobody else is supposed to access it, it's more-less
- * safe to screw the Elf header with @signature and
- * @proxy_addr.
- *
- * The @proxy_addr deserves a few comments. When we redirect
- * the calls from proxy to runtime vdso, on next checkpoint
- * it won't be possible to find which VMA is proxy, thus
- * we save its address in the member.
- */
-struct vdso_mark {
- u64 signature;
- unsigned long proxy_vdso_addr;
-
- unsigned long version;
-
- /*
- * In case of new vDSO format the VVAR area address
- * neeed for easier discovering where it lives without
- * relying on procfs output.
- */
- unsigned long proxy_vvar_addr;
-};
-
-#define VDSO_MARK_SIGNATURE (0x6f73647675697263ULL) /* Magic number (criuvdso) */
-#define VDSO_MARK_SIGNATURE_V2 (0x4f53447675697263ULL) /* Magic number (criuvDSO) */
-#define VDSO_MARK_CUR_VERSION (2)
-
-static inline void vdso_put_mark(void *where, unsigned long proxy_vdso_addr, unsigned long proxy_vvar_addr)
-{
- struct vdso_mark *m = where;
-
- m->signature = VDSO_MARK_SIGNATURE_V2;
- m->proxy_vdso_addr = proxy_vdso_addr;
- m->version = VDSO_MARK_CUR_VERSION;
- m->proxy_vvar_addr = proxy_vvar_addr;
-}
-
-static inline bool is_vdso_mark(void *addr)
-{
- struct vdso_mark *m = addr;
-
- if (m->signature == VDSO_MARK_SIGNATURE_V2) {
- /*
- * New format
- */
- return true;
- } else if (m->signature == VDSO_MARK_SIGNATURE) {
- /*
- * Old format -- simply extend the mark up
- * to the version we support.
- */
- vdso_put_mark(m, m->proxy_vdso_addr, VVAR_BAD_ADDR);
- return true;
- }
- return false;
-}
-
-#define VDSO_SYMBOL_CLOCK_GETRES_NAME "__kernel_clock_getres"
-#define VDSO_SYMBOL_CLOCK_GETTIME_NAME "__kernel_clock_gettime"
-#define VDSO_SYMBOL_GETTIMEOFDAY_NAME "__kernel_gettimeofday"
-#define VDSO_SYMBOL_RT_SIGRETURN_NAME "__kernel_rt_sigreturn"
-
-extern struct vdso_symtable vdso_sym_rt;
-extern u64 vdso_pfn;
-
-extern int vdso_init(void);
-extern int vdso_do_park(struct vdso_symtable *sym_rt, unsigned long park_at, unsigned long park_size);
-extern int vdso_fill_symtable(char *mem, size_t size, struct vdso_symtable *t);
-extern int vdso_proxify(char *who, struct vdso_symtable *sym_rt,
- unsigned long vdso_rt_parked_at, size_t index,
- VmaEntry *vmas, size_t nr_vmas);
-
-extern int vdso_redirect_calls(void *base_to, void *base_from, struct vdso_symtable *to, struct vdso_symtable *from);
-extern int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
- struct vm_area_list *vma_area_list);
-extern void write_intraprocedure_branch(void *to, void *from);
+#define VDSO_SYMBOL_MAX 4
+
+#define ARCH_VDSO_SYMBOLS \
+ "__kernel_clock_getres", \
+ "__kernel_clock_gettime", \
+ "__kernel_gettimeofday", \
+ "__kernel_rt_sigreturn"
+
+struct vdso_symtable;
+extern int vdso_redirect_calls(unsigned long base_to,
+ unsigned long base_from,
+ struct vdso_symtable *to,
+ struct vdso_symtable *from);
#endif /* __CR_ASM_VDSO_H__ */
diff --git a/arch/aarch64/vdso-pie.c b/arch/aarch64/vdso-pie.c
index c6558378db1d..0f06c2d191d1 100644
--- a/arch/aarch64/vdso-pie.c
+++ b/arch/aarch64/vdso-pie.c
@@ -1,23 +1,10 @@
-#include <stdlib.h>
-#include <stdio.h>
#include <unistd.h>
-#include <string.h>
-#include <elf.h>
-#include <fcntl.h>
-#include <errno.h>
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
#include "asm/string.h"
#include "asm/types.h"
-#include "compiler.h"
#include "syscall.h"
-#include "image.h"
-#include "vdso.h"
-#include "vma.h"
+#include "parasite-vdso.h"
#include "log.h"
#include "bug.h"
@@ -26,7 +13,7 @@
#endif
#define LOG_PREFIX "vdso: "
-int vdso_redirect_calls(void *base_to, void *base_from,
+int vdso_redirect_calls(unsigned long base_to, unsigned long base_from,
struct vdso_symtable *to,
struct vdso_symtable *from)
{
@@ -37,8 +24,8 @@ int vdso_redirect_calls(void *base_to, void *base_from,
continue;
pr_debug("br: %lx/%lx -> %lx/%lx (index %d)\n",
- (unsigned long)base_from, from->symbols[i].offset,
- (unsigned long)base_to, to->symbols[i].offset, i);
+ base_from, from->symbols[i].offset,
+ base_to, to->symbols[i].offset, i);
write_intraprocedure_branch(base_to + to->symbols[i].offset,
base_from + from->symbols[i].offset);
@@ -46,383 +33,3 @@ int vdso_redirect_calls(void *base_to, void *base_from,
return 0;
}
-
-
-/* Check if pointer is out-of-bound */
-static bool __ptr_oob(void *ptr, void *start, size_t size)
-{
- void *end = (void *)((unsigned long)start + size);
- return ptr > end || ptr < start;
-}
-
-/*
- * Elf hash, see format specification.
- */
-static unsigned long elf_hash(const unsigned char *name)
-{
- unsigned long h = 0, g;
-
- while (*name) {
- h = (h << 4) + *name++;
- g = h & 0xf0000000ul;
- if (g)
- h ^= g >> 24;
- h &= ~g;
- }
- return h;
-}
-
-int vdso_fill_symtable(char *mem, size_t size, struct vdso_symtable *t)
-{
- Elf64_Phdr *dynamic = NULL, *load = NULL;
- Elf64_Ehdr *ehdr = (void *)mem;
- Elf64_Dyn *dyn_strtab = NULL;
- Elf64_Dyn *dyn_symtab = NULL;
- Elf64_Dyn *dyn_strsz = NULL;
- Elf64_Dyn *dyn_syment = NULL;
- Elf64_Dyn *dyn_hash = NULL;
- Elf64_Word *hash = NULL;
- Elf64_Phdr *phdr;
- Elf64_Dyn *d;
-
- Elf64_Word *bucket, *chain;
- Elf64_Word nbucket, nchain;
-
- /*
- * See Elf specification for this magic values.
- */
- const char elf_ident[] = {
- 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- };
-
- const char *vdso_symbols[VDSO_SYMBOL_MAX] = {
- [VDSO_SYMBOL_CLOCK_GETRES] = VDSO_SYMBOL_CLOCK_GETRES_NAME,
- [VDSO_SYMBOL_CLOCK_GETTIME] = VDSO_SYMBOL_CLOCK_GETTIME_NAME,
- [VDSO_SYMBOL_GETTIMEOFDAY] = VDSO_SYMBOL_GETTIMEOFDAY_NAME,
- [VDSO_SYMBOL_RT_SIGRETURN] = VDSO_SYMBOL_RT_SIGRETURN_NAME,
- };
-
- char *dynsymbol_names;
- unsigned int i, j, k;
-
- BUILD_BUG_ON(sizeof(elf_ident) != sizeof(ehdr->e_ident));
-
- pr_debug("Parsing at %lx %lx\n", (long)mem, (long)mem + (long)size);
-
- /*
- * Make sure it's a file we support.
- */
- if (builtin_memcmp(ehdr->e_ident, elf_ident, sizeof(elf_ident))) {
- pr_err("Elf header magic mismatch\n");
- return -EINVAL;
- }
-
- /*
- * We need PT_LOAD and PT_DYNAMIC here. Each once.
- */
- phdr = (void *)&mem[ehdr->e_phoff];
- for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
- if (__ptr_oob(phdr, mem, size))
- goto err_oob;
- switch (phdr->p_type) {
- case PT_DYNAMIC:
- if (dynamic) {
- pr_err("Second PT_DYNAMIC header\n");
- return -EINVAL;
- }
- dynamic = phdr;
- break;
- case PT_LOAD:
- if (load) {
- pr_err("Second PT_LOAD header\n");
- return -EINVAL;
- }
- load = phdr;
- break;
- }
- }
-
- if (!load || !dynamic) {
- pr_err("One of obligated program headers is missed\n");
- return -EINVAL;
- }
-
- pr_debug("PT_LOAD p_vaddr: %lx\n", (unsigned long)load->p_vaddr);
-
- /*
- * Dynamic section tags should provide us the rest of information
- * needed. Note that we're interested in a small set of tags.
- */
- d = (void *)&mem[dynamic->p_offset];
- for (i = 0; i < dynamic->p_filesz / sizeof(*d); i++, d++) {
- if (__ptr_oob(d, mem, size))
- goto err_oob;
-
- if (d->d_tag == DT_NULL) {
- break;
- } else if (d->d_tag == DT_STRTAB) {
- dyn_strtab = d;
- pr_debug("DT_STRTAB: %p\n", (void *)d->d_un.d_ptr);
- } else if (d->d_tag == DT_SYMTAB) {
- dyn_symtab = d;
- pr_debug("DT_SYMTAB: %p\n", (void *)d->d_un.d_ptr);
- } else if (d->d_tag == DT_STRSZ) {
- dyn_strsz = d;
- pr_debug("DT_STRSZ: %lu\n", (unsigned long)d->d_un.d_val);
- } else if (d->d_tag == DT_SYMENT) {
- dyn_syment = d;
- pr_debug("DT_SYMENT: %lu\n", (unsigned long)d->d_un.d_val);
- } else if (d->d_tag == DT_HASH) {
- dyn_hash = d;
- pr_debug("DT_HASH: %p\n", (void *)d->d_un.d_ptr);
- }
- }
-
- if (!dyn_strtab || !dyn_symtab || !dyn_strsz || !dyn_syment || !dyn_hash) {
- pr_err("Not all dynamic entries are present\n");
- return -EINVAL;
- }
-
- dynsymbol_names = &mem[dyn_strtab->d_un.d_val - load->p_vaddr];
- if (__ptr_oob(dynsymbol_names, mem, size))
- goto err_oob;
-
- hash = (void *)&mem[(unsigned long)dyn_hash->d_un.d_ptr - (unsigned long)load->p_vaddr];
- if (__ptr_oob(hash, mem, size))
- goto err_oob;
-
- nbucket = hash[0];
- nchain = hash[1];
- bucket = &hash[2];
- chain = &hash[nbucket + 2];
-
- pr_debug("nbucket %lu nchain %lu bucket %p chain %p\n",
- (long)nbucket, (long)nchain, bucket, chain);
-
- for (i = 0; i < ARRAY_SIZE(vdso_symbols); i++) {
- k = elf_hash((const unsigned char *)vdso_symbols[i]);
-
- for (j = bucket[k % nbucket]; j < nchain && chain[j] != STN_UNDEF; j = chain[j]) {
- Elf64_Sym *sym = (void *)&mem[dyn_symtab->d_un.d_ptr - load->p_vaddr];
- char *name;
-
- sym = &sym[j];
- if (__ptr_oob(sym, mem, size))
- continue;
-
- if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC &&
- ELF64_ST_BIND(sym->st_info) != STB_GLOBAL)
- continue;
-
- name = &dynsymbol_names[sym->st_name];
- if (__ptr_oob(name, mem, size))
- continue;
-
- if (builtin_strcmp(name, vdso_symbols[i]))
- continue;
-
- builtin_memcpy(t->symbols[i].name, name, sizeof(t->symbols[i].name));
- t->symbols[i].offset = (unsigned long)sym->st_value - load->p_vaddr;
- break;
- }
- }
-
- return 0;
-
-err_oob:
- pr_err("Corrupted Elf data\n");
- return -EFAULT;
-}
-
-static int vdso_remap(char *who, unsigned long from, unsigned long to, size_t size)
-{
- unsigned long addr;
-
- pr_debug("Remap %s %lx -> %lx\n", who, from, to);
-
- addr = sys_mremap(from, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, to);
- if (addr != to) {
- pr_err("Unable to remap %lx -> %lx %lx\n",
- from, to, addr);
- return -1;
- }
-
- return 0;
-}
-
-/* Park runtime vDSO in some safe place where it can be accessible from restorer */
-int vdso_do_park(struct vdso_symtable *sym_rt, unsigned long park_at, unsigned long park_size)
-{
- int ret;
-
- BUG_ON((vdso_vma_size(sym_rt) + vvar_vma_size(sym_rt)) < park_size);
-
- if (sym_rt->vvar_start != VDSO_BAD_ADDR) {
- if (sym_rt->vma_start < sym_rt->vvar_start) {
- ret = vdso_remap("rt-vdso", sym_rt->vma_start,
- park_at, vdso_vma_size(sym_rt));
- park_at += vdso_vma_size(sym_rt);
- ret |= vdso_remap("rt-vvar", sym_rt->vvar_start,
- park_at, vvar_vma_size(sym_rt));
- } else {
- ret = vdso_remap("rt-vvar", sym_rt->vvar_start,
- park_at, vvar_vma_size(sym_rt));
- park_at += vvar_vma_size(sym_rt);
- ret |= vdso_remap("rt-vdso", sym_rt->vma_start,
- park_at, vdso_vma_size(sym_rt));
- }
- } else
- ret = vdso_remap("rt-vdso", sym_rt->vma_start,
- park_at, vdso_vma_size(sym_rt));
- return ret;
-}
-
-int vdso_proxify(char *who, struct vdso_symtable *sym_rt,
- unsigned long vdso_rt_parked_at, size_t index,
- VmaEntry *vmas, size_t nr_vmas)
-{
- VmaEntry *vma_vdso = NULL, *vma_vvar = NULL;
- struct vdso_symtable s = VDSO_SYMTABLE_INIT;
- bool remap_rt = false;
-
- /*
- * Figue out which kind of vdso tuple we get.
- */
- if (vma_entry_is(&vmas[index], VMA_AREA_VDSO))
- vma_vdso = &vmas[index];
- else if (vma_entry_is(&vmas[index], VMA_AREA_VVAR))
- vma_vvar = &vmas[index];
-
- if (index < (nr_vmas - 1)) {
- if (vma_entry_is(&vmas[index + 1], VMA_AREA_VDSO))
- vma_vdso = &vmas[index + 1];
- else if (vma_entry_is(&vmas[index + 1], VMA_AREA_VVAR))
- vma_vvar = &vmas[index + 1];
- }
-
- if (!vma_vdso) {
- pr_err("Can't find vDSO area in image\n");
- return -1;
- }
-
- /*
- * vDSO mark overwrites Elf program header of proxy vDSO thus
- * it must never ever be greater in size.
- */
- BUILD_BUG_ON(sizeof(struct vdso_mark) > sizeof(Elf64_Phdr));
-
- /*
- * Find symbols in vDSO zone read from image.
- */
- if (vdso_fill_symtable((void *)vma_vdso->start, vma_entry_len(vma_vdso), &s))
- return -1;
-
- /*
- * Proxification strategy
- *
- * - There might be two vDSO zones: vdso code and optionally vvar data
- * - To be able to use in-place remapping we need
- *
- * a) Size and order of vDSO zones are to match
- * b) Symbols offsets must match
- * c) Have same number of vDSO zones
- */
- if (vma_entry_len(vma_vdso) == vdso_vma_size(sym_rt)) {
- size_t i;
-
- for (i = 0; i < ARRAY_SIZE(s.symbols); i++) {
- if (s.symbols[i].offset != sym_rt->symbols[i].offset)
- break;
- }
-
- if (i == ARRAY_SIZE(s.symbols)) {
- if (vma_vvar && sym_rt->vvar_start != VVAR_BAD_ADDR) {
- remap_rt = (vvar_vma_size(sym_rt) == vma_entry_len(vma_vvar));
- if (remap_rt) {
- long delta_rt = sym_rt->vvar_start - sym_rt->vma_start;
- long delta_this = vma_vvar->start - vma_vdso->start;
-
- remap_rt = (delta_rt ^ delta_this) < 0 ? false : true;
- }
- } else
- remap_rt = true;
- }
- }
-
- pr_debug("image [vdso] %lx-%lx [vvar] %lx-%lx\n",
- vma_vdso->start, vma_vdso->end,
- vma_vvar ? vma_vvar->start : VVAR_BAD_ADDR,
- vma_vvar ? vma_vvar->end : VVAR_BAD_ADDR);
-
- /*
- * Easy case -- the vdso from image has same offsets, order and size
- * as runtime, so we simply remap runtime vdso to dumpee position
- * without generating any proxy.
- *
- * Note we may remap VVAR vdso as well which might not yet been mapped
- * by a caller code. So drop VMA_AREA_REGULAR from it and caller would
- * not touch it anymore.
- */
- if (remap_rt) {
- int ret = 0;
-
- pr_info("Runtime vdso/vvar matches dumpee, remap inplace\n");
-
- if (sys_munmap((void *)vma_vdso->start, vma_entry_len(vma_vdso))) {
- pr_err("Failed to unmap %s\n", who);
- return -1;
- }
-
- if (vma_vvar) {
- if (sys_munmap((void *)vma_vvar->start, vma_entry_len(vma_vvar))) {
- pr_err("Failed to unmap %s\n", who);
- return -1;
- }
-
- if (vma_vdso->start < vma_vvar->start) {
- ret = vdso_remap(who, vdso_rt_parked_at, vma_vdso->start, vdso_vma_size(sym_rt));
- vdso_rt_parked_at += vdso_vma_size(sym_rt);
- ret |= vdso_remap(who, vdso_rt_parked_at, vma_vvar->start, vvar_vma_size(sym_rt));
- } else {
- ret = vdso_remap(who, vdso_rt_parked_at, vma_vvar->start, vvar_vma_size(sym_rt));
- vdso_rt_parked_at += vvar_vma_size(sym_rt);
- ret |= vdso_remap(who, vdso_rt_parked_at, vma_vdso->start, vdso_vma_size(sym_rt));
- }
- } else
- ret = vdso_remap(who, vdso_rt_parked_at, vma_vdso->start, vdso_vma_size(sym_rt));
-
- return ret;
- }
-
- /*
- * Now complex case -- we need to proxify calls. We redirect
- * calls from dumpee vdso to runtime vdso, making dumpee
- * to operate as proxy vdso.
- */
- pr_info("Runtime vdso mismatches dumpee, generate proxy\n");
-
- /*
- * Don't forget to shift if vvar is before vdso.
- */
- if (sym_rt->vvar_start != VDSO_BAD_ADDR &&
- sym_rt->vvar_start < sym_rt->vma_start)
- vdso_rt_parked_at += vvar_vma_size(sym_rt);
-
- if (vdso_redirect_calls((void *)vdso_rt_parked_at,
- (void *)vma_vdso->start,
- sym_rt, &s)) {
- pr_err("Failed to proxify dumpee contents\n");
- return -1;
- }
-
- /*
- * Put a special mark into runtime vdso, thus at next checkpoint
- * routine we could detect this vdso and do not dump it, since
- * it's auto-generated every new session if proxy required.
- */
- sys_mprotect((void *)vdso_rt_parked_at, vdso_vma_size(sym_rt), PROT_WRITE);
- vdso_put_mark((void *)vdso_rt_parked_at, vma_vdso->start, vma_vvar ? vma_vvar->start : VVAR_BAD_ADDR);
- sys_mprotect((void *)vdso_rt_parked_at, vdso_vma_size(sym_rt), VDSO_PROT);
- return 0;
-}
diff --git a/arch/ppc64/include/asm/vdso.h b/arch/ppc64/include/asm/vdso.h
index 8d089dde3a5d..ed94e4cf0160 100644
--- a/arch/ppc64/include/asm/vdso.h
+++ b/arch/ppc64/include/asm/vdso.h
@@ -1,172 +1,34 @@
#ifndef __CR_ASM_VDSO_H__
#define __CR_ASM_VDSO_H__
-#include <sys/types.h>
-
#include "asm/int.h"
-#include "protobuf/vma.pb-c.h"
-
-struct parasite_ctl;
-struct vm_area_list;
-
-#define VDSO_PROT (PROT_READ | PROT_EXEC)
-#define VVAR_PROT (PROT_READ)
-
-#define VDSO_BAD_ADDR (-1ul)
-#define VVAR_BAD_ADDR VDSO_BAD_ADDR
-#define VDSO_BAD_PFN (-1ull)
-#define VVAR_BAD_PFN VDSO_BAD_PFN
-
-struct vdso_symbol {
- char name[32];
- unsigned long offset;
-};
-
-#define VDSO_SYMBOL_INIT { .offset = VDSO_BAD_ADDR, }
-
-/* Check if symbol present in symtable */
-static inline bool vdso_symbol_empty(struct vdso_symbol *s)
-{
- return s->offset == VDSO_BAD_ADDR && s->name[0] == '\0';
-}
+#include "asm-generic/vdso.h"
-/*
- * Pick from kernel file arch/powerpc/kernel/vdso64/vdso64.lds.S
+/* This definition is used in pie/util-vdso.c to initialize the vdso symbol
+ * name string table 'vdso_symbols'
+ *
+ * Poke from kernel file arch/powerpc/kernel/vdso64/vdso64.lds.S
*
* Note that '__kernel_datapage_offset' is not a service but mostly a data
* inside the text page which should not be used as is from user space.
*/
-enum {
- VDSO_SYMBOL_CLOCK_GETRES,
- VDSO_SYMBOL_CLOCK_GETTIME,
- VDSO_SYMBOL_GET_SYSCALL_MAP,
- VDSO_SYMBOL_GET_TBFREQ,
- VDSO_SYMBOL_GETCPU,
- VDSO_SYMBOL_GETTIMEOFDAY,
- VDSO_SYMBOL_SIGTRAMP_RT64,
- VDSO_SYMBOL_SYNC_DICACHE,
- VDSO_SYMBOL_SYNC_DICACHE_P5,
- VDSO_SYMBOL_TIME,
-
- VDSO_SYMBOL_MAX
-};
-
-#define VDSO_SYMBOL_CLOCK_GETRES_NAME "__kernel_clock_getres"
-#define VDSO_SYMBOL_CLOCK_GETTIME_NAME "__kernel_clock_gettime"
-#define VDSO_SYMBOL_GET_SYSCALL_MAP_NAME "__kernel_get_syscall_map"
-#define VDSO_SYMBOL_GET_TBFREQ_NAME "__kernel_get_tbfreq"
-#define VDSO_SYMBOL_GETCPU_NAME "__kernel_getcpu"
-#define VDSO_SYMBOL_GETTIMEOFDAY_NAME "__kernel_gettimeofday"
-#define VDSO_SYMBOL_SIGTRAMP_RT64_NAME "__kernel_sigtramp_rt64"
-#define VDSO_SYMBOL_SYNC_DICACHE_NAME "__kernel_sync_dicache"
-#define VDSO_SYMBOL_SYNC_DICACHE_P5_NAME "__kernel_sync_dicache_p5"
-#define VDSO_SYMBOL_TIME_NAME "__kernel_time"
-
-struct vdso_symtable {
- unsigned long vma_start;
- unsigned long vma_end;
- unsigned long vvar_start;
- unsigned long vvar_end;
- struct vdso_symbol symbols[VDSO_SYMBOL_MAX];
-};
-
-#define VDSO_SYMTABLE_INIT \
- { \
- .vma_start = VDSO_BAD_ADDR, \
- .vma_end = VDSO_BAD_ADDR, \
- .vvar_start = VVAR_BAD_ADDR, \
- .vvar_end = VVAR_BAD_ADDR, \
- .symbols = { \
- [0 ... VDSO_SYMBOL_MAX - 1] = \
- (struct vdso_symbol)VDSO_SYMBOL_INIT, \
- }, \
- }
-
-/* Size of VMA associated with vdso */
-static inline unsigned long vdso_vma_size(struct vdso_symtable *t)
-{
- return t->vma_end - t->vma_start;
-}
-
-static inline unsigned long vvar_vma_size(struct vdso_symtable *t)
-{
- return t->vvar_end - t->vvar_start;
-}
-/*
- * Special mark which allows to identify runtime vdso where
- * calls from proxy vdso are redirected. This mark usually
- * placed at the start of vdso area where Elf header lives.
- * Since such runtime vdso is solevey used by proxy and
- * nobody else is supposed to access it, it's more-less
- * safe to screw the Elf header with @signature and
- * @proxy_addr.
- *
- * The @proxy_addr deserves a few comments. When we redirect
- * the calls from proxy to runtime vdso, on next checkpoint
- * it won't be possible to find which VMA is proxy, thus
- * we save its address in the member.
- */
-struct vdso_mark {
- u64 signature;
- unsigned long proxy_vdso_addr;
-
- unsigned long version;
-
- /*
- * In case of new vDSO format the VVAR area address
- * neeed for easier discovering where it lives without
- * relying on procfs output.
- */
- unsigned long proxy_vvar_addr;
-};
-
-#define VDSO_MARK_SIGNATURE (0x6f73647675697263ULL) /* Magic number (criuvdso) */
-#define VDSO_MARK_SIGNATURE_V2 (0x4f53447675697263ULL) /* Magic number (criuvDSO) */
-#define VDSO_MARK_CUR_VERSION (2)
-
-static inline void vdso_put_mark(void *where, unsigned long proxy_vdso_addr, unsigned long proxy_vvar_addr)
-{
- struct vdso_mark *m = where;
-
- m->signature = VDSO_MARK_SIGNATURE_V2;
- m->proxy_vdso_addr = proxy_vdso_addr;
- m->version = VDSO_MARK_CUR_VERSION;
- m->proxy_vvar_addr = proxy_vvar_addr;
-}
-
-static inline bool is_vdso_mark(void *addr)
-{
- struct vdso_mark *m = addr;
-
- if (m->signature == VDSO_MARK_SIGNATURE_V2) {
- /*
- * New format
- */
- return true;
- } else if (m->signature == VDSO_MARK_SIGNATURE) {
- /*
- * Old format -- simply extend the mark up
- * to the version we support.
- */
- vdso_put_mark(m, m->proxy_vdso_addr, VVAR_BAD_ADDR);
- return true;
- }
- return false;
-}
-
-
-extern struct vdso_symtable vdso_sym_rt;
-extern u64 vdso_pfn;
-
-extern int vdso_init(void);
-extern int vdso_do_park(struct vdso_symtable *sym_rt, unsigned long park_at, unsigned long park_size);
-extern int vdso_fill_symtable(char *mem, size_t size, struct vdso_symtable *t);
-extern int vdso_proxify(char *who, struct vdso_symtable *sym_rt,
- unsigned long vdso_rt_parked_at, size_t index,
- VmaEntry *vmas, size_t nr_vmas);
-
-extern int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
- struct vm_area_list *vma_area_list);
-extern void write_intraprocedure_branch(void *to, void *from);
+#define VDSO_SYMBOL_MAX 10
+#define ARCH_VDSO_SYMBOLS \
+ "__kernel_clock_getres", \
+ "__kernel_clock_gettime", \
+ "__kernel_get_syscall_map", \
+ "__kernel_get_tbfreq", \
+ "__kernel_getcpu", \
+ "__kernel_gettimeofday", \
+ "__kernel_sigtramp_rt64", \
+ "__kernel_sync_dicache", \
+ "__kernel_sync_dicache_p5", \
+ "__kernel_time"
+
+struct vdso_symtable;
+extern int vdso_redirect_calls(unsigned long base_to,
+ unsigned long base_from,
+ struct vdso_symtable *to,
+ struct vdso_symtable *from);
#endif /* __CR_ASM_VDSO_H__ */
diff --git a/arch/ppc64/vdso-pie.c b/arch/ppc64/vdso-pie.c
index a77acf1efd26..77069917f3b0 100644
--- a/arch/ppc64/vdso-pie.c
+++ b/arch/ppc64/vdso-pie.c
@@ -1,21 +1,10 @@
-#include <stdlib.h>
-#include <stdio.h>
#include <unistd.h>
-#include <elf.h>
-#include <fcntl.h>
-#include <errno.h>
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
#include "asm/string.h"
#include "asm/types.h"
#include "syscall.h"
-#include "image.h"
-#include "vdso.h"
-#include "vma.h"
+#include "parasite-vdso.h"
#include "log.h"
#include "bug.h"
@@ -139,10 +128,10 @@ static inline void put_trampoline_call(unsigned long at, unsigned long to,
invalidate_caches(at);
}
-static int vdso_redirect_calls(unsigned long base_to,
- unsigned long base_from,
- struct vdso_symtable *to,
- struct vdso_symtable *from)
+int vdso_redirect_calls(unsigned long base_to,
+ unsigned long base_from,
+ struct vdso_symtable *to,
+ struct vdso_symtable *from)
{
unsigned int i;
unsigned long trampoline;
@@ -167,391 +156,3 @@ static int vdso_redirect_calls(unsigned long base_to,
return 0;
}
-
-/* Check if pointer is out-of-bound */
-static bool __ptr_oob(void *ptr, void *start, size_t size)
-{
- void *end = (void *)((unsigned long)start + size);
- return ptr > end || ptr < start;
-}
-
-/*
- * Elf hash, see format specification.
- */
-static unsigned long elf_hash(const unsigned char *name)
-{
- unsigned long h = 0, g;
-
- while (*name) {
- h = (h << 4) + *name++;
- g = h & 0xf0000000ul;
- if (g)
- h ^= g >> 24;
- h &= ~g;
- }
- return h;
-}
-
-#define SET_VDSO_SYM(s) [VDSO_SYMBOL_##s] = VDSO_SYMBOL_##s##_NAME
-const char *vdso_symbols[VDSO_SYMBOL_MAX] = {
- SET_VDSO_SYM(CLOCK_GETRES),
- SET_VDSO_SYM(CLOCK_GETTIME),
- SET_VDSO_SYM(GET_SYSCALL_MAP),
- SET_VDSO_SYM(GET_TBFREQ),
- SET_VDSO_SYM(GETCPU),
- SET_VDSO_SYM(GETTIMEOFDAY),
- SET_VDSO_SYM(SIGTRAMP_RT64),
- SET_VDSO_SYM(SYNC_DICACHE),
- SET_VDSO_SYM(SYNC_DICACHE_P5),
- SET_VDSO_SYM(TIME)
-};
-#define VDSO_SYMBOL(i) vdso_symbols[i]
-
-int vdso_fill_symtable(char *mem, size_t size, struct vdso_symtable *t)
-{
- Elf64_Phdr *dynamic = NULL, *load = NULL;
- Elf64_Ehdr *ehdr = (void *)mem;
- Elf64_Dyn *dyn_strtab = NULL;
- Elf64_Dyn *dyn_symtab = NULL;
- Elf64_Dyn *dyn_strsz = NULL;
- Elf64_Dyn *dyn_syment = NULL;
- Elf64_Dyn *dyn_hash = NULL;
- Elf64_Word *hash = NULL;
- Elf64_Phdr *phdr;
- Elf64_Dyn *d;
-
- Elf64_Word *bucket, *chain;
- Elf64_Word nbucket, nchain;
-
- /*
- * See Elf specification for this magic values.
- */
- static const char elf_ident[] = {
- 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- };
-
- char *dynsymbol_names;
- unsigned int i, j, k;
-
- BUILD_BUG_ON(sizeof(elf_ident) != sizeof(ehdr->e_ident));
-
- pr_debug("Parsing at %lx %lx\n", (long)mem, (long)mem + (long)size);
-
- /*
- * Make sure it's a file we support.
- */
- if (builtin_memcmp(ehdr->e_ident, elf_ident, sizeof(elf_ident))) {
- pr_err("Elf header magic mismatch\n");
- return -EINVAL;
- }
-
- /*
- * We need PT_LOAD and PT_DYNAMIC here. Each once.
- */
- phdr = (void *)&mem[ehdr->e_phoff];
- for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
- if (__ptr_oob(phdr, mem, size))
- goto err_oob;
- switch (phdr->p_type) {
- case PT_DYNAMIC:
- if (dynamic) {
- pr_err("Second PT_DYNAMIC header\n");
- return -EINVAL;
- }
- dynamic = phdr;
- break;
- case PT_LOAD:
- if (load) {
- pr_err("Second PT_LOAD header\n");
- return -EINVAL;
- }
- load = phdr;
- break;
- }
- }
-
- if (!load || !dynamic) {
- pr_err("One of obligated program headers is missed\n");
- return -EINVAL;
- }
-
- pr_debug("PT_LOAD p_vaddr: %lx\n", (unsigned long)load->p_vaddr);
-
- /*
- * Dynamic section tags should provide us the rest of information
- * needed. Note that we're interested in a small set of tags.
- */
- d = (void *)&mem[dynamic->p_offset];
- for (i = 0; i < dynamic->p_filesz / sizeof(*d); i++, d++) {
- if (__ptr_oob(d, mem, size))
- goto err_oob;
-
- if (d->d_tag == DT_NULL) {
- break;
- } else if (d->d_tag == DT_STRTAB) {
- dyn_strtab = d;
- pr_debug("DT_STRTAB: %lx\n", (unsigned long)d->d_un.d_ptr);
- } else if (d->d_tag == DT_SYMTAB) {
- dyn_symtab = d;
- pr_debug("DT_SYMTAB: %lx\n", (unsigned long)d->d_un.d_ptr);
- } else if (d->d_tag == DT_STRSZ) {
- dyn_strsz = d;
- pr_debug("DT_STRSZ: %lx\n", (unsigned long)d->d_un.d_val);
- } else if (d->d_tag == DT_SYMENT) {
- dyn_syment = d;
- pr_debug("DT_SYMENT: %lx\n", (unsigned long)d->d_un.d_val);
- } else if (d->d_tag == DT_HASH) {
- dyn_hash = d;
- pr_debug("DT_HASH: %lx\n", (unsigned long)d->d_un.d_ptr);
- }
- }
-
- if (!dyn_strtab || !dyn_symtab || !dyn_strsz || !dyn_syment || !dyn_hash) {
- pr_err("Not all dynamic entries are present\n");
- return -EINVAL;
- }
-
- dynsymbol_names = &mem[dyn_strtab->d_un.d_val - load->p_vaddr];
- if (__ptr_oob(dynsymbol_names, mem, size))
- goto err_oob;
-
- hash = (void *)&mem[(unsigned long)dyn_hash->d_un.d_ptr - (unsigned long)load->p_vaddr];
- if (__ptr_oob(hash, mem, size))
- goto err_oob;
-
- nbucket = hash[0];
- nchain = hash[1];
- bucket = &hash[2];
- chain = &hash[nbucket + 2];
-
- pr_debug("nbucket %lx nchain %lx bucket %lx chain %lx\n",
- (long)nbucket, (long)nchain, (unsigned long)bucket, (unsigned long)chain);
-
- for (i = 0; i < VDSO_SYMBOL_MAX; i++) {
- const char * symbol = VDSO_SYMBOL(i);
- k = elf_hash((const unsigned char *)symbol);
-
- for (j = bucket[k % nbucket]; j < nchain && chain[j] != STN_UNDEF; j = chain[j]) {
- Elf64_Sym *sym = (void *)&mem[dyn_symtab->d_un.d_ptr - load->p_vaddr];
- char *name;
-
- sym = &sym[j];
- if (__ptr_oob(sym, mem, size))
- continue;
-
- if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC &&
- ELF64_ST_BIND(sym->st_info) != STB_GLOBAL)
- continue;
-
- name = &dynsymbol_names[sym->st_name];
- if (__ptr_oob(name, mem, size))
- continue;
-
- if (builtin_strcmp(name, symbol))
- continue;
-
- builtin_memcpy(t->symbols[i].name, name, sizeof(t->symbols[i].name));
- t->symbols[i].offset = (unsigned long)sym->st_value - load->p_vaddr;
- break;
- }
- }
-
- return 0;
-
-err_oob:
- pr_err("Corrupted Elf data\n");
- return -EFAULT;
-}
-
-static int vdso_remap(char *who, unsigned long from, unsigned long to, size_t size)
-{
- unsigned long addr;
-
- pr_debug("Remap %s %lx -> %lx\n", who, from, to);
-
- addr = sys_mremap(from, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, to);
- if (addr != to) {
- pr_err("Unable to remap %lx -> %lx %lx\n",
- from, to, addr);
- return -1;
- }
-
- return 0;
-}
-
-/* Park runtime vDSO in some safe place where it can be accessible from restorer */
-int vdso_do_park(struct vdso_symtable *sym_rt, unsigned long park_at, unsigned long park_size)
-{
- int ret;
-
- BUG_ON((vdso_vma_size(sym_rt) + vvar_vma_size(sym_rt)) < park_size);
-
- if (sym_rt->vvar_start != VDSO_BAD_ADDR) {
- if (sym_rt->vma_start < sym_rt->vvar_start) {
- ret = vdso_remap("rt-vdso", sym_rt->vma_start,
- park_at, vdso_vma_size(sym_rt));
- park_at += vdso_vma_size(sym_rt);
- ret |= vdso_remap("rt-vvar", sym_rt->vvar_start,
- park_at, vvar_vma_size(sym_rt));
- } else {
- ret = vdso_remap("rt-vvar", sym_rt->vvar_start,
- park_at, vvar_vma_size(sym_rt));
- park_at += vvar_vma_size(sym_rt);
- ret |= vdso_remap("rt-vdso", sym_rt->vma_start,
- park_at, vdso_vma_size(sym_rt));
- }
- } else
- ret = vdso_remap("rt-vdso", sym_rt->vma_start,
- park_at, vdso_vma_size(sym_rt));
- return ret;
-}
-
-int vdso_proxify(char *who, struct vdso_symtable *sym_rt,
- unsigned long vdso_rt_parked_at, size_t index,
- VmaEntry *vmas, size_t nr_vmas)
-{
- VmaEntry *vma_vdso = NULL, *vma_vvar = NULL;
- struct vdso_symtable s = VDSO_SYMTABLE_INIT;
- bool remap_rt = false;
-
- /*
- * Figure out which kind of vdso tuple we get.
- */
- if (vma_entry_is(&vmas[index], VMA_AREA_VDSO))
- vma_vdso = &vmas[index];
- else if (vma_entry_is(&vmas[index], VMA_AREA_VVAR))
- vma_vvar = &vmas[index];
-
- if (index < (nr_vmas - 1)) {
- if (vma_entry_is(&vmas[index + 1], VMA_AREA_VDSO))
- vma_vdso = &vmas[index + 1];
- else if (vma_entry_is(&vmas[index + 1], VMA_AREA_VVAR))
- vma_vvar = &vmas[index + 1];
- }
-
- if (!vma_vdso) {
- pr_err("Can't find vDSO area in image\n");
- return -1;
- }
-
- /*
- * vDSO mark overwrites Elf program header of proxy vDSO thus
- * it must never ever be greater in size.
- */
- BUILD_BUG_ON(sizeof(struct vdso_mark) > sizeof(Elf64_Phdr));
-
- /*
- * Find symbols in vDSO zone read from image.
- */
- if (vdso_fill_symtable((void *)vma_vdso->start, vma_entry_len(vma_vdso), &s))
- return -1;
-
- /*
- * Proxification strategy
- *
- * - There might be two vDSO zones: vdso code and optionally vvar data
- * - To be able to use in-place remapping we need
- *
- * a) Size and order of vDSO zones are to match
- * b) Symbols offsets must match
- * c) Have same number of vDSO zones
- */
- if (vma_entry_len(vma_vdso) == vdso_vma_size(sym_rt)) {
- size_t i;
-
- for (i = 0; i < ARRAY_SIZE(s.symbols); i++) {
- if (s.symbols[i].offset != sym_rt->symbols[i].offset)
- break;
- }
-
- if (i == ARRAY_SIZE(s.symbols)) {
- if (vma_vvar && sym_rt->vvar_start != VVAR_BAD_ADDR) {
- remap_rt = (vvar_vma_size(sym_rt) == vma_entry_len(vma_vvar));
- if (remap_rt) {
- long delta_rt = sym_rt->vvar_start - sym_rt->vma_start;
- long delta_this = vma_vvar->start - vma_vdso->start;
-
- remap_rt = (delta_rt ^ delta_this) < 0 ? false : true;
- }
- } else
- remap_rt = true;
- }
- }
-
- pr_debug("image [vdso] %lx-%lx [vvar] %lx-%lx\n",
- vma_vdso->start, vma_vdso->end,
- vma_vvar ? vma_vvar->start : VVAR_BAD_ADDR,
- vma_vvar ? vma_vvar->end : VVAR_BAD_ADDR);
-
- /*
- * Easy case -- the vdso from image has same offsets, order and size
- * as runtime, so we simply remap runtime vdso to dumpee position
- * without generating any proxy.
- *
- * Note we may remap VVAR vdso as well which might not yet been mapped
- * by a caller code. So drop VMA_AREA_REGULAR from it and caller would
- * not touch it anymore.
- */
- if (remap_rt) {
- int ret = 0;
-
- pr_info("Runtime vdso/vvar matches dumpee, remap inplace\n");
-
- if (sys_munmap((void *)vma_vdso->start, vma_entry_len(vma_vdso))) {
- pr_err("Failed to unmap %s\n", who);
- return -1;
- }
-
- if (vma_vvar) {
- if (sys_munmap((void *)vma_vvar->start, vma_entry_len(vma_vvar))) {
- pr_err("Failed to unmap %s\n", who);
- return -1;
- }
-
- if (vma_vdso->start < vma_vvar->start) {
- ret = vdso_remap(who, vdso_rt_parked_at, vma_vdso->start, vdso_vma_size(sym_rt));
- vdso_rt_parked_at += vdso_vma_size(sym_rt);
- ret |= vdso_remap(who, vdso_rt_parked_at, vma_vvar->start, vvar_vma_size(sym_rt));
- } else {
- ret = vdso_remap(who, vdso_rt_parked_at, vma_vvar->start, vvar_vma_size(sym_rt));
- vdso_rt_parked_at += vvar_vma_size(sym_rt);
- ret |= vdso_remap(who, vdso_rt_parked_at, vma_vdso->start, vdso_vma_size(sym_rt));
- }
- } else
- ret = vdso_remap(who, vdso_rt_parked_at, vma_vdso->start, vdso_vma_size(sym_rt));
-
- return ret;
- }
-
- /*
- * Now complex case -- we need to proxify calls. We redirect
- * calls from dumpee vdso to runtime vdso, making dumpee
- * to operate as proxy vdso.
- */
- pr_info("Runtime vdso mismatches dumpee, generate proxy\n");
-
- /*
- * Don't forget to shift if vvar is before vdso.
- */
- if (sym_rt->vvar_start != VDSO_BAD_ADDR &&
- sym_rt->vvar_start < sym_rt->vma_start)
- vdso_rt_parked_at += vvar_vma_size(sym_rt);
-
- if (vdso_redirect_calls(vdso_rt_parked_at,
- vma_vdso->start,
- sym_rt, &s)) {
- pr_err("Failed to proxify dumpee contents\n");
- return -1;
- }
-
- /*
- * Put a special mark into runtime vdso, thus at next checkpoint
- * routine we could detect this vdso and do not dump it, since
- * it's auto-generated every new session if proxy required.
- */
- sys_mprotect((void *)vdso_rt_parked_at, vdso_vma_size(sym_rt), PROT_WRITE);
- vdso_put_mark((void *)vdso_rt_parked_at, vma_vdso->start, vma_vvar ? vma_vvar->start : VVAR_BAD_ADDR);
- sys_mprotect((void *)vdso_rt_parked_at, vdso_vma_size(sym_rt), VDSO_PROT);
- return 0;
-}
diff --git a/arch/x86/crtools.c b/arch/x86/crtools.c
index 48890133a4ff..1d50c07faf8f 100644
--- a/arch/x86/crtools.c
+++ b/arch/x86/crtools.c
@@ -2,6 +2,7 @@
#include <unistd.h>
#include <elf.h>
#include <sys/user.h>
+#include <sys/mman.h>
#include "asm/processor-flags.h"
#include "asm/restorer.h"
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 56761fa3e4be..a1cc9bb9751b 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -1,159 +1,30 @@
#ifndef __CR_ASM_VDSO_H__
#define __CR_ASM_VDSO_H__
-#include <sys/types.h>
-
#include "asm/int.h"
-#include "protobuf/vma.pb-c.h"
-
-struct parasite_ctl;
-struct vm_area_list;
-
-#define VDSO_PROT (PROT_READ | PROT_EXEC)
-#define VVAR_PROT (PROT_READ)
-
-#define VDSO_BAD_ADDR (-1ul)
-#define VVAR_BAD_ADDR VDSO_BAD_ADDR
-#define VDSO_BAD_PFN (-1ull)
-#define VVAR_BAD_PFN VDSO_BAD_PFN
-
-struct vdso_symbol {
- char name[32];
- unsigned long offset;
-};
-
-#define VDSO_SYMBOL_INIT { .offset = VDSO_BAD_ADDR, }
+#include "asm-generic/vdso.h"
-/* Check if symbol present in symtable */
-static inline bool vdso_symbol_empty(struct vdso_symbol *s)
-{
- return s->offset == VDSO_BAD_ADDR && s->name[0] == '\0';
-}
+/* This definition is used in pie/util-vdso.c to initialize the vdso symbol
+ * name string table 'vdso_symbols'
+ */
/*
* This is a minimal amount of symbols
* we should support at the moment.
*/
-enum {
- VDSO_SYMBOL_CLOCK_GETTIME,
- VDSO_SYMBOL_GETCPU,
- VDSO_SYMBOL_GETTIMEOFDAY,
- VDSO_SYMBOL_TIME,
-
- VDSO_SYMBOL_MAX
-};
-
-struct vdso_symtable {
- unsigned long vma_start;
- unsigned long vma_end;
- unsigned long vvar_start;
- unsigned long vvar_end;
- struct vdso_symbol symbols[VDSO_SYMBOL_MAX];
-};
-
-#define VDSO_SYMTABLE_INIT \
- { \
- .vma_start = VDSO_BAD_ADDR, \
- .vma_end = VDSO_BAD_ADDR, \
- .vvar_start = VVAR_BAD_ADDR, \
- .vvar_end = VVAR_BAD_ADDR, \
- .symbols = { \
- [0 ... VDSO_SYMBOL_MAX - 1] = \
- (struct vdso_symbol)VDSO_SYMBOL_INIT, \
- }, \
- }
-
-/* Size of VMA associated with vdso */
-static inline unsigned long vdso_vma_size(struct vdso_symtable *t)
-{
- return t->vma_end - t->vma_start;
-}
-
-static inline unsigned long vvar_vma_size(struct vdso_symtable *t)
-{
- return t->vvar_end - t->vvar_start;
-}
-/*
- * Special mark which allows to identify runtime vdso where
- * calls from proxy vdso are redirected. This mark usually
- * placed at the start of vdso area where Elf header lives.
- * Since such runtime vdso is solevey used by proxy and
- * nobody else is supposed to access it, it's more-less
- * safe to screw the Elf header with @signature and
- * @proxy_addr.
- *
- * The @proxy_addr deserves a few comments. When we redirect
- * the calls from proxy to runtime vdso, on next checkpoint
- * it won't be possible to find which VMA is proxy, thus
- * we save its address in the member.
- */
-struct vdso_mark {
- u64 signature;
- unsigned long proxy_vdso_addr;
-
- unsigned long version;
-
- /*
- * In case of new vDSO format the VVAR area address
- * neeed for easier discovering where it lives without
- * relying on procfs output.
- */
- unsigned long proxy_vvar_addr;
-};
-
-#define VDSO_MARK_SIGNATURE (0x6f73647675697263ULL) /* Magic number (criuvdso) */
-#define VDSO_MARK_SIGNATURE_V2 (0x4f53447675697263ULL) /* Magic number (criuvDSO) */
-#define VDSO_MARK_CUR_VERSION (2)
-
-static inline void vdso_put_mark(void *where, unsigned long proxy_vdso_addr, unsigned long proxy_vvar_addr)
-{
- struct vdso_mark *m = where;
-
- m->signature = VDSO_MARK_SIGNATURE_V2;
- m->proxy_vdso_addr = proxy_vdso_addr;
- m->version = VDSO_MARK_CUR_VERSION;
- m->proxy_vvar_addr = proxy_vvar_addr;
-}
-
-static inline bool is_vdso_mark(void *addr)
-{
- struct vdso_mark *m = addr;
-
- if (m->signature == VDSO_MARK_SIGNATURE_V2) {
- /*
- * New format
- */
- return true;
- } else if (m->signature == VDSO_MARK_SIGNATURE) {
- /*
- * Old format -- simply extend the mark up
- * to the version we support.
- */
- vdso_put_mark(m, m->proxy_vdso_addr, VVAR_BAD_ADDR);
- return true;
- }
- return false;
-}
-
-#define VDSO_SYMBOL_CLOCK_GETTIME_NAME "__vdso_clock_gettime"
-#define VDSO_SYMBOL_GETCPU_NAME "__vdso_getcpu"
-#define VDSO_SYMBOL_GETTIMEOFDAY_NAME "__vdso_gettimeofday"
-#define VDSO_SYMBOL_TIME_NAME "__vdso_time"
-
-
+#define VDSO_SYMBOL_MAX 4
-extern struct vdso_symtable vdso_sym_rt;
-extern u64 vdso_pfn;
+#define ARCH_VDSO_SYMBOLS \
+ "__vdso_clock_gettime", \
+ "__vdso_getcpu", \
+ "__vdso_gettimeofday", \
+ "__vdso_time"
-extern int vdso_init(void);
-extern int vdso_do_park(struct vdso_symtable *sym_rt, unsigned long park_at, unsigned long park_size);
-extern int vdso_fill_symtable(char *mem, size_t size, struct vdso_symtable *t);
-extern int vdso_proxify(char *who, struct vdso_symtable *sym_rt,
- unsigned long vdso_rt_parked_at, size_t index,
- VmaEntry *vmas, size_t nr_vmas);
-extern int vdso_redirect_calls(void *base_to, void *base_from, struct vdso_symtable *to, struct vdso_symtable *from);
-extern int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
- struct vm_area_list *vma_area_list);
+struct vdso_symtable;
+extern int vdso_redirect_calls(unsigned long base_to,
+ unsigned long base_from,
+ struct vdso_symtable *to,
+ struct vdso_symtable *from);
#endif /* __CR_ASM_VDSO_H__ */
diff --git a/arch/x86/vdso-pie.c b/arch/x86/vdso-pie.c
index 5330531c7623..b1e087cd8837 100644
--- a/arch/x86/vdso-pie.c
+++ b/arch/x86/vdso-pie.c
@@ -1,23 +1,10 @@
-#include <stdlib.h>
-#include <stdio.h>
#include <unistd.h>
-#include <string.h>
-#include <elf.h>
-#include <fcntl.h>
-#include <errno.h>
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
#include "asm/string.h"
#include "asm/types.h"
-#include "compiler.h"
#include "syscall.h"
-#include "image.h"
-#include "vdso.h"
-#include "vma.h"
+#include "parasite-vdso.h"
#include "log.h"
#include "bug.h"
@@ -34,7 +21,7 @@ typedef struct {
u32 guards;
} __packed jmp_t;
-int vdso_redirect_calls(void *base_to, void *base_from,
+int vdso_redirect_calls(unsigned long base_to, unsigned long base_from,
struct vdso_symtable *to,
struct vdso_symtable *from)
{
@@ -50,420 +37,23 @@ int vdso_redirect_calls(void *base_to, void *base_from,
continue;
pr_debug("jmp: %lx/%lx -> %lx/%lx (index %d)\n",
- (unsigned long)base_from, from->symbols[i].offset,
- (unsigned long)base_to, to->symbols[i].offset, i);
+ base_from, from->symbols[i].offset,
+ base_to, to->symbols[i].offset, i);
- jmp.imm64 = (unsigned long)base_to + to->symbols[i].offset;
+ jmp.imm64 = base_to + to->symbols[i].offset;
builtin_memcpy((void *)(base_from + from->symbols[i].offset), &jmp, sizeof(jmp));
}
return 0;
}
-
-/* Check if pointer is out-of-bound */
-static bool __ptr_oob(void *ptr, void *start, size_t size)
-{
- void *end = (void *)((unsigned long)start + size);
- return ptr > end || ptr < start;
-}
-
-/*
- * Elf hash, see format specification.
- */
-static unsigned long elf_hash(const unsigned char *name)
-{
- unsigned long h = 0, g;
-
- while (*name) {
- h = (h << 4) + *name++;
- g = h & 0xf0000000ul;
- if (g)
- h ^= g >> 24;
- h &= ~g;
- }
- return h;
-}
-
-int vdso_fill_symtable(char *mem, size_t size, struct vdso_symtable *t)
-{
- Elf64_Phdr *dynamic = NULL, *load = NULL;
- Elf64_Ehdr *ehdr = (void *)mem;
- Elf64_Dyn *dyn_strtab = NULL;
- Elf64_Dyn *dyn_symtab = NULL;
- Elf64_Dyn *dyn_strsz = NULL;
- Elf64_Dyn *dyn_syment = NULL;
- Elf64_Dyn *dyn_hash = NULL;
- Elf64_Word *hash = NULL;
- Elf64_Phdr *phdr;
- Elf64_Dyn *d;
-
- Elf64_Word *bucket, *chain;
- Elf64_Word nbucket, nchain;
-
- /*
- * See Elf specification for this magic values.
- */
- const char elf_ident[] = {
- 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- };
-
- const char *vdso_symbols[VDSO_SYMBOL_MAX] = {
- [VDSO_SYMBOL_CLOCK_GETTIME] = VDSO_SYMBOL_CLOCK_GETTIME_NAME,
- [VDSO_SYMBOL_GETCPU] = VDSO_SYMBOL_GETCPU_NAME,
- [VDSO_SYMBOL_GETTIMEOFDAY] = VDSO_SYMBOL_GETTIMEOFDAY_NAME,
- [VDSO_SYMBOL_TIME] = VDSO_SYMBOL_TIME_NAME,
- };
-
- char *dynsymbol_names;
- unsigned int i, j, k;
-
- BUILD_BUG_ON(sizeof(elf_ident) != sizeof(ehdr->e_ident));
-
- pr_debug("Parsing at %lx %lx\n", (long)mem, (long)mem + (long)size);
-
- /*
- * Make sure it's a file we support.
- */
- if (builtin_memcmp(ehdr->e_ident, elf_ident, sizeof(elf_ident))) {
- pr_err("Elf header magic mismatch\n");
- return -EINVAL;
- }
-
- /*
- * We need PT_LOAD and PT_DYNAMIC here. Each once.
- */
- phdr = (void *)&mem[ehdr->e_phoff];
- for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
- if (__ptr_oob(phdr, mem, size))
- goto err_oob;
- switch (phdr->p_type) {
- case PT_DYNAMIC:
- if (dynamic) {
- pr_err("Second PT_DYNAMIC header\n");
- return -EINVAL;
- }
- dynamic = phdr;
- break;
- case PT_LOAD:
- if (load) {
- pr_err("Second PT_LOAD header\n");
- return -EINVAL;
- }
- load = phdr;
- break;
- }
- }
-
- if (!load || !dynamic) {
- pr_err("One of obligated program headers is missed\n");
- return -EINVAL;
- }
-
- pr_debug("PT_LOAD p_vaddr: %lx\n", (unsigned long)load->p_vaddr);
-
- /*
- * Dynamic section tags should provide us the rest of information
- * needed. Note that we're interested in a small set of tags.
- */
- d = (void *)&mem[dynamic->p_offset];
- for (i = 0; i < dynamic->p_filesz / sizeof(*d); i++, d++) {
- if (__ptr_oob(d, mem, size))
- goto err_oob;
-
- if (d->d_tag == DT_NULL) {
- break;
- } else if (d->d_tag == DT_STRTAB) {
- dyn_strtab = d;
- pr_debug("DT_STRTAB: %p\n", (void *)d->d_un.d_ptr);
- } else if (d->d_tag == DT_SYMTAB) {
- dyn_symtab = d;
- pr_debug("DT_SYMTAB: %p\n", (void *)d->d_un.d_ptr);
- } else if (d->d_tag == DT_STRSZ) {
- dyn_strsz = d;
- pr_debug("DT_STRSZ: %lu\n", (unsigned long)d->d_un.d_val);
- } else if (d->d_tag == DT_SYMENT) {
- dyn_syment = d;
- pr_debug("DT_SYMENT: %lu\n", (unsigned long)d->d_un.d_val);
- } else if (d->d_tag == DT_HASH) {
- dyn_hash = d;
- pr_debug("DT_HASH: %p\n", (void *)d->d_un.d_ptr);
- }
- }
-
- if (!dyn_strtab || !dyn_symtab || !dyn_strsz || !dyn_syment || !dyn_hash) {
- pr_err("Not all dynamic entries are present\n");
- return -EINVAL;
- }
-
- dynsymbol_names = &mem[dyn_strtab->d_un.d_val - load->p_vaddr];
- if (__ptr_oob(dynsymbol_names, mem, size))
- goto err_oob;
-
- hash = (void *)&mem[(unsigned long)dyn_hash->d_un.d_ptr - (unsigned long)load->p_vaddr];
- if (__ptr_oob(hash, mem, size))
- goto err_oob;
-
- nbucket = hash[0];
- nchain = hash[1];
- bucket = &hash[2];
- chain = &hash[nbucket + 2];
-
- pr_debug("nbucket %lu nchain %lu bucket %p chain %p\n",
- (long)nbucket, (long)nchain, bucket, chain);
-
- for (i = 0; i < ARRAY_SIZE(vdso_symbols); i++) {
- k = elf_hash((const unsigned char *)vdso_symbols[i]);
-
- for (j = bucket[k % nbucket]; j < nchain && chain[j] != STN_UNDEF; j = chain[j]) {
- Elf64_Sym *sym = (void *)&mem[dyn_symtab->d_un.d_ptr - load->p_vaddr];
- char *name;
-
- sym = &sym[j];
- if (__ptr_oob(sym, mem, size))
- continue;
-
- if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC &&
- ELF64_ST_BIND(sym->st_info) != STB_GLOBAL)
- continue;
-
- name = &dynsymbol_names[sym->st_name];
- if (__ptr_oob(name, mem, size))
- continue;
-
- if (builtin_strcmp(name, vdso_symbols[i]))
- continue;
-
- builtin_memcpy(t->symbols[i].name, name, sizeof(t->symbols[i].name));
- t->symbols[i].offset = (unsigned long)sym->st_value - load->p_vaddr;
- break;
- }
- }
-
- return 0;
-
-err_oob:
- pr_err("Corrupted Elf data\n");
- return -EFAULT;
-}
-
-static int vdso_remap(char *who, unsigned long from, unsigned long to, size_t size)
-{
- unsigned long addr;
-
- pr_debug("Remap %s %lx -> %lx\n", who, from, to);
-
- addr = sys_mremap(from, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, to);
- if (addr != to) {
- pr_err("Unable to remap %lx -> %lx %lx\n",
- from, to, addr);
- return -1;
- }
-
- return 0;
-}
-
-/* Park runtime vDSO in some safe place where it can be accessible from restorer */
-int vdso_do_park(struct vdso_symtable *sym_rt, unsigned long park_at, unsigned long park_size)
-{
- int ret;
-
- BUG_ON((vdso_vma_size(sym_rt) + vvar_vma_size(sym_rt)) < park_size);
-
- if (sym_rt->vvar_start != VDSO_BAD_ADDR) {
- if (sym_rt->vma_start < sym_rt->vvar_start) {
- ret = vdso_remap("rt-vdso", sym_rt->vma_start,
- park_at, vdso_vma_size(sym_rt));
- park_at += vdso_vma_size(sym_rt);
- ret |= vdso_remap("rt-vvar", sym_rt->vvar_start,
- park_at, vvar_vma_size(sym_rt));
- } else {
- ret = vdso_remap("rt-vvar", sym_rt->vvar_start,
- park_at, vvar_vma_size(sym_rt));
- park_at += vvar_vma_size(sym_rt);
- ret |= vdso_remap("rt-vdso", sym_rt->vma_start,
- park_at, vdso_vma_size(sym_rt));
- }
- } else
- ret = vdso_remap("rt-vdso", sym_rt->vma_start,
- park_at, vdso_vma_size(sym_rt));
- return ret;
-}
-
-int vdso_proxify(char *who, struct vdso_symtable *sym_rt,
- unsigned long vdso_rt_parked_at, size_t index,
- VmaEntry *vmas, size_t nr_vmas)
-{
- VmaEntry *vma_vdso = NULL, *vma_vvar = NULL;
- struct vdso_symtable s = VDSO_SYMTABLE_INIT;
- bool remap_rt = false;
-
- /*
- * Figue out which kind of vdso tuple we get.
- */
- if (vma_entry_is(&vmas[index], VMA_AREA_VDSO))
- vma_vdso = &vmas[index];
- else if (vma_entry_is(&vmas[index], VMA_AREA_VVAR))
- vma_vvar = &vmas[index];
-
- if (index < (nr_vmas - 1)) {
- if (vma_entry_is(&vmas[index + 1], VMA_AREA_VDSO))
- vma_vdso = &vmas[index + 1];
- else if (vma_entry_is(&vmas[index + 1], VMA_AREA_VVAR))
- vma_vvar = &vmas[index + 1];
- }
-
- if (!vma_vdso) {
- pr_err("Can't find vDSO area in image\n");
- return -1;
- }
-
- /*
- * vDSO mark overwrites Elf program header of proxy vDSO thus
- * it must never ever be greater in size.
- */
- BUILD_BUG_ON(sizeof(struct vdso_mark) > sizeof(Elf64_Phdr));
-
- /*
- * Find symbols in vDSO zone read from image.
- */
- if (vdso_fill_symtable((void *)vma_vdso->start, vma_entry_len(vma_vdso), &s))
- return -1;
-
- /*
- * Proxification strategy
- *
- * - There might be two vDSO zones: vdso code and optionally vvar data
- * - To be able to use in-place remapping we need
- *
- * a) Size and order of vDSO zones are to match
- * b) Symbols offsets must match
- * c) Have same number of vDSO zones
- */
- if (vma_entry_len(vma_vdso) == vdso_vma_size(sym_rt)) {
- size_t i;
-
- for (i = 0; i < ARRAY_SIZE(s.symbols); i++) {
- if (s.symbols[i].offset != sym_rt->symbols[i].offset)
- break;
- }
-
- if (i == ARRAY_SIZE(s.symbols)) {
- if (vma_vvar && sym_rt->vvar_start != VVAR_BAD_ADDR) {
- remap_rt = (vvar_vma_size(sym_rt) == vma_entry_len(vma_vvar));
- if (remap_rt) {
- long delta_rt = sym_rt->vvar_start - sym_rt->vma_start;
- long delta_this = vma_vvar->start - vma_vdso->start;
-
- remap_rt = (delta_rt ^ delta_this) < 0 ? false : true;
- }
- } else
- remap_rt = true;
- }
- }
-
- pr_debug("image [vdso] %lx-%lx [vvar] %lx-%lx\n",
- vma_vdso->start, vma_vdso->end,
- vma_vvar ? vma_vvar->start : VVAR_BAD_ADDR,
- vma_vvar ? vma_vvar->end : VVAR_BAD_ADDR);
-
- /*
- * Easy case -- the vdso from image has same offsets, order and size
- * as runtime, so we simply remap runtime vdso to dumpee position
- * without generating any proxy.
- *
- * Note we may remap VVAR vdso as well which might not yet been mapped
- * by a caller code. So drop VMA_AREA_REGULAR from it and caller would
- * not touch it anymore.
- */
- if (remap_rt) {
- int ret = 0;
-
- pr_info("Runtime vdso/vvar matches dumpee, remap inplace\n");
-
- if (sys_munmap((void *)vma_vdso->start, vma_entry_len(vma_vdso))) {
- pr_err("Failed to unmap %s\n", who);
- return -1;
- }
-
- if (vma_vvar) {
- if (sys_munmap((void *)vma_vvar->start, vma_entry_len(vma_vvar))) {
- pr_err("Failed to unmap %s\n", who);
- return -1;
- }
-
- if (vma_vdso->start < vma_vvar->start) {
- ret = vdso_remap(who, vdso_rt_parked_at, vma_vdso->start, vdso_vma_size(sym_rt));
- vdso_rt_parked_at += vdso_vma_size(sym_rt);
- ret |= vdso_remap(who, vdso_rt_parked_at, vma_vvar->start, vvar_vma_size(sym_rt));
- } else {
- ret = vdso_remap(who, vdso_rt_parked_at, vma_vvar->start, vvar_vma_size(sym_rt));
- vdso_rt_parked_at += vvar_vma_size(sym_rt);
- ret |= vdso_remap(who, vdso_rt_parked_at, vma_vdso->start, vdso_vma_size(sym_rt));
- }
- } else
- ret = vdso_remap(who, vdso_rt_parked_at, vma_vdso->start, vdso_vma_size(sym_rt));
-
- return ret;
- }
-
- /*
- * Now complex case -- we need to proxify calls. We redirect
- * calls from dumpee vdso to runtime vdso, making dumpee
- * to operate as proxy vdso.
- */
- pr_info("Runtime vdso mismatches dumpee, generate proxy\n");
-
- /*
- * Don't forget to shift if vvar is before vdso.
- */
- if (sym_rt->vvar_start != VDSO_BAD_ADDR &&
- sym_rt->vvar_start < sym_rt->vma_start)
- vdso_rt_parked_at += vvar_vma_size(sym_rt);
-
- if (vdso_redirect_calls((void *)vdso_rt_parked_at,
- (void *)vma_vdso->start,
- sym_rt, &s)) {
- pr_err("Failed to proxify dumpee contents\n");
- return -1;
- }
-
- /*
- * Put a special mark into runtime vdso, thus at next checkpoint
- * routine we could detect this vdso and do not dump it, since
- * it's auto-generated every new session if proxy required.
- */
- sys_mprotect((void *)vdso_rt_parked_at, vdso_vma_size(sym_rt), PROT_WRITE);
- vdso_put_mark((void *)vdso_rt_parked_at, vma_vdso->start, vma_vvar ? vma_vvar->start : VVAR_BAD_ADDR);
- sys_mprotect((void *)vdso_rt_parked_at, vdso_vma_size(sym_rt), VDSO_PROT);
- return 0;
-}
-
#else /* CONFIG_X86_64 */
-int vdso_redirect_calls(void *base_to, void *base_from,
+int vdso_redirect_calls(unsigned long base_to, unsigned long base_from,
struct vdso_symtable *to,
struct vdso_symtable *from)
{
return 0;
}
-int vdso_fill_symtable(char *mem, size_t size, struct vdso_symtable *t)
-{
- return 0;
-}
-
-int vdso_do_park(struct vdso_symtable *sym_rt, unsigned long park_at, unsigned long park_size)
-{
- return 0;
-}
-
-int vdso_proxify(char *who, struct vdso_symtable *sym_rt,
- unsigned long vdso_rt_parked_at, size_t index,
- VmaEntry *vmas, size_t nr_vmas)
-{
- return 0;
-}
-
#endif /* CONFIG_X86_64 */
diff --git a/include/asm-generic/vdso.h b/include/asm-generic/vdso.h
new file mode 100644
index 000000000000..bb746055416b
--- /dev/null
+++ b/include/asm-generic/vdso.h
@@ -0,0 +1,12 @@
+#ifndef __CR_ASM_GENERIC_VDSO_H__
+#define __CR_ASM_GENERIC_VDSO_H__
+
+#define VDSO_PROT (PROT_READ | PROT_EXEC)
+#define VVAR_PROT (PROT_READ)
+
+#define VDSO_BAD_ADDR (-1ul)
+#define VVAR_BAD_ADDR VDSO_BAD_ADDR
+#define VDSO_BAD_PFN (-1ull)
+#define VVAR_BAD_PFN VDSO_BAD_PFN
+
+#endif /* __CR_ASM_GENERIC_VDSO_H__ */
diff --git a/include/parasite-syscall.h b/include/parasite-syscall.h
index bf186cdf15ff..64cec51b5017 100644
--- a/include/parasite-syscall.h
+++ b/include/parasite-syscall.h
@@ -118,11 +118,6 @@ extern int __parasite_execute_syscall(struct parasite_ctl *ctl,
user_regs_struct_t *regs);
extern bool arch_can_dump_task(pid_t pid);
-#ifdef CONFIG_VDSO
-extern int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
- struct vm_area_list *vma_area_list);
-#endif
-
/*
* The PTRACE_SYSCALL will trap task twice -- on
* enter into and on exit from syscall. If we trace
diff --git a/include/parasite-vdso.h b/include/parasite-vdso.h
new file mode 100644
index 000000000000..15e750dac3a5
--- /dev/null
+++ b/include/parasite-vdso.h
@@ -0,0 +1,96 @@
+#ifndef __CR_PARASITE_VDSO_H__
+#define __CR_PARASITE_VDSO_H__
+
+#include "config.h"
+
+#ifdef CONFIG_VDSO
+
+#include "util-vdso.h"
+#include "protobuf/vma.pb-c.h"
+
+struct parasite_ctl;
+struct vm_area_list;
+
+/* Check if symbol present in symtable */
+static inline bool vdso_symbol_empty(struct vdso_symbol *s)
+{
+ return s->offset == VDSO_BAD_ADDR && s->name[0] == '\0';
+}
+
+/*
+ * Special mark which allows to identify runtime vdso where
+ * calls from proxy vdso are redirected. This mark usually
+ * placed at the start of vdso area where Elf header lives.
+ * Since such runtime vdso is solely used by proxy and
+ * nobody else is supposed to access it, it's more or less
+ * safe to screw the Elf header with @signature and
+ * @proxy_addr.
+ *
+ * The @proxy_addr deserves a few comments. When we redirect
+ * the calls from proxy to runtime vdso, on next checkpoint
+ * it won't be possible to find which VMA is proxy, thus
+ * we save its address in the member.
+ */
+struct vdso_mark {
+ u64 signature;
+ unsigned long proxy_vdso_addr;
+
+ unsigned long version;
+
+ /*
+ * In case of new vDSO format the VVAR area address
+ * needed for easier discovery of where it lives without
+ * relying on procfs output.
+ */
+ unsigned long proxy_vvar_addr;
+};
+
+#define VDSO_MARK_SIGNATURE (0x6f73647675697263ULL) /* Magic number (criuvdso) */
+#define VDSO_MARK_SIGNATURE_V2 (0x4f53447675697263ULL) /* Magic number (criuvDSO) */
+#define VDSO_MARK_CUR_VERSION (2)
+
+static inline void vdso_put_mark(void *where, unsigned long proxy_vdso_addr, unsigned long proxy_vvar_addr)
+{
+ struct vdso_mark *m = where;
+
+ m->signature = VDSO_MARK_SIGNATURE_V2;
+ m->proxy_vdso_addr = proxy_vdso_addr;
+ m->version = VDSO_MARK_CUR_VERSION;
+ m->proxy_vvar_addr = proxy_vvar_addr;
+}
+
+static inline bool is_vdso_mark(void *addr)
+{
+ struct vdso_mark *m = addr;
+
+ if (m->signature == VDSO_MARK_SIGNATURE_V2) {
+ /*
+ * New format
+ */
+ return true;
+ } else if (m->signature == VDSO_MARK_SIGNATURE) {
+ /*
+ * Old format -- simply extend the mark up
+ * to the version we support.
+ */
+ vdso_put_mark(m, m->proxy_vdso_addr, VVAR_BAD_ADDR);
+ return true;
+ }
+ return false;
+}
+
+extern int vdso_do_park(struct vdso_symtable *sym_rt, unsigned long park_at, unsigned long park_size);
+extern int vdso_fill_symtable(char *mem, size_t size, struct vdso_symtable *t);
+extern int vdso_proxify(char *who, struct vdso_symtable *sym_rt,
+ unsigned long vdso_rt_parked_at, size_t index,
+ VmaEntry *vmas, size_t nr_vmas);
+
+/* only used by aarch64 => to be moved to aarch64/include/asm/vdso.h */
+extern void write_intraprocedure_branch(void *to, void *from);
+
+#else /* CONFIG_VDSO */
+#define vdso_do_park(sym_rt, park_at, park_size) (0)
+
+#endif /* CONFIG_VDSO */
+
+#endif /* __CR_PARASITE_VDSO_H__ */
diff --git a/include/restorer.h b/include/restorer.h
index 56b9938a0156..afcaf6804678 100644
--- a/include/restorer.h
+++ b/include/restorer.h
@@ -19,7 +19,7 @@
#include "timerfd.h"
#include "shmem.h"
#include "sigframe.h"
-#include "vdso.h"
+#include "parasite-vdso.h"
#include <time.h>
diff --git a/include/util-vdso.h b/include/util-vdso.h
new file mode 100644
index 000000000000..2942337d7a61
--- /dev/null
+++ b/include/util-vdso.h
@@ -0,0 +1,69 @@
+#ifndef __CR_UTIL_VDSO_H__
+#define __CR_UTIL_VDSO_H__
+
+/*
+ * VDSO management common definitions.
+ *
+ * This header file is included by the criu main code and the parasite code.
+ * It contains definitions shared by these 2 parts.
+ *
+ * This file should not be included except in pie/util-vdso.c, include/vdso.h
+ * and include/parasite-vdso.h
+ */
+
+#include <sys/types.h>
+
+/*
+ * Each architecture must export:
+ * VDSO_SYMBOL_MAX, the number of vDSO symbols to manage
+ * ARCH_VDSO_SYMBOLS, a table of strings containing the vDSO symbol names
+ * vdso_redirect_calls, a service called to redirect the vDSO symbols in
+ * the parasite code.
+ */
+#include "asm/vdso.h"
+
+struct vdso_symbol {
+ char name[32];
+ unsigned long offset;
+};
+
+struct vdso_symtable {
+ unsigned long vma_start;
+ unsigned long vma_end;
+ unsigned long vvar_start;
+ unsigned long vvar_end;
+ struct vdso_symbol symbols[VDSO_SYMBOL_MAX];
+};
+
+#define VDSO_SYMBOL_INIT { .offset = VDSO_BAD_ADDR, }
+
+#define VDSO_SYMTABLE_INIT \
+ { \
+ .vma_start = VDSO_BAD_ADDR, \
+ .vma_end = VDSO_BAD_ADDR, \
+ .vvar_start = VVAR_BAD_ADDR, \
+ .vvar_end = VVAR_BAD_ADDR, \
+ .symbols = { \
+ [0 ... VDSO_SYMBOL_MAX - 1] = \
+ (struct vdso_symbol)VDSO_SYMBOL_INIT, \
+ }, \
+ }
+
+/* Size of VMA associated with vdso */
+static inline unsigned long vdso_vma_size(struct vdso_symtable *t)
+{
+ return t->vma_end - t->vma_start;
+}
+
+static inline unsigned long vvar_vma_size(struct vdso_symtable *t)
+{
+ return t->vvar_end - t->vvar_start;
+}
+
+extern const char *vdso_symbols[VDSO_SYMBOL_MAX];
+
+extern int vdso_fill_symtable(char *mem, size_t size, struct vdso_symtable *t);
+
+
+
+#endif /* __CR_UTIL_VDSO_H__ */
diff --git a/include/vdso.h b/include/vdso.h
index 9fc174dbf0fa..ea6bfabbf3ec 100644
--- a/include/vdso.h
+++ b/include/vdso.h
@@ -8,17 +8,19 @@
#ifdef CONFIG_VDSO
-#include "asm/vdso.h"
+#include "util-vdso.h"
+
+extern struct vdso_symtable vdso_sym_rt;
+
+extern int vdso_init(void);
+
+extern int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
+ struct vm_area_list *vma_area_list);
#else /* CONFIG_VDSO */
#define vdso_init() (0)
#define parasite_fixup_vdso(ctl, pid, vma_area_list) (0)
-#define vdso_vma_size(t) (0)
-#define vdso_do_park(sym_rt, park_at, park_size) (0)
-#define vdso_remap(who, from, to, size) (0)
-#define vdso_proxify(who, sym_rt, vdso_rt_parked_at, \
- index, vmas, nr_vmas) (0)
#endif /* CONFIG_VDSO */
diff --git a/pie/Makefile b/pie/Makefile
index 119822ae5d93..b9edcc959e47 100644
--- a/pie/Makefile
+++ b/pie/Makefile
@@ -6,6 +6,8 @@ obj-y += util.o
obj-y += util-fd.o
ifeq ($(VDSO),y)
+obj-y += util-vdso.o
+obj-y += parasite-vdso.o
obj-e += $(ARCH_DIR)/vdso-pie.o
ifeq ($(SRCARCH),aarch64)
asm-e += $(ARCH_DIR)/intraprocedure.o
diff --git a/pie/parasite-vdso.c b/pie/parasite-vdso.c
new file mode 100644
index 000000000000..9ee42e52875a
--- /dev/null
+++ b/pie/parasite-vdso.c
@@ -0,0 +1,218 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <elf.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+#include "asm/string.h"
+#include "asm/types.h"
+
+#include "syscall.h"
+#include "image.h"
+#include "parasite-vdso.h"
+#include "vma.h"
+#include "log.h"
+#include "bug.h"
+
+#ifdef LOG_PREFIX
+# undef LOG_PREFIX
+#endif
+#define LOG_PREFIX "vdso: "
+
+
+/*
+ * Move the mapping @who (@size bytes long) from address @from to
+ * address @to with mremap(MREMAP_MAYMOVE | MREMAP_FIXED).
+ *
+ * Returns 0 on success, -1 if the kernel placed it elsewhere.
+ */
+static int vdso_remap(char *who, unsigned long from, unsigned long to, size_t size)
+{
+	unsigned long addr;
+
+	pr_debug("Remap %s %lx -> %lx\n", who, from, to);
+
+	addr = sys_mremap(from, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, to);
+	if (addr != to) {
+		pr_err("Unable to remap %lx -> %lx %lx\n",
+		       from, to, addr);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Park runtime vDSO in some safe place where it can be accessible from restorer */
+int vdso_do_park(struct vdso_symtable *sym_rt, unsigned long park_at, unsigned long park_size)
+{
+	int ret;
+
+	/*
+	 * NOTE(review): this BUG fires when the parking area is *bigger*
+	 * than the vdso+vvar pair, not when it is too small to hold it --
+	 * confirm the intended invariant (an exact-size match satisfies
+	 * both readings).
+	 */
+	BUG_ON((vdso_vma_size(sym_rt) + vvar_vma_size(sym_rt)) < park_size);
+
+	/*
+	 * NOTE(review): vvar_start is tested against VDSO_BAD_ADDR here
+	 * while vdso_proxify() tests it against VVAR_BAD_ADDR -- verify
+	 * that both macros expand to the same value.
+	 */
+	if (sym_rt->vvar_start != VDSO_BAD_ADDR) {
+		/* Move both zones, preserving their relative order */
+		if (sym_rt->vma_start < sym_rt->vvar_start) {
+			ret = vdso_remap("rt-vdso", sym_rt->vma_start,
+					 park_at, vdso_vma_size(sym_rt));
+			park_at += vdso_vma_size(sym_rt);
+			ret |= vdso_remap("rt-vvar", sym_rt->vvar_start,
+					  park_at, vvar_vma_size(sym_rt));
+		} else {
+			ret = vdso_remap("rt-vvar", sym_rt->vvar_start,
+					 park_at, vvar_vma_size(sym_rt));
+			park_at += vvar_vma_size(sym_rt);
+			ret |= vdso_remap("rt-vdso", sym_rt->vma_start,
+					  park_at, vdso_vma_size(sym_rt));
+		}
+	} else
+		/* No vvar zone at runtime -- park the vdso alone */
+		ret = vdso_remap("rt-vdso", sym_rt->vma_start,
+				 park_at, vdso_vma_size(sym_rt));
+	return ret;
+}
+
+/*
+ * Restore the dumpee's vdso: if the runtime vdso/vvar pair matches the
+ * one recorded in the image (same sizes, symbol offsets and zone order),
+ * remap the parked runtime pair straight into the image position;
+ * otherwise patch the dumpee's own vdso so its entry points jump into
+ * the parked runtime vdso (a "proxy").
+ *
+ * @who               - caller's name, used in log messages only
+ * @sym_rt            - symbol table of the runtime vdso
+ * @vdso_rt_parked_at - address the runtime vdso/vvar pair was parked at
+ * @index             - position of the vdso (or vvar) entry in @vmas
+ * @vmas, @nr_vmas    - the dumpee's VMA list read from the image
+ *
+ * Returns 0 on success, -1 on error.
+ */
+int vdso_proxify(char *who, struct vdso_symtable *sym_rt,
+		 unsigned long vdso_rt_parked_at, size_t index,
+		 VmaEntry *vmas, size_t nr_vmas)
+{
+	VmaEntry *vma_vdso = NULL, *vma_vvar = NULL;
+	struct vdso_symtable s = VDSO_SYMTABLE_INIT;
+	bool remap_rt = false;
+
+	/*
+	 * Figure out which kind of vdso tuple we get: @index may point
+	 * at either the vdso or the vvar zone, and its companion (if
+	 * any) is expected in the very next VMA entry.
+	 */
+	if (vma_entry_is(&vmas[index], VMA_AREA_VDSO))
+		vma_vdso = &vmas[index];
+	else if (vma_entry_is(&vmas[index], VMA_AREA_VVAR))
+		vma_vvar = &vmas[index];
+
+	if (index < (nr_vmas - 1)) {
+		if (vma_entry_is(&vmas[index + 1], VMA_AREA_VDSO))
+			vma_vdso = &vmas[index + 1];
+		else if (vma_entry_is(&vmas[index + 1], VMA_AREA_VVAR))
+			vma_vvar = &vmas[index + 1];
+	}
+
+	/* A vvar zone without a vdso zone makes no sense */
+	if (!vma_vdso) {
+		pr_err("Can't find vDSO area in image\n");
+		return -1;
+	}
+
+	/*
+	 * vDSO mark overwrites Elf program header of proxy vDSO thus
+	 * it must never ever be greater in size.
+	 */
+	BUILD_BUG_ON(sizeof(struct vdso_mark) > sizeof(Elf64_Phdr));
+
+	/*
+	 * Find symbols in the vDSO zone read from the image.
+	 */
+	if (vdso_fill_symtable((void *)vma_vdso->start, vma_entry_len(vma_vdso), &s))
+		return -1;
+
+	/*
+	 * Proxification strategy
+	 *
+	 *  - There might be two vDSO zones: vdso code and optionally vvar data
+	 *  - To be able to use in-place remapping we need
+	 *
+	 *    a) Size and order of vDSO zones are to match
+	 *    b) Symbols offsets must match
+	 *    c) Have same number of vDSO zones
+	 */
+	if (vma_entry_len(vma_vdso) == vdso_vma_size(sym_rt)) {
+		size_t i;
+
+		/* (b) every symbol must live at the same offset */
+		for (i = 0; i < ARRAY_SIZE(s.symbols); i++) {
+			if (s.symbols[i].offset != sym_rt->symbols[i].offset)
+				break;
+		}
+
+		if (i == ARRAY_SIZE(s.symbols)) {
+			if (vma_vvar && sym_rt->vvar_start != VVAR_BAD_ADDR) {
+				/* (a) vvar sizes must match too */
+				remap_rt = (vvar_vma_size(sym_rt) == vma_entry_len(vma_vvar));
+				if (remap_rt) {
+					long delta_rt = sym_rt->vvar_start - sym_rt->vma_start;
+					long delta_this = vma_vvar->start - vma_vdso->start;
+
+					/*
+					 * Same-sign deltas mean the vdso/vvar
+					 * order matches between runtime and
+					 * image, so in-place remap is safe.
+					 */
+					remap_rt = (delta_rt ^ delta_this) < 0 ? false : true;
+				}
+			} else
+				remap_rt = true;
+		}
+	}
+
+	pr_debug("image [vdso] %lx-%lx [vvar] %lx-%lx\n",
+		 vma_vdso->start, vma_vdso->end,
+		 vma_vvar ? vma_vvar->start : VVAR_BAD_ADDR,
+		 vma_vvar ? vma_vvar->end : VVAR_BAD_ADDR);
+
+	/*
+	 * Easy case -- the vdso from the image has the same offsets, order
+	 * and size as the runtime one, so we simply remap the runtime vdso
+	 * to the dumpee position without generating any proxy.
+	 *
+	 * Note we may remap the vvar zone as well, which might not have
+	 * been mapped yet by the caller code. So drop VMA_AREA_REGULAR
+	 * from it and the caller will not touch it anymore.
+	 *
+	 * NOTE(review): the VMA_AREA_REGULAR flag is not cleared anywhere
+	 * in this function -- confirm the caller takes care of it.
+	 */
+	if (remap_rt) {
+		int ret = 0;
+
+		pr_info("Runtime vdso/vvar matches dumpee, remap inplace\n");
+
+		/* Free the image position first, then move the parked pair in */
+		if (sys_munmap((void *)vma_vdso->start, vma_entry_len(vma_vdso))) {
+			pr_err("Failed to unmap %s\n", who);
+			return -1;
+		}
+
+		if (vma_vvar) {
+			if (sys_munmap((void *)vma_vvar->start, vma_entry_len(vma_vvar))) {
+				pr_err("Failed to unmap %s\n", who);
+				return -1;
+			}
+
+			/* The parked pair keeps the runtime order (see vdso_do_park) */
+			if (vma_vdso->start < vma_vvar->start) {
+				ret = vdso_remap(who, vdso_rt_parked_at, vma_vdso->start, vdso_vma_size(sym_rt));
+				vdso_rt_parked_at += vdso_vma_size(sym_rt);
+				ret |= vdso_remap(who, vdso_rt_parked_at, vma_vvar->start, vvar_vma_size(sym_rt));
+			} else {
+				ret = vdso_remap(who, vdso_rt_parked_at, vma_vvar->start, vvar_vma_size(sym_rt));
+				vdso_rt_parked_at += vvar_vma_size(sym_rt);
+				ret |= vdso_remap(who, vdso_rt_parked_at, vma_vdso->start, vdso_vma_size(sym_rt));
+			}
+		} else
+			ret = vdso_remap(who, vdso_rt_parked_at, vma_vdso->start, vdso_vma_size(sym_rt));
+
+		return ret;
+	}
+
+	/*
+	 * Now complex case -- we need to proxify calls. We redirect
+	 * calls from dumpee vdso to runtime vdso, making dumpee
+	 * to operate as proxy vdso.
+	 */
+	pr_info("Runtime vdso mismatches dumpee, generate proxy\n");
+
+	/*
+	 * Don't forget to shift if vvar is before vdso.
+	 *
+	 * NOTE(review): VDSO_BAD_ADDR is used here for vvar_start while
+	 * VVAR_BAD_ADDR is used above -- verify both expand to the same
+	 * value.
+	 */
+	if (sym_rt->vvar_start != VDSO_BAD_ADDR &&
+	    sym_rt->vvar_start < sym_rt->vma_start)
+		vdso_rt_parked_at += vvar_vma_size(sym_rt);
+
+	/* Patch the dumpee vdso entry points to jump into the parked one */
+	if (vdso_redirect_calls(vdso_rt_parked_at,
+				vma_vdso->start,
+				sym_rt, &s)) {
+		pr_err("Failed to proxify dumpee contents\n");
+		return -1;
+	}
+
+	/*
+	 * Put a special mark into runtime vdso, thus at next checkpoint
+	 * routine we could detect this vdso and do not dump it, since
+	 * it's auto-generated every new session if proxy required.
+	 */
+	sys_mprotect((void *)vdso_rt_parked_at, vdso_vma_size(sym_rt), PROT_WRITE);
+	vdso_put_mark((void *)vdso_rt_parked_at, vma_vdso->start, vma_vvar ? vma_vvar->start : VVAR_BAD_ADDR);
+	sys_mprotect((void *)vdso_rt_parked_at, vdso_vma_size(sym_rt), VDSO_PROT);
+	return 0;
+}
diff --git a/pie/parasite.c b/pie/parasite.c
index cbaff4a90ff2..a39c035a7f71 100644
--- a/pie/parasite.c
+++ b/pie/parasite.c
@@ -13,7 +13,7 @@
#include "fcntl.h"
#include "prctl.h"
#include "lock.h"
-#include "vdso.h"
+#include "parasite-vdso.h"
#include "log.h"
#include "tty.h"
diff --git a/pie/util-vdso.c b/pie/util-vdso.c
new file mode 100644
index 000000000000..9711aaa2db7c
--- /dev/null
+++ b/pie/util-vdso.c
@@ -0,0 +1,211 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <elf.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+#include "asm/string.h"
+#include "asm/types.h"
+
+#include "syscall.h"
+#include "image.h"
+#include "util-vdso.h"
+#include "vma.h"
+#include "log.h"
+#include "bug.h"
+
+#ifdef LOG_PREFIX
+# undef LOG_PREFIX
+#endif
+#define LOG_PREFIX "vdso: "
+
+/* Per-architecture names of the vdso symbols to resolve (asm/vdso.h) */
+const char *vdso_symbols[VDSO_SYMBOL_MAX] = {
+	ARCH_VDSO_SYMBOLS
+};
+
+/*
+ * Check if pointer is out-of-bound with respect to [start, start + size].
+ *
+ * NOTE(review): only the base pointer is range-checked; ptr == end is
+ * accepted, and an object starting just below @end may still extend
+ * past it -- callers must keep the object size in mind.
+ */
+static bool __ptr_oob(void *ptr, void *start, size_t size)
+{
+	void *end = (void *)((unsigned long)start + size);
+	return ptr > end || ptr < start;
+}
+
+/*
+ * Classic Elf symbol hash, see the format specification; it indexes
+ * the DT_HASH table walked in vdso_fill_symtable() below.
+ */
+static unsigned long elf_hash(const unsigned char *name)
+{
+	unsigned long h = 0, g;
+
+	while (*name) {
+		h = (h << 4) + *name++;
+		g = h & 0xf0000000ul;
+		if (g)
+			h ^= g >> 24;
+		h &= ~g;
+	}
+	return h;
+}
+
+/*
+ * Parse the vdso Elf image mapped at @mem (@size bytes long) and record
+ * into @t the offset -- relative to the PT_LOAD base -- of every symbol
+ * listed in vdso_symbols[].
+ *
+ * Returns 0 on success, -EINVAL for a malformed image and -EFAULT when
+ * Elf data points outside the [@mem, @mem + @size] window.
+ */
+int vdso_fill_symtable(char *mem, size_t size, struct vdso_symtable *t)
+{
+	Elf64_Phdr *dynamic = NULL, *load = NULL;
+	Elf64_Ehdr *ehdr = (void *)mem;
+	Elf64_Dyn *dyn_strtab = NULL;
+	Elf64_Dyn *dyn_symtab = NULL;
+	Elf64_Dyn *dyn_strsz = NULL;
+	Elf64_Dyn *dyn_syment = NULL;
+	Elf64_Dyn *dyn_hash = NULL;
+	Elf64_Word *hash = NULL;
+	Elf64_Phdr *phdr;
+	Elf64_Dyn *d;
+
+	Elf64_Word *bucket, *chain;
+	Elf64_Word nbucket, nchain;
+
+	/*
+	 * See the Elf specification for these magic values:
+	 * the e_ident of a 64-bit little-endian Elf, current version.
+	 */
+	static const char elf_ident[] = {
+		0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	};
+
+	char *dynsymbol_names;
+	unsigned int i, j, k;
+
+	BUILD_BUG_ON(sizeof(elf_ident) != sizeof(ehdr->e_ident));
+
+	pr_debug("Parsing at %lx %lx\n", (long)mem, (long)mem + (long)size);
+
+	/*
+	 * Make sure it's a file we support.
+	 */
+	if (builtin_memcmp(ehdr->e_ident, elf_ident, sizeof(elf_ident))) {
+		pr_err("Elf header magic mismatch\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * We need PT_LOAD and PT_DYNAMIC here. Each once.
+	 */
+	phdr = (void *)&mem[ehdr->e_phoff];
+	for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
+		if (__ptr_oob(phdr, mem, size))
+			goto err_oob;
+		switch (phdr->p_type) {
+		case PT_DYNAMIC:
+			if (dynamic) {
+				pr_err("Second PT_DYNAMIC header\n");
+				return -EINVAL;
+			}
+			dynamic = phdr;
+			break;
+		case PT_LOAD:
+			if (load) {
+				pr_err("Second PT_LOAD header\n");
+				return -EINVAL;
+			}
+			load = phdr;
+			break;
+		}
+	}
+
+	if (!load || !dynamic) {
+		pr_err("One of obligated program headers is missed\n");
+		return -EINVAL;
+	}
+
+	pr_debug("PT_LOAD p_vaddr: %lx\n", (unsigned long)load->p_vaddr);
+
+	/*
+	 * Dynamic section tags should provide us the rest of information
+	 * needed. Note that we're interested in a small set of tags.
+	 */
+	d = (void *)&mem[dynamic->p_offset];
+	for (i = 0; i < dynamic->p_filesz / sizeof(*d); i++, d++) {
+		if (__ptr_oob(d, mem, size))
+			goto err_oob;
+
+		if (d->d_tag == DT_NULL) {
+			break;
+		} else if (d->d_tag == DT_STRTAB) {
+			dyn_strtab = d;
+			pr_debug("DT_STRTAB: %lx\n", (unsigned long)d->d_un.d_ptr);
+		} else if (d->d_tag == DT_SYMTAB) {
+			dyn_symtab = d;
+			pr_debug("DT_SYMTAB: %lx\n", (unsigned long)d->d_un.d_ptr);
+		} else if (d->d_tag == DT_STRSZ) {
+			dyn_strsz = d;
+			pr_debug("DT_STRSZ: %lx\n", (unsigned long)d->d_un.d_val);
+		} else if (d->d_tag == DT_SYMENT) {
+			dyn_syment = d;
+			pr_debug("DT_SYMENT: %lx\n", (unsigned long)d->d_un.d_val);
+		} else if (d->d_tag == DT_HASH) {
+			dyn_hash = d;
+			pr_debug("DT_HASH: %lx\n", (unsigned long)d->d_un.d_ptr);
+		}
+	}
+
+	if (!dyn_strtab || !dyn_symtab || !dyn_strsz || !dyn_syment || !dyn_hash) {
+		pr_err("Not all dynamic entries are present\n");
+		return -EINVAL;
+	}
+
+	/* Convert virtual addresses into offsets inside the mapped image */
+	dynsymbol_names = &mem[dyn_strtab->d_un.d_val - load->p_vaddr];
+	if (__ptr_oob(dynsymbol_names, mem, size))
+		goto err_oob;
+
+	hash = (void *)&mem[(unsigned long)dyn_hash->d_un.d_ptr - (unsigned long)load->p_vaddr];
+	if (__ptr_oob(hash, mem, size))
+		goto err_oob;
+
+	/* DT_HASH layout: nbucket, nchain, bucket[nbucket], chain[nchain] */
+	nbucket = hash[0];
+	nchain = hash[1];
+	bucket = &hash[2];
+	chain = &hash[nbucket + 2];
+
+	pr_debug("nbucket %lx nchain %lx bucket %lx chain %lx\n",
+		 (long)nbucket, (long)nchain, (unsigned long)bucket, (unsigned long)chain);
+
+	/* Look up each wanted symbol through the Elf hash table */
+	for (i = 0; i < VDSO_SYMBOL_MAX; i++) {
+		const char * symbol = vdso_symbols[i];
+		k = elf_hash((const unsigned char *)symbol);
+
+		/* Walk the hash chain of the symbol's bucket */
+		for (j = bucket[k % nbucket]; j < nchain && chain[j] != STN_UNDEF; j = chain[j]) {
+			Elf64_Sym *sym = (void *)&mem[dyn_symtab->d_un.d_ptr - load->p_vaddr];
+			char *name;
+
+			sym = &sym[j];
+			if (__ptr_oob(sym, mem, size))
+				continue;
+
+			/*
+			 * NOTE(review): this skips a symbol only when it is
+			 * neither STT_FUNC nor STB_GLOBAL -- confirm whether
+			 * both properties were meant to be required.
+			 */
+			if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC &&
+			    ELF64_ST_BIND(sym->st_info) != STB_GLOBAL)
+				continue;
+
+			name = &dynsymbol_names[sym->st_name];
+			if (__ptr_oob(name, mem, size))
+				continue;
+
+			if (builtin_strcmp(name, symbol))
+				continue;
+
+			/* Found: remember the name and its offset in the image */
+			builtin_memcpy(t->symbols[i].name, name, sizeof(t->symbols[i].name));
+			t->symbols[i].offset = (unsigned long)sym->st_value - load->p_vaddr;
+			break;
+		}
+	}
+
+	return 0;
+
+err_oob:
+	pr_err("Corrupted Elf data\n");
+	return -EFAULT;
+}
+
--
1.9.1
More information about the CRIU
mailing list