[CRIU] [PATCH 3/4] arm64: provide own memcpy for pie

Kir Kolyshkin kir at openvz.org
Mon Nov 7 13:23:43 PST 2016


Chris, Dmitry,

Can you please take a look at this (and, ideally, test on amd64 with 
both clang and gcc)?


On 11/03/2016 11:25 PM, Kir Kolyshkin wrote:
> Got the following error trying to compile this with clang-3.6 on arm64:
>
>>    GEN      criu/pie/restorer.built-in.bin.o
>> criu/pie/native.lib.a(parasite-vdso.o): In function `vdso_proxify':
>> /criu/criu/pie/parasite-vdso.c:132: undefined reference to `memcpy'
> This happens because clang emits a call to memcpy for struct
> initialization (specifically, struct vdso_symtable in vdso_proxify()).
>
> See commit 214e280 ("pie: provide own memcpy for x86") for more
> background info about the issue.
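>
> For illustration, a (hypothetical, simplified) pattern like the one
> below is enough for the compiler to emit a library call, even though
> the PIE code is built without libc:
>
> 	/* Sketch only; the real struct vdso_symtable has a different layout. */
> 	struct vdso_symtable {
> 		unsigned long syms[32];
> 	};
>
> 	void proxify_sketch(struct vdso_symtable *dst,
> 			    const struct vdso_symtable *src)
> 	{
> 		struct vdso_symtable s = *src;	/* may be lowered to a memcpy() call */
> 		*dst = s;			/* ...and so may this copy */
> 	}
>
> Since the PIE blobs are linked without libc, any such implicit memcpy
> reference has to be satisfied by the blob itself, which is what this
> patch does for aarch64.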
>
> Cc: Christopher Covington <cov at codeaurora.org>
> Cc: Dmitry Safonov <dsafonov at virtuozzo.com>
> Signed-off-by: Kir Kolyshkin <kir at openvz.org>
> ---
>   criu/arch/aarch64/copy_template.S      | 193 +++++++++++++++++++++++++++++++++
>   criu/arch/aarch64/include/asm/string.h |  14 +++
>   criu/arch/aarch64/memcpy.S             |  82 ++++++++++++++
>   criu/pie/Makefile.library              |   3 +
>   4 files changed, 292 insertions(+)
>   create mode 100644 criu/arch/aarch64/copy_template.S
>   create mode 100644 criu/arch/aarch64/memcpy.S
>
> diff --git a/criu/arch/aarch64/copy_template.S b/criu/arch/aarch64/copy_template.S
> new file mode 100644
> index 0000000..410fbdb
> --- /dev/null
> +++ b/criu/arch/aarch64/copy_template.S
> @@ -0,0 +1,193 @@
> +/*
> + * Copyright (C) 2013 ARM Ltd.
> + * Copyright (C) 2013 Linaro.
> + *
> + * This code is based on glibc cortex strings work originally authored by Linaro
> + * and re-licensed under GPLv2 for the Linux kernel. The original code can
> + * be found @
> + *
> + * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
> + * files/head:/src/aarch64/
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +
> +/*
> + * Copy a buffer from src to dest (alignment handled by the hardware)
> + *
> + * Parameters:
> + *	x0 - dest
> + *	x1 - src
> + *	x2 - n
> + * Returns:
> + *	x0 - dest
> + */
> +dstin	.req	x0
> +src	.req	x1
> +count	.req	x2
> +tmp1	.req	x3
> +tmp1w	.req	w3
> +tmp2	.req	x4
> +tmp2w	.req	w4
> +dst	.req	x6
> +
> +A_l	.req	x7
> +A_h	.req	x8
> +B_l	.req	x9
> +B_h	.req	x10
> +C_l	.req	x11
> +C_h	.req	x12
> +D_l	.req	x13
> +D_h	.req	x14
> +
> +	mov	dst, dstin
> +	cmp	count, #16
> +	/* When the copy length is less than 16, the accesses are not aligned. */
> +	b.lo	.Ltiny15
> +
> +	neg	tmp2, src
> +	ands	tmp2, tmp2, #15/* Bytes to reach alignment. */
> +	b.eq	.LSrcAligned
> +	sub	count, count, tmp2
> +	/*
> +	* Copy the leading data from src to dst in increasing address
> +	* order. This way, the risk of overwriting the source data is
> +	* eliminated when the distance between src and dst is less than
> +	* 16. The memory accesses here are aligned.
> +	*/
> +	tbz	tmp2, #0, 1f
> +	ldrb1	tmp1w, src, #1
> +	strb1	tmp1w, dst, #1
> +1:
> +	tbz	tmp2, #1, 2f
> +	ldrh1	tmp1w, src, #2
> +	strh1	tmp1w, dst, #2
> +2:
> +	tbz	tmp2, #2, 3f
> +	ldr1	tmp1w, src, #4
> +	str1	tmp1w, dst, #4
> +3:
> +	tbz	tmp2, #3, .LSrcAligned
> +	ldr1	tmp1, src, #8
> +	str1	tmp1, dst, #8
> +
> +.LSrcAligned:
> +	cmp	count, #64
> +	b.ge	.Lcpy_over64
> +	/*
> +	* Deal with small copies quickly by dropping straight into the
> +	* exit block.
> +	*/
> +.Ltail63:
> +	/*
> +	* Copy up to 48 bytes of data. At this point we only need the
> +	* bottom 6 bits of count to be accurate.
> +	*/
> +	ands	tmp1, count, #0x30
> +	b.eq	.Ltiny15
> +	cmp	tmp1w, #0x20
> +	b.eq	1f
> +	b.lt	2f
> +	ldp1	A_l, A_h, src, #16
> +	stp1	A_l, A_h, dst, #16
> +1:
> +	ldp1	A_l, A_h, src, #16
> +	stp1	A_l, A_h, dst, #16
> +2:
> +	ldp1	A_l, A_h, src, #16
> +	stp1	A_l, A_h, dst, #16
> +.Ltiny15:
> +	/*
> +	* Prefer to break one ldp/stp into several loads/stores that access
> +	* memory in increasing address order, rather than loading/storing 16
> +	* bytes from (src-16) to (dst-16) and stepping src back to an aligned
> +	* address, which is what the original cortex memcpy does. If the
> +	* original approach were kept here, memmove would have to satisfy the
> +	* precondition that src is at least 16 bytes above dst, otherwise
> +	* some source data would be overwritten when memmove calls memcpy
> +	* directly. To keep memmove simple and decouple memcpy from memmove,
> +	* the original approach was dropped.
> +	*/
> +	tbz	count, #3, 1f
> +	ldr1	tmp1, src, #8
> +	str1	tmp1, dst, #8
> +1:
> +	tbz	count, #2, 2f
> +	ldr1	tmp1w, src, #4
> +	str1	tmp1w, dst, #4
> +2:
> +	tbz	count, #1, 3f
> +	ldrh1	tmp1w, src, #2
> +	strh1	tmp1w, dst, #2
> +3:
> +	tbz	count, #0, .Lexitfunc
> +	ldrb1	tmp1w, src, #1
> +	strb1	tmp1w, dst, #1
> +
> +	b	.Lexitfunc
> +
> +.Lcpy_over64:
> +	subs	count, count, #128
> +	b.ge	.Lcpy_body_large
> +	/*
> +	* Less than 128 bytes to copy, so handle 64 here and then jump
> +	* to the tail.
> +	*/
> +	ldp1	A_l, A_h, src, #16
> +	stp1	A_l, A_h, dst, #16
> +	ldp1	B_l, B_h, src, #16
> +	ldp1	C_l, C_h, src, #16
> +	stp1	B_l, B_h, dst, #16
> +	stp1	C_l, C_h, dst, #16
> +	ldp1	D_l, D_h, src, #16
> +	stp1	D_l, D_h, dst, #16
> +
> +	tst	count, #0x3f
> +	b.ne	.Ltail63
> +	b	.Lexitfunc
> +
> +	/*
> +	* Critical loop.  Start at a new cache line boundary.  Assuming
> +	* 64 bytes per line, this ensures the entire loop is in one line.
> +	*/
> +	.p2align	L1_CACHE_SHIFT
> +.Lcpy_body_large:
> +	/* Pre-load 64 bytes of data. */
> +	ldp1	A_l, A_h, src, #16
> +	ldp1	B_l, B_h, src, #16
> +	ldp1	C_l, C_h, src, #16
> +	ldp1	D_l, D_h, src, #16
> +1:
> +	/*
> +	* Interleave loading the next 64-byte block with storing the
> +	* previously loaded 64 bytes.
> +	*/
> +	stp1	A_l, A_h, dst, #16
> +	ldp1	A_l, A_h, src, #16
> +	stp1	B_l, B_h, dst, #16
> +	ldp1	B_l, B_h, src, #16
> +	stp1	C_l, C_h, dst, #16
> +	ldp1	C_l, C_h, src, #16
> +	stp1	D_l, D_h, dst, #16
> +	ldp1	D_l, D_h, src, #16
> +	subs	count, count, #64
> +	b.ge	1b
> +	stp1	A_l, A_h, dst, #16
> +	stp1	B_l, B_h, dst, #16
> +	stp1	C_l, C_h, dst, #16
> +	stp1	D_l, D_h, dst, #16
> +
> +	tst	count, #0x3f
> +	b.ne	.Ltail63
> +.Lexitfunc:
> diff --git a/criu/arch/aarch64/include/asm/string.h b/criu/arch/aarch64/include/asm/string.h
> index 020a8ec..5bd5bd8 100644
> --- a/criu/arch/aarch64/include/asm/string.h
> +++ b/criu/arch/aarch64/include/asm/string.h
> @@ -1,7 +1,21 @@
>   #ifndef __CR_ASM_STRING_H__
>   #define __CR_ASM_STRING_H__
>   
> +#define HAS_BUILTIN_MEMCPY
> +
>   #include "common/compiler.h"
>   #include "asm-generic/string.h"
>   
> +#ifdef CR_NOGLIBC
> +extern void *memcpy_arm64(void *to, const void *from, size_t n);
> +static inline void *builtin_memcpy(void *to, const void *from, size_t n)
> +{
> +	if (n)
> +		memcpy_arm64(to, from, n);
> +	return to;
> +}
> +#else
> +#define builtin_memcpy memcpy
> +#endif /* CR_NOGLIBC */
> +
>   #endif /* __CR_ASM_STRING_H__ */
> diff --git a/criu/arch/aarch64/memcpy.S b/criu/arch/aarch64/memcpy.S
> new file mode 100644
> index 0000000..0d768ac
> --- /dev/null
> +++ b/criu/arch/aarch64/memcpy.S
> @@ -0,0 +1,82 @@
> +#include "asm/linkage.h"
> +
> +#define L1_CACHE_SHIFT		7
> +
> +/*
> + * The following code is taken from the Linux kernel (arch/arm64/lib/memcpy.S)
> + * with trivial modifications (change includes, use END macro, rename).
> + */
> +
> +/*
> + * Copyright (C) 2013 ARM Ltd.
> + * Copyright (C) 2013 Linaro.
> + *
> + * This code is based on glibc cortex strings work originally authored by Linaro
> + * and re-licensed under GPLv2 for the Linux kernel. The original code can
> + * be found @
> + *
> + * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
> + * files/head:/src/aarch64/
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +/*
> + * Copy a buffer from src to dest (alignment handled by the hardware)
> + *
> + * Parameters:
> + *	x0 - dest
> + *	x1 - src
> + *	x2 - n
> + * Returns:
> + *	x0 - dest
> + */
> +	.macro ldrb1 ptr, regB, val
> +	ldrb  \ptr, [\regB], \val
> +	.endm
> +
> +	.macro strb1 ptr, regB, val
> +	strb \ptr, [\regB], \val
> +	.endm
> +
> +	.macro ldrh1 ptr, regB, val
> +	ldrh  \ptr, [\regB], \val
> +	.endm
> +
> +	.macro strh1 ptr, regB, val
> +	strh \ptr, [\regB], \val
> +	.endm
> +
> +	.macro ldr1 ptr, regB, val
> +	ldr \ptr, [\regB], \val
> +	.endm
> +
> +	.macro str1 ptr, regB, val
> +	str \ptr, [\regB], \val
> +	.endm
> +
> +	.macro ldp1 ptr, regB, regC, val
> +	ldp \ptr, \regB, [\regC], \val
> +	.endm
> +
> +	.macro stp1 ptr, regB, regC, val
> +	stp \ptr, \regB, [\regC], \val
> +	.endm
> +
> +	.weak memcpy
> +ENTRY(memcpy_arm64)
> +ENTRY(memcpy)
> +#include "copy_template.S"
> +	ret
> +END(memcpy)
> +END(memcpy_arm64)
> diff --git a/criu/pie/Makefile.library b/criu/pie/Makefile.library
> index 70378ba..87ff707 100644
> --- a/criu/pie/Makefile.library
> +++ b/criu/pie/Makefile.library
> @@ -31,6 +31,9 @@ ifeq ($(VDSO),y)
>           endif
>   endif
>   
> +ifeq ($(SRCARCH),aarch64)
> +	OBJS	+= ./$(ARCH_DIR)/memcpy.o
> +endif
>   ifeq ($(SRCARCH),ppc64)
>           OBJS		+= ./$(ARCH_DIR)/memcpy_power7.o		\
>   			   ./$(ARCH_DIR)/memcmp_64.o ./$(ARCH_DIR)/misc.o
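
For reference, a minimal sketch (not part of the patch) of how the pieces
above fit together: memcpy.S provides memcpy_arm64 and additionally marks
memcpy as a weak symbol at the same entry point, so compiler-generated
memcpy calls in the PIE objects resolve there, while C code can keep using
the builtin_memcpy() wrapper from asm/string.h:

	#include <stddef.h>
	#include "asm/string.h"

	/* Hypothetical PIE-side helper, for illustration only. */
	void copy_blob(void *dst, const void *src, size_t len)
	{
		/* Explicit copies go through the wrapper... */
		builtin_memcpy(dst, src, len);
		/* ...while implicit aggregate copies emitted by the
		 * compiler link against the weak "memcpy" defined in
		 * memcpy.S. */
	}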


