[CRIU] [PATCH 3/4] ppc64: Use optimized memcmp

Laurent Dufour ldufour at linux.vnet.ibm.com
Wed May 13 09:45:10 PDT 2015


Instead of relying on the common C memcmp() function, rely on the
optimized one stolen from the kernel.
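
The builtin is wired in through the HAS_BUILTIN_MEMCMP define consumed
by asm-generic/string.h. A sketch of the assumed dispatch (the real
generic header may differ; the fallback body below is illustrative):

	#ifdef HAS_BUILTIN_MEMCMP
	/* arch provides an optimized implementation */
	#define memcmp builtin_memcmp
	#else
	/* common C byte-at-a-time comparison */
	static inline int memcmp(const void *cs, const void *ct, size_t count)
	{
		const unsigned char *s1 = cs, *s2 = ct;

		while (count--) {
			if (*s1 != *s2)
				return *s1 - *s2;
			s1++;
			s2++;
		}
		return 0;
	}
	#endif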

Signed-off-by: Laurent Dufour <ldufour at linux.vnet.ibm.com>
---
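A rough C analogue of what memcmp_64.S below does, for review purposes
only (it ignores the 4-way unrolling and register scheduling; the names
memcmp_sketch/byte_cmp are made up):

	#include <stddef.h>
	#include <stdint.h>
	#include <string.h>

	static int byte_cmp(const unsigned char *a, const unsigned char *b,
			    size_t n)
	{
		/* .Lshort: byte-at-a-time, returns the byte difference */
		for (; n; n--, a++, b++)
			if (*a != *b)
				return *a - *b;
		return 0;
	}

	int memcmp_sketch(const void *cs, const void *ct, size_t n)
	{
		const unsigned char *a = cs, *b = ct;

		/* short or not both 8B aligned: take the byte loop */
		if (n < 32 || (((uintptr_t)a | (uintptr_t)b) & 7))
			return byte_cmp(a, b, n);

		/* .Llong: compare 8B at a time with 64-bit loads */
		for (; n >= 8; n -= 8, a += 8, b += 8) {
			uint64_t x, y;

			memcpy(&x, a, 8);
			memcpy(&y, b, 8);
			if (x != y) {
	#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
				/* mimic ldbrx: byte-reverse so an unsigned
				 * compare gives memcmp() ordering */
				x = __builtin_bswap64(x);
				y = __builtin_bswap64(y);
	#endif
				return x > y ? 1 : -1;
			}
		}

		/* .Ltail: leftover bytes go back to the byte loop */
		return byte_cmp(a, b, n);
	}
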
 arch/ppc64/Makefile             |   2 +-
 arch/ppc64/include/asm/string.h |   3 +
 arch/ppc64/memcmp_64.S          | 236 ++++++++++++++++++++++++++++++++++++++++
 pie/Makefile                    |   1 +
 4 files changed, 241 insertions(+), 1 deletion(-)
 create mode 100644 arch/ppc64/memcmp_64.S

diff --git a/arch/ppc64/Makefile b/arch/ppc64/Makefile
index bd90a14e7fc4..6a5f8fdedde8 100644
--- a/arch/ppc64/Makefile
+++ b/arch/ppc64/Makefile
@@ -6,7 +6,7 @@ SYS-ASM		:= syscalls.S
 syscalls-asm-y	+= $(SYS-ASM:.S=).o
 crtools-obj-y	+= crtools.o
 crtools-obj-y	+= cpu.o
-crtools-asm-y	+= memcpy_power7.o
+crtools-asm-y	+= memcpy_power7.o memcmp_64.o
 
 SYS-DEF		:= syscall-ppc64.def
 SYS-ASM-COMMON	:= syscall-common-ppc64.S
diff --git a/arch/ppc64/include/asm/string.h b/arch/ppc64/include/asm/string.h
index 38e968cd64c3..097b1ca5be94 100644
--- a/arch/ppc64/include/asm/string.h
+++ b/arch/ppc64/include/asm/string.h
@@ -4,6 +4,7 @@
 #include "compiler.h"
 
 #define HAS_BUILTIN_MEMCPY
+#define HAS_BUILTIN_MEMCMP
 
 #include "asm-generic/string.h"
 
@@ -15,4 +16,6 @@ static inline void *builtin_memcpy(void *to, const void *from, unsigned long n)
 	return to;
 }
 
+extern int builtin_memcmp(const void *cs, const void *ct, size_t count);
+
 #endif /* __CR_ASM_STRING_H__ */
diff --git a/arch/ppc64/memcmp_64.S b/arch/ppc64/memcmp_64.S
new file mode 100644
index 000000000000..16c2b0cd8280
--- /dev/null
+++ b/arch/ppc64/memcmp_64.S
@@ -0,0 +1,236 @@
+/*
+ * Author: Anton Blanchard <anton at au.ibm.com>
+ * Copyright 2015 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * --
+ * Copied from the Linux file arch/powerpc/lib/memcmp_64.S
+ */
+#include "asm/linkage.h"
+
+#define off8	r6
+#define off16	r7
+#define off24	r8
+
+#define rA	r9
+#define rB	r10
+#define rC	r11
+#define rD	r27
+#define rE	r28
+#define rF	r29
+#define rG	r30
+#define rH	r31
+
+#ifdef __LITTLE_ENDIAN__
+#define LD	ldbrx
+#else
+#define LD	ldx
+#endif
+
+ENTRY(builtin_memcmp)
+	cmpdi	cr1,r5,0
+
+	/* Use the short loop unless both strings are 8B aligned */
+	or	r6,r3,r4
+	andi.	r6,r6,7
+
+	/* Use the short loop if length is less than 32B */
+	cmpdi	cr6,r5,31
+
+	beq	cr1,.Lzero
+	bne	.Lshort
+	bgt	cr6,.Llong
+
+.Lshort:
+	mtctr	r5
+
+1:	lbz	rA,0(r3)
+	lbz	rB,0(r4)
+	subf.	rC,rB,rA
+	bne	.Lnon_zero
+	bdz	.Lzero
+
+	lbz	rA,1(r3)
+	lbz	rB,1(r4)
+	subf.	rC,rB,rA
+	bne	.Lnon_zero
+	bdz	.Lzero
+
+	lbz	rA,2(r3)
+	lbz	rB,2(r4)
+	subf.	rC,rB,rA
+	bne	.Lnon_zero
+	bdz	.Lzero
+
+	lbz	rA,3(r3)
+	lbz	rB,3(r4)
+	subf.	rC,rB,rA
+	bne	.Lnon_zero
+
+	addi	r3,r3,4
+	addi	r4,r4,4
+
+	bdnz	1b
+
+.Lzero:
+	li	r3,0
+	blr
+
+.Lnon_zero:
+	mr	r3,rC
+	blr
+
+.Llong:
+	li	off8,8
+	li	off16,16
+	li	off24,24
+
+	std	r31,-8(r1)
+	std	r30,-16(r1)
+	std	r29,-24(r1)
+	std	r28,-32(r1)
+	std	r27,-40(r1)
+
+	srdi	r0,r5,5
+	mtctr	r0
+	andi.	r5,r5,31
+
+	LD	rA,0,r3
+	LD	rB,0,r4
+
+	LD	rC,off8,r3
+	LD	rD,off8,r4
+
+	LD	rE,off16,r3
+	LD	rF,off16,r4
+
+	LD	rG,off24,r3
+	LD	rH,off24,r4
+	cmpld	cr0,rA,rB
+
+	addi	r3,r3,32
+	addi	r4,r4,32
+
+	bdz	.Lfirst32
+
+	LD	rA,0,r3
+	LD	rB,0,r4
+	cmpld	cr1,rC,rD
+
+	LD	rC,off8,r3
+	LD	rD,off8,r4
+	cmpld	cr6,rE,rF
+
+	LD	rE,off16,r3
+	LD	rF,off16,r4
+	cmpld	cr7,rG,rH
+	bne	cr0,.LcmpAB
+
+	LD	rG,off24,r3
+	LD	rH,off24,r4
+	cmpld	cr0,rA,rB
+	bne	cr1,.LcmpCD
+
+	addi	r3,r3,32
+	addi	r4,r4,32
+
+	bdz	.Lsecond32
+
+	.balign	16
+
+1:	LD	rA,0,r3
+	LD	rB,0,r4
+	cmpld	cr1,rC,rD
+	bne	cr6,.LcmpEF
+
+	LD	rC,off8,r3
+	LD	rD,off8,r4
+	cmpld	cr6,rE,rF
+	bne	cr7,.LcmpGH
+
+	LD	rE,off16,r3
+	LD	rF,off16,r4
+	cmpld	cr7,rG,rH
+	bne	cr0,.LcmpAB
+
+	LD	rG,off24,r3
+	LD	rH,off24,r4
+	cmpld	cr0,rA,rB
+	bne	cr1,.LcmpCD
+
+	addi	r3,r3,32
+	addi	r4,r4,32
+
+	bdnz	1b
+
+.Lsecond32:
+	cmpld	cr1,rC,rD
+	bne	cr6,.LcmpEF
+
+	cmpld	cr6,rE,rF
+	bne	cr7,.LcmpGH
+
+	cmpld	cr7,rG,rH
+	bne	cr0,.LcmpAB
+
+	bne	cr1,.LcmpCD
+	bne	cr6,.LcmpEF
+	bne	cr7,.LcmpGH
+
+.Ltail:
+	ld	r31,-8(r1)
+	ld	r30,-16(r1)
+	ld	r29,-24(r1)
+	ld	r28,-32(r1)
+	ld	r27,-40(r1)
+
+	cmpdi	r5,0
+	beq	.Lzero
+	b	.Lshort
+
+.Lfirst32:
+	cmpld	cr1,rC,rD
+	cmpld	cr6,rE,rF
+	cmpld	cr7,rG,rH
+
+	bne	cr0,.LcmpAB
+	bne	cr1,.LcmpCD
+	bne	cr6,.LcmpEF
+	bne	cr7,.LcmpGH
+
+	b	.Ltail
+
+.LcmpAB:
+	li	r3,1
+	bgt	cr0,.Lout
+	li	r3,-1
+	b	.Lout
+
+.LcmpCD:
+	li	r3,1
+	bgt	cr1,.Lout
+	li	r3,-1
+	b	.Lout
+
+.LcmpEF:
+	li	r3,1
+	bgt	cr6,.Lout
+	li	r3,-1
+	b	.Lout
+
+.LcmpGH:
+	li	r3,1
+	bgt	cr7,.Lout
+	li	r3,-1
+
+.Lout:
+	ld	r31,-8(r1)
+	ld	r30,-16(r1)
+	ld	r29,-24(r1)
+	ld	r28,-32(r1)
+	ld	r27,-40(r1)
+	blr
diff --git a/pie/Makefile b/pie/Makefile
index c67d8dc6c0ab..424bf34f27dd 100644
--- a/pie/Makefile
+++ b/pie/Makefile
@@ -13,6 +13,7 @@ endif
 ifeq ($(SRCARCH), ppc64)
 asm-e			+= $(ARCH_DIR)/vdso-trampoline.o
 asm-e			+= $(ARCH_DIR)/memcpy_power7.o
+asm-e			+= $(ARCH_DIR)/memcmp_64.o
 endif
 endif
 
-- 
1.9.1


