[CRIU] [PATCH 3/4] ppc64: Use optimized memcmp
Laurent Dufour
ldufour at linux.vnet.ibm.com
Wed May 13 09:45:10 PDT 2015
Instead of relying on the common C memcmp() function, rely on the
optimized one stolen from the kernel.
Signed-off-by: Laurent Dufour <ldufour at linux.vnet.ibm.com>
---
arch/ppc64/Makefile | 2 +-
arch/ppc64/include/asm/string.h | 3 +
arch/ppc64/memcmp_64.S | 236 ++++++++++++++++++++++++++++++++++++++++
pie/Makefile | 1 +
4 files changed, 241 insertions(+), 1 deletion(-)
create mode 100644 arch/ppc64/memcmp_64.S
diff --git a/arch/ppc64/Makefile b/arch/ppc64/Makefile
index bd90a14e7fc4..6a5f8fdedde8 100644
--- a/arch/ppc64/Makefile
+++ b/arch/ppc64/Makefile
@@ -6,7 +6,7 @@ SYS-ASM := syscalls.S
syscalls-asm-y += $(SYS-ASM:.S=).o
crtools-obj-y += crtools.o
crtools-obj-y += cpu.o
-crtools-asm-y += memcpy_power7.o
+crtools-asm-y += memcpy_power7.o memcmp_64.o
SYS-DEF := syscall-ppc64.def
SYS-ASM-COMMON := syscall-common-ppc64.S
diff --git a/arch/ppc64/include/asm/string.h b/arch/ppc64/include/asm/string.h
index 38e968cd64c3..097b1ca5be94 100644
--- a/arch/ppc64/include/asm/string.h
+++ b/arch/ppc64/include/asm/string.h
@@ -4,6 +4,7 @@
#include "compiler.h"
#define HAS_BUILTIN_MEMCPY
+#define HAS_BUILTIN_MEMCMP
#include "asm-generic/string.h"
@@ -15,4 +16,6 @@ static inline void *builtin_memcpy(void *to, const void *from, unsigned long n)
return to;
}
+extern int builtin_memcmp(const void *cs, const void *ct, size_t count);
+
#endif /* __CR_ASM_STRING_H__ */
diff --git a/arch/ppc64/memcmp_64.S b/arch/ppc64/memcmp_64.S
new file mode 100644
index 000000000000..16c2b0cd8280
--- /dev/null
+++ b/arch/ppc64/memcmp_64.S
@@ -0,0 +1,236 @@
+/*
+ * Author: Anton Blanchard <anton at au.ibm.com>
+ * Copyright 2015 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * --
+ * Copied from the Linux file arch/powerpc/lib/memcmp_64.S
+ */
+#include "asm/linkage.h"
+
+#define off8 r6
+#define off16 r7
+#define off24 r8
+
+#define rA r9
+#define rB r10
+#define rC r11
+#define rD r27
+#define rE r28
+#define rF r29
+#define rG r30
+#define rH r31
+
+#ifdef __LITTLE_ENDIAN__
+#define LD ldbrx
+#else
+#define LD ldx
+#endif
+
+/*
+ * builtin_memcmp(const void *cs, const void *ct, size_t count)
+ *
+ * ppc64 ELF ABI: cs arrives in r3, ct in r4, count in r5; the int
+ * result (negative / zero / positive, as for memcmp()) is returned
+ * in r3.
+ *
+ * Fast path: when both pointers are 8B aligned and count >= 32, the
+ * .Llong loop compares 32B per iteration as four 8B words.  LD is
+ * ldbrx (byte-reversed load) on little-endian, so the unsigned word
+ * compare (cmpld) matches memcmp()'s left-to-right byte ordering on
+ * either endianness.  Everything else -- and the tail of the fast
+ * path -- goes through the byte loop at .Lshort.
+ */
+ENTRY(builtin_memcmp)
+ /* cr1 <- (count == 0); consumed by "beq cr1,.Lzero" below */
+ cmpdi cr1,r5,0
+
+ /* Use the short loop if both strings are not 8B aligned */
+ or r6,r3,r4
+ andi. r6,r6,7
+
+ /* Use the short loop if length is less than 32B */
+ cmpdi cr6,r5,31
+
+ beq cr1,.Lzero
+ bne .Lshort
+ bgt cr6,.Llong
+
+/*
+ * Byte-at-a-time compare, unrolled 4x; CTR holds the number of bytes
+ * still to compare.  Also entered from .Ltail to finish the
+ * (count % 32) remainder of the long path.
+ */
+.Lshort:
+ mtctr r5
+
+1: lbz rA,0(r3)
+ lbz rB,0(r4)
+ /* rC = *cs - *ct ("subf. rC,rB,rA" computes rA - rB) */
+ subf. rC,rB,rA
+ bne .Lnon_zero
+ bdz .Lzero
+
+ lbz rA,1(r3)
+ lbz rB,1(r4)
+ subf. rC,rB,rA
+ bne .Lnon_zero
+ bdz .Lzero
+
+ lbz rA,2(r3)
+ lbz rB,2(r4)
+ subf. rC,rB,rA
+ bne .Lnon_zero
+ bdz .Lzero
+
+ lbz rA,3(r3)
+ lbz rB,3(r4)
+ subf. rC,rB,rA
+ bne .Lnon_zero
+
+ addi r3,r3,4
+ addi r4,r4,4
+
+ bdnz 1b
+
+/* Buffers are equal: return 0 */
+.Lzero:
+ li r3,0
+ blr
+
+/* Return the non-zero byte difference computed above */
+.Lnon_zero:
+ mr r3,rC
+ blr
+
+/*
+ * Long path: both buffers 8B aligned and count >= 32.  The loop is
+ * software pipelined: the four 8B loads of block N+1 are issued while
+ * the compares of block N retire, with the four pending comparison
+ * results parked in cr0/cr1/cr6/cr7 (pairs A/B, C/D, E/F, G/H).
+ */
+.Llong:
+ li off8,8
+ li off16,16
+ li off24,24
+
+ /* Spill non-volatile r27-r31 into the ABI red zone below r1;
+  * no stack frame is set up. */
+ std r31,-8(r1)
+ std r30,-16(r1)
+ std r29,-24(r1)
+ std r28,-32(r1)
+ std r27,-40(r1)
+
+ /* CTR = count / 32 full blocks; r5 = count % 32 tail bytes */
+ srdi r0,r5,5
+ mtctr r0
+ andi. r5,r5,31
+
+ /* Prologue: load the first 32B block into rA..rH... */
+ LD rA,0,r3
+ LD rB,0,r4
+
+ LD rC,off8,r3
+ LD rD,off8,r4
+
+ LD rE,off16,r3
+ LD rF,off16,r4
+
+ LD rG,off24,r3
+ LD rH,off24,r4
+ /* ...and start comparing it */
+ cmpld cr0,rA,rB
+
+ addi r3,r3,32
+ addi r4,r4,32
+
+ /* Only one full block in total: drain the pending compares */
+ bdz .Lfirst32
+
+ /* Peeled second iteration: overlap the second block's loads with
+  * the first block's compares/branches */
+ LD rA,0,r3
+ LD rB,0,r4
+ cmpld cr1,rC,rD
+
+ LD rC,off8,r3
+ LD rD,off8,r4
+ cmpld cr6,rE,rF
+
+ LD rE,off16,r3
+ LD rF,off16,r4
+ cmpld cr7,rG,rH
+ bne cr0,.LcmpAB
+
+ LD rG,off24,r3
+ LD rH,off24,r4
+ cmpld cr0,rA,rB
+ bne cr1,.LcmpCD
+
+ addi r3,r3,32
+ addi r4,r4,32
+
+ bdz .Lsecond32
+
+ .balign 16
+
+/* Steady state: compare block N while loading block N+1 */
+1: LD rA,0,r3
+ LD rB,0,r4
+ cmpld cr1,rC,rD
+ bne cr6,.LcmpEF
+
+ LD rC,off8,r3
+ LD rD,off8,r4
+ cmpld cr6,rE,rF
+ bne cr7,.LcmpGH
+
+ LD rE,off16,r3
+ LD rF,off16,r4
+ cmpld cr7,rG,rH
+ bne cr0,.LcmpAB
+
+ LD rG,off24,r3
+ LD rH,off24,r4
+ cmpld cr0,rA,rB
+ bne cr1,.LcmpCD
+
+ addi r3,r3,32
+ addi r4,r4,32
+
+ bdnz 1b
+
+/* CTR expired with compares still in flight: drain the pipeline */
+.Lsecond32:
+ cmpld cr1,rC,rD
+ bne cr6,.LcmpEF
+
+ cmpld cr6,rE,rF
+ bne cr7,.LcmpGH
+
+ cmpld cr7,rG,rH
+ bne cr0,.LcmpAB
+
+ bne cr1,.LcmpCD
+ bne cr6,.LcmpEF
+ bne cr7,.LcmpGH
+
+/*
+ * All full 32B blocks matched: restore the saved non-volatiles, then
+ * finish the remaining (count % 32) bytes, if any, via the byte loop.
+ */
+.Ltail:
+ ld r31,-8(r1)
+ ld r30,-16(r1)
+ ld r29,-24(r1)
+ ld r28,-32(r1)
+ ld r27,-40(r1)
+
+ /* r5 = tail byte count (set by "andi. r5,r5,31" above) */
+ cmpdi r5,0
+ beq .Lzero
+ b .Lshort
+
+/* CTR expired right after the first block: drain all four compares */
+.Lfirst32:
+ cmpld cr1,rC,rD
+ cmpld cr6,rE,rF
+ cmpld cr7,rG,rH
+
+ bne cr0,.LcmpAB
+ bne cr1,.LcmpCD
+ bne cr6,.LcmpEF
+ bne cr7,.LcmpGH
+
+ b .Ltail
+
+/*
+ * A mismatching 8B word pair was found; the relevant CR field holds
+ * its unsigned comparison.  Return 1 or -1 accordingly -- memcmp()
+ * only requires the sign, not the byte difference.
+ */
+.LcmpAB:
+ li r3,1
+ bgt cr0,.Lout
+ li r3,-1
+ b .Lout
+
+.LcmpCD:
+ li r3,1
+ bgt cr1,.Lout
+ li r3,-1
+ b .Lout
+
+.LcmpEF:
+ li r3,1
+ bgt cr6,.Lout
+ li r3,-1
+ b .Lout
+
+.LcmpGH:
+ li r3,1
+ bgt cr7,.Lout
+ li r3,-1
+
+/* Restore the saved non-volatiles and return the result in r3 */
+.Lout:
+ ld r31,-8(r1)
+ ld r30,-16(r1)
+ ld r29,-24(r1)
+ ld r28,-32(r1)
+ ld r27,-40(r1)
+ blr
diff --git a/pie/Makefile b/pie/Makefile
index c67d8dc6c0ab..424bf34f27dd 100644
--- a/pie/Makefile
+++ b/pie/Makefile
@@ -13,6 +13,7 @@ endif
ifeq ($(SRCARCH), ppc64)
asm-e += $(ARCH_DIR)/vdso-trampoline.o
asm-e += $(ARCH_DIR)/memcpy_power7.o
+asm-e += $(ARCH_DIR)/memcmp_64.o
endif
endif
--
1.9.1
More information about the CRIU
mailing list