summaryrefslogtreecommitdiff
path: root/arch/arc/lib/strcpy-700.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arc/lib/strcpy-700.S')
-rw-r--r--arch/arc/lib/strcpy-700.S67
1 files changed, 67 insertions, 0 deletions
diff --git a/arch/arc/lib/strcpy-700.S b/arch/arc/lib/strcpy-700.S
new file mode 100644
index 0000000000..41bb53e501
--- /dev/null
+++ b/arch/arc/lib/strcpy-700.S
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0+
+ */
+
+/*
+ * If dst and src are 4 byte aligned, copy 8 bytes at a time.
+ * If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
+ * it 8 byte aligned. Thus, we can do a little read-ahead, without
+ * dereferencing a cache line that we should not touch.
+ * Note that short and long instructions have been scheduled to avoid
+ * branch stalls.
+ * The bne_s to r3z could be made unaligned & long to avoid a stall
+ * there, but it is not likely to be taken often, and it would also be likely
+ * to cost an unaligned mispredict at the next call.
+ */
+
+.global strcpy
+.align 4
+strcpy:				/* copy the NUL-terminated string at %r1 (src) to %r0 (dst); %r0 is never written here */
+ or %r2, %r0, %r1		/* merge both pointers so one test covers the alignment of each */
+ bmsk_s %r2, %r2, 1		/* keep only bits 1:0 of (dst | src) */
+ brne.d %r2, 0, charloop	/* either pointer not 4-byte aligned -> byte-at-a-time copy */
+ mov_s %r10, %r0		/* (delay slot) %r10 = running store pointer, so %r0 stays intact */
+ ld_s %r3, [%r1, 0]		/* %r3 = first source word */
+ mov %r8, 0x01010101		/* %r8 = 0x01 in every byte, for the zero-byte test */
+ bbit0.d %r1, 2, loop_start	/* bit 2 of src clear (src 8-byte aligned) -> enter the two-word loop directly */
+ ror %r12, %r8			/* (delay slot) %r12 = %r8 rotated right by one bit = 0x80808080 */
+ sub %r2, %r3, %r8		/* src is 4- but not 8-byte aligned: test the first word on its own */
+ bic_s %r2, %r2, %r3		/* %r2 = (%r3 - 0x01010101) & ~%r3 */
+ tst_s %r2,%r12		/* non-zero under the 0x80808080 mask means %r3 holds a zero byte */
+ bne r3z			/* terminator is inside the first word -> finish byte by byte */
+ mov_s %r4,%r3			/* hand the word to %r4; the store at the top of the loop writes it */
+ .balign 4
+loop:				/* here %r4 is a word already checked to contain no zero byte */
+ ld.a %r3, [%r1, 4]		/* advance src by 4 and load the next word into %r3 */
+ st.ab %r4, [%r10, 4]		/* store %r4 at dst, then advance dst by 4 */
+loop_start:			/* here %r3 is loaded but not yet checked */
+ ld.a %r4, [%r1, 4]		/* read ahead: advance src by 4 and load the following word into %r4 */
+ sub %r2, %r3, %r8		/* zero-byte test on %r3 (same sub/bic/tst sequence as above) */
+ bic_s %r2, %r2, %r3
+ tst_s %r2, %r12
+ bne_s r3z			/* %r3 contains the terminator -> finish byte by byte */
+ st.ab %r3, [%r10, 4]		/* %r3 has no zero byte: store it whole and advance dst */
+ sub %r2, %r4, %r8		/* zero-byte test on the read-ahead word %r4 */
+ bic %r2, %r2, %r4
+ tst %r2, %r12
+ beq loop			/* no zero byte in %r4 -> next iteration stores it */
+ mov_s %r3, %r4		/* %r4 holds the terminator: move it to %r3 and fall into r3z */
+#ifdef __LITTLE_ENDIAN__
+r3z: bmsk.f %r1, %r3, 7	/* %r1 (src no longer needed) = low byte of %r3; .f sets flags on it */
+ lsr_s %r3, %r3, 8		/* bring the next byte down; the bne.d below still relies on the flags from bmsk.f */
+#else /* __BIG_ENDIAN__ */
+r3z: lsr.f %r1, %r3, 24	/* %r1 (src no longer needed) = top byte of %r3; .f sets flags on it */
+ asl_s %r3, %r3, 8		/* bring the next byte up; the bne.d below still relies on the flags from lsr.f */
+#endif /* __LITTLE_ENDIAN__ */
+ bne.d r3z			/* repeat while the byte just extracted is non-zero */
+ stb.ab %r1, [%r10, 1]		/* (delay slot) store the byte and advance dst; runs for the NUL byte too */
+ j_s [%blink]			/* jump to the address in %blink (function return) */
+
+ .balign 4
+charloop:			/* unaligned path: copy one byte per iteration */
+ ldb.ab %r3, [%r1, 1]		/* %r3 = byte at src, then advance src by 1 */
+ brne.d %r3, 0, charloop	/* keep going until the byte just loaded is zero */
+ stb.ab %r3, [%r10, 1]		/* (delay slot) store the byte and advance dst; runs for the NUL byte too */
+ j [%blink]			/* jump to the address in %blink (function return) */