summaryrefslogtreecommitdiff
path: root/arch/arc/lib/strchr-700.S
diff options
context:
space:
mode:
author Stefano Babic <sbabic@denx.de> 2014-03-05 12:51:26 +0100
committer Stefano Babic <sbabic@denx.de> 2014-03-05 12:51:26 +0100
commit 1ad6364eeb4f578e423081d1748e8a3fdf1ab01d (patch)
tree f55731737edf1cfd653b21f2ff9d387e6c53ae24 /arch/arc/lib/strchr-700.S
parent 335143c76612a0ae26eef8abeda77641d4f63b50 (diff)
parent cc07294bc704694ae33db75b25ac557e5917a83f (diff)
Merge branch 'master' of git://git.denx.de/u-boot-arm
Diffstat (limited to 'arch/arc/lib/strchr-700.S')
-rw-r--r-- arch/arc/lib/strchr-700.S 141
1 files changed, 141 insertions, 0 deletions
diff --git a/arch/arc/lib/strchr-700.S b/arch/arc/lib/strchr-700.S
new file mode 100644
index 0000000000..55fcc9fb00
--- /dev/null
+++ b/arch/arc/lib/strchr-700.S
@@ -0,0 +1,141 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier: GPL-2.0+
+ */
+
+/*
+ * ARC700 has a relatively long pipeline and branch prediction, so we want
+ * to avoid branches that are hard to predict. On the other hand, the
+ * presence of the norm instruction makes it easier to operate on whole
+ * words branch-free.
+ *
+ * strchr: find the first occurrence of a byte in a NUL-terminated string,
+ * examining one 32-bit word (four byte lanes) per step.
+ *
+ * In:  r0 = address of the string
+ *      r1 = byte to search for (only the low 8 bits are used)
+ * Out: r0 = address of the first matching byte, or 0 when the terminating
+ *      NUL is reached first
+ * Writes r2-r7 and r12 as scratch; returns through blink.
+ *
+ * Register roles during the scan:
+ *   r2  = word most recently loaded from the string
+ *   r3  = 0x01010101
+ *   r4  = 0x80808080
+ *   r5  = search byte replicated into all four lanes
+ *   r6  = r2 XOR r5 (a lane is zero where the word holds the search byte)
+ *   r7  = variant of r3 used for the first word on the entry path,
+ *         afterwards a flag/scratch word
+ *   r12 = zero-byte flags
+ *
+ * Zero lanes are detected with (x - 0x01010101) & ~x & 0x80808080, which is
+ * non-zero when some lane of x is zero. Borrows can also flag lanes above a
+ * zero lane, so only the least significant flag is exact.
+ *
+ * NOTE(review): the entry test compares (r0 & 3) with r0 itself, so the
+ * branch to .Laligned is taken only for addresses below 4. Word-aligned
+ * strings at any other address take the fall-through path, which still
+ * handles them because the shift count is then 0 and r7 equals r3. Confirm
+ * whether a comparison with 0 was intended.
+ *
+ * NOTE(review): on big-endian, the first-word zero flags are computed with
+ * the partial mask in r7, and a borrow out of the first string lane appears
+ * able to flag a zero byte that lies before the string. Verify that this
+ * cannot yield a wrong result when the string starts unaligned.
+ */
+
+.global strchr
+.align 4
+strchr:
+ extb_s %r1, %r1 /* keep only the low byte of r1 */
+ asl %r5, %r1, 8
+ bmsk %r2, %r0, 1 /* r2 = r0 & 3: byte offset in the word */
+ or %r5, %r5, %r1 /* r5 = search byte in the two low lanes */
+ mov_s %r3, 0x01010101
+ breq.d %r2, %r0, .Laligned /* see NOTE(review) in the header */
+ asl %r4, %r5, 16 /* delay slot: byte in the two high lanes */
+ sub_s %r0, %r0, %r2 /* round r0 down to a word boundary */
+ asl %r7, %r2, 3 /* r7 = byte offset in bits */
+ ld_s %r2, [%r0] /* word containing the start of the string */
+#ifdef __LITTLE_ENDIAN__
+ asl %r7, %r3, %r7 /* drop 0x01 from lanes before the string */
+#else /* __BIG_ENDIAN__ */
+ lsr %r7, %r3, %r7 /* drop 0x01 from lanes before the string */
+#endif /* __LITTLE_ENDIAN__ */
+ or %r5, %r5, %r4 /* r5 = search byte in all four lanes */
+ ror %r4, %r3 /* r4 = 0x80808080 */
+ sub %r12, %r2, %r7
+ bic_s %r12, %r12, %r2
+ and %r12, %r12, %r4 /* r12 = zero-byte flags, first word */
+ brne.d %r12, 0, .Lfound0_ua
+ xor %r6, %r2, %r5 /* delay slot: matching lanes become 0 */
+ ld.a %r2, [%r0, 4] /* r0 += 4, then load the next word */
+ sub %r12, %r6, %r7
+ bic %r12, %r12, %r6 /* same zero-lane test, applied to r6 */
+#ifdef __LITTLE_ENDIAN__
+ and %r7, %r12, %r4 /* r7 = match flags */
+ /* For speed, we want this branch to be unaligned. */
+ breq %r7, 0, .Loop
+ /* Likewise this one */
+ b .Lfound_char
+#else /* __BIG_ENDIAN__ */
+ and %r12, %r12, %r4 /* r12 = match flags */
+ /* For speed, we want this branch to be unaligned. */
+ breq %r12, 0, .Loop
+ lsr_s %r12, %r12, 7 /* move flags to bit 0 of each lane */
+ bic %r2, %r7, %r6 /* r2 = r7 & ~r6 */
+ b.d .Lfound_char_b
+ and_s %r2, %r2, %r12 /* delay slot: keep flagged lanes only */
+#endif /* __LITTLE_ENDIAN__ */
+ /* We require this code address to be unaligned for speed... */
+.Laligned:
+ ld_s %r2, [%r0] /* first word of the string */
+ or %r5, %r5, %r4 /* r5 = search byte in all four lanes */
+ ror %r4, %r3 /* r4 = 0x80808080 */
+ /* ... so that this code address is aligned, for itself and ... */
+.Loop:
+ sub %r12, %r2, %r3
+ bic_s %r12, %r12, %r2
+ and %r12, %r12, %r4 /* r12 = zero-byte flags of r2 */
+ brne.d %r12, 0, .Lfound0
+ xor %r6, %r2, %r5 /* delay slot: matching lanes become 0 */
+ ld.a %r2, [%r0, 4] /* r0 += 4, then load the next word */
+ sub %r12, %r6, %r3
+ bic %r12, %r12, %r6
+ and %r7, %r12, %r4 /* r7 = match flags of the previous word */
+ breq %r7, 0, .Loop
+ /*
+ *... so that this branch is unaligned.
+ * Found searched-for character.
+ * r0 has already advanced to next word.
+ */
+#ifdef __LITTLE_ENDIAN__
+ /*
+ * We only need the information about the first matching byte
+ * (i.e. the least significant matching byte) to be exact,
+ * hence there is no problem with carry effects.
+ */
+.Lfound_char:
+ sub %r3, %r7, 1
+ bic %r3, %r3, %r7 /* r3 = bits below the lowest match flag */
+ norm %r2, %r3 /* 24, 16, 8 or 0 for lane 0, 1, 2, 3 */
+ sub_s %r0, %r0, 1
+ asr_s %r2, %r2, 3 /* r2 = 3 - lane index */
+ j.d [%blink]
+ sub_s %r0, %r0, %r2 /* delay slot: r0 = address of the match */
+
+ .balign 4
+.Lfound0_ua:
+ mov %r3, %r7 /* first word: use the partial 0x01 mask */
+.Lfound0:
+ sub %r3, %r6, %r3
+ bic %r3, %r3, %r6
+ and %r2, %r3, %r4 /* r2 = match flags of this word */
+ or_s %r12, %r12, %r2 /* r12 = zero-byte flags | match flags */
+ sub_s %r3, %r12, 1
+ bic_s %r3, %r3, %r12 /* r3 = bits below the lowest flag */
+ norm %r3, %r3
+ add_s %r0, %r0, 3 /* r0 still addresses this word */
+ asr_s %r12, %r3, 3 /* r12 = 3 - lane index of the first flag */
+ asl.f 0, %r2, %r3 /* N set when the first flag is a match */
+ sub_s %r0, %r0, %r12
+ j_s.d [%blink]
+ mov.pl %r0, 0 /* delay slot: NUL came first, return 0 */
+#else /* __BIG_ENDIAN__ */
+.Lfound_char:
+ lsr %r7, %r7, 7 /* move flags to bit 0 of each lane */
+
+ bic %r2, %r7, %r6 /* drop flags where the r6 lane has bit 0 set */
+.Lfound_char_b:
+ norm %r2, %r2
+ sub_s %r0, %r0, 4 /* back to the word that was tested */
+ asr_s %r2, %r2, 3 /* r2 = lane index of the first match */
+ j.d [%blink]
+ add_s %r0, %r0, %r2 /* delay slot: r0 = address of the match */
+
+.Lfound0_ua:
+ mov_s %r3, %r7 /* first word: use the partial 0x01 mask */
+.Lfound0:
+ asl_s %r2, %r2, 7 /* bit 0 of each lane moves to bit 7 */
+ or %r7, %r6, %r4 /* r6 with bit 7 of every lane set */
+ bic_s %r12, %r12, %r2 /* drop zero flags where the lane had bit 0 set */
+ sub %r2, %r7, %r3
+ or %r2, %r2, %r6 /* bit 7 clear where r6 is 0 and r3 holds 0x01 */
+ bic %r12, %r2, %r12 /* also clear bit 7 in zero-byte lanes */
+ bic.f %r3, %r4, %r12 /* bit 7 set where lane is a match or NUL */
+ norm %r3, %r3
+
+ add.pl %r3, %r3, 1 /* uses flags from bic.f; r3 = 8 * lane index */
+ asr_s %r12, %r3, 3 /* r12 = lane index of the first flag */
+ asl.f 0, %r2, %r3 /* N = bit 7 of that lane in r2 */
+ add_s %r0, %r0, %r12
+ j_s.d [%blink]
+ mov.mi %r0, 0 /* delay slot: lane is not a match, return 0 */
+#endif /* __LITTLE_ENDIAN__ */