diff options
Diffstat (limited to 'arch/arm/lib')
-rw-r--r-- | arch/arm/lib/Makefile | 15 | ||||
-rw-r--r-- | arch/arm/lib/_divsi3.S | 143 | ||||
-rw-r--r-- | arch/arm/lib/_modsi3.S | 99 | ||||
-rw-r--r-- | arch/arm/lib/_udivsi3.S | 95 | ||||
-rw-r--r-- | arch/arm/lib/_umodsi3.S | 90 | ||||
-rw-r--r-- | arch/arm/lib/ashldi3.S (renamed from arch/arm/lib/_ashldi3.S) | 14 | ||||
-rw-r--r-- | arch/arm/lib/ashrdi3.S (renamed from arch/arm/lib/_ashrdi3.S) | 14 | ||||
-rw-r--r-- | arch/arm/lib/div64.S | 214 | ||||
-rw-r--r-- | arch/arm/lib/lib1funcs.S | 429 | ||||
-rw-r--r-- | arch/arm/lib/lshrdi3.S (renamed from arch/arm/lib/_lshrdi3.S) | 14 | ||||
-rw-r--r-- | arch/arm/lib/memcpy.S | 6 | ||||
-rw-r--r-- | arch/arm/lib/muldi3.S | 48 | ||||
-rw-r--r-- | arch/arm/lib/uldivmod.S (renamed from arch/arm/lib/_uldivmod.S) | 11 |
13 files changed, 738 insertions, 454 deletions
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index b535dbef49..0e05e87dea 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -5,9 +5,9 @@ # SPDX-License-Identifier: GPL-2.0+ # -lib-$(CONFIG_USE_PRIVATE_LIBGCC) += _ashldi3.o _ashrdi3.o _divsi3.o \ - _lshrdi3.o _modsi3.o _udivsi3.o _umodsi3.o div0.o \ - _uldivmod.o +lib-$(CONFIG_USE_PRIVATE_LIBGCC) += ashldi3.o ashrdi3.o lshrdi3.o \ + lib1funcs.o uldivmod.o div0.o \ + div64.o muldi3.o ifdef CONFIG_CPU_V7M obj-y += vectors_m.o crt0.o @@ -62,9 +62,17 @@ ifneq (,$(findstring -mabi=aapcs-linux,$(PLATFORM_CPPFLAGS))) extra-y += eabi_compat.o endif +asflags-y += -DCONFIG_ARM_ASM_UNIFIED +ifeq ($(CONFIG_SPL_BUILD)$(CONFIG_TEGRA),yy) +asflags-y += -D__LINUX_ARM_ARCH__=4 +else +asflags-y += -D__LINUX_ARM_ARCH__=$(CONFIG_SYS_ARM_ARCH) +endif + # some files can only build in ARM or THUMB2, not THUMB1 ifdef CONFIG_SYS_THUMB_BUILD +asflags-$(CONFIG_HAS_THUMB2) += -DCONFIG_THUMB2_KERNEL ifndef CONFIG_HAS_THUMB2 # for C files, just apend -marm, which will override previous -mthumb* @@ -82,6 +90,5 @@ AFLAGS_REMOVE_memset.o := -mthumb -mthumb-interwork AFLAGS_REMOVE_memcpy.o := -mthumb -mthumb-interwork AFLAGS_memset.o := -DMEMSET_NO_THUMB_BUILD AFLAGS_memcpy.o := -DMEMCPY_NO_THUMB_BUILD - endif endif diff --git a/arch/arm/lib/_divsi3.S b/arch/arm/lib/_divsi3.S deleted file mode 100644 index c463c68f85..0000000000 --- a/arch/arm/lib/_divsi3.S +++ /dev/null @@ -1,143 +0,0 @@ -#include <linux/linkage.h> - -.macro ARM_DIV_BODY dividend, divisor, result, curbit - -#if __LINUX_ARM_ARCH__ >= 5 - - clz \curbit, \divisor - clz \result, \dividend - sub \result, \curbit, \result - mov \curbit, #1 - mov \divisor, \divisor, lsl \result - mov \curbit, \curbit, lsl \result - mov \result, #0 - -#else - - @ Initially shift the divisor left 3 bits if possible, - @ set curbit accordingly. This allows for curbit to be located - @ at the left end of each 4 bit nibbles in the division loop - @ to save one loop in most cases. - tst \divisor, #0xe0000000 - moveq \divisor, \divisor, lsl #3 - moveq \curbit, #8 - movne \curbit, #1 - - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. -1: cmp \divisor, #0x10000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #4 - movlo \curbit, \curbit, lsl #4 - blo 1b - - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. -1: cmp \divisor, #0x80000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #1 - movlo \curbit, \curbit, lsl #1 - blo 1b - - mov \result, #0 - -#endif - - @ Division loop -1: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - orrhs \result, \result, \curbit - cmp \dividend, \divisor, lsr #1 - subhs \dividend, \dividend, \divisor, lsr #1 - orrhs \result, \result, \curbit, lsr #1 - cmp \dividend, \divisor, lsr #2 - subhs \dividend, \dividend, \divisor, lsr #2 - orrhs \result, \result, \curbit, lsr #2 - cmp \dividend, \divisor, lsr #3 - subhs \dividend, \dividend, \divisor, lsr #3 - orrhs \result, \result, \curbit, lsr #3 - cmp \dividend, #0 @ Early termination? - movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? - movne \divisor, \divisor, lsr #4 - bne 1b - -.endm - -.macro ARM_DIV2_ORDER divisor, order - -#if __LINUX_ARM_ARCH__ >= 5 - - clz \order, \divisor - rsb \order, \order, #31 - -#else - - cmp \divisor, #(1 << 16) - movhs \divisor, \divisor, lsr #16 - movhs \order, #16 - movlo \order, #0 - - cmp \divisor, #(1 << 8) - movhs \divisor, \divisor, lsr #8 - addhs \order, \order, #8 - - cmp \divisor, #(1 << 4) - movhs \divisor, \divisor, lsr #4 - addhs \order, \order, #4 - - cmp \divisor, #(1 << 2) - addhi \order, \order, #3 - addls \order, \order, \divisor, lsr #1 - -#endif - -.endm - - .align 5 -.globl __divsi3 -__divsi3: -ENTRY(__aeabi_idiv) - cmp r1, #0 - eor ip, r0, r1 @ save the sign of the result. - beq Ldiv0 - rsbmi r1, r1, #0 @ loops below use unsigned. - subs r2, r1, #1 @ division by 1 or -1 ? - beq 10f - movs r3, r0 - rsbmi r3, r0, #0 @ positive dividend value - cmp r3, r1 - bls 11f - tst r1, r2 @ divisor is power of 2 ? - beq 12f - - ARM_DIV_BODY r3, r1, r0, r2 - - cmp ip, #0 - rsbmi r0, r0, #0 - mov pc, lr - -10: teq ip, r0 @ same sign ? - rsbmi r0, r0, #0 - mov pc, lr - -11: movlo r0, #0 - moveq r0, ip, asr #31 - orreq r0, r0, #1 - mov pc, lr - -12: ARM_DIV2_ORDER r1, r2 - - cmp ip, #0 - mov r0, r3, lsr r2 - rsbmi r0, r0, #0 - mov pc, lr - -Ldiv0: - - str lr, [sp, #-4]! - bl __div0 - mov r0, #0 @ About as wrong as it could be. - ldr pc, [sp], #4 -ENDPROC(__aeabi_idiv) diff --git a/arch/arm/lib/_modsi3.S b/arch/arm/lib/_modsi3.S deleted file mode 100644 index c5e1c229df..0000000000 --- a/arch/arm/lib/_modsi3.S +++ /dev/null @@ -1,99 +0,0 @@ -#include <linux/linkage.h> - -.macro ARM_MOD_BODY dividend, divisor, order, spare - -#if __LINUX_ARM_ARCH__ >= 5 - - clz \order, \divisor - clz \spare, \dividend - sub \order, \order, \spare - mov \divisor, \divisor, lsl \order - -#else - - mov \order, #0 - - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. -1: cmp \divisor, #0x10000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #4 - addlo \order, \order, #4 - blo 1b - - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. -1: cmp \divisor, #0x80000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #1 - addlo \order, \order, #1 - blo 1b - -#endif - - @ Perform all needed substractions to keep only the reminder. - @ Do comparisons in batch of 4 first. - subs \order, \order, #3 @ yes, 3 is intended here - blt 2f - -1: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - cmp \dividend, \divisor, lsr #1 - subhs \dividend, \dividend, \divisor, lsr #1 - cmp \dividend, \divisor, lsr #2 - subhs \dividend, \dividend, \divisor, lsr #2 - cmp \dividend, \divisor, lsr #3 - subhs \dividend, \dividend, \divisor, lsr #3 - cmp \dividend, #1 - mov \divisor, \divisor, lsr #4 - subges \order, \order, #4 - bge 1b - - tst \order, #3 - teqne \dividend, #0 - beq 5f - - @ Either 1, 2 or 3 comparison/substractions are left. -2: cmn \order, #2 - blt 4f - beq 3f - cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - mov \divisor, \divisor, lsr #1 -3: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - mov \divisor, \divisor, lsr #1 -4: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor -5: -.endm - - .align 5 -ENTRY(__modsi3) - cmp r1, #0 - beq Ldiv0 - rsbmi r1, r1, #0 @ loops below use unsigned. - movs ip, r0 @ preserve sign of dividend - rsbmi r0, r0, #0 @ if negative make positive - subs r2, r1, #1 @ compare divisor with 1 - cmpne r0, r1 @ compare dividend with divisor - moveq r0, #0 - tsthi r1, r2 @ see if divisor is power of 2 - andeq r0, r0, r2 - bls 10f - - ARM_MOD_BODY r0, r1, r2, r3 - -10: cmp ip, #0 - rsbmi r0, r0, #0 - mov pc, lr -ENDPROC(__modsi3) - -Ldiv0: - - str lr, [sp, #-4]! - bl __div0 - mov r0, #0 @ About as wrong as it could be. - ldr pc, [sp], #4 diff --git a/arch/arm/lib/_udivsi3.S b/arch/arm/lib/_udivsi3.S deleted file mode 100644 index 3b653bed99..0000000000 --- a/arch/arm/lib/_udivsi3.S +++ /dev/null @@ -1,95 +0,0 @@ -#include <linux/linkage.h> - -/* # 1 "libgcc1.S" */ -@ libgcc1 routines for ARM cpu. -@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) -dividend .req r0 -divisor .req r1 -result .req r2 -curbit .req r3 -/* ip .req r12 */ -/* sp .req r13 */ -/* lr .req r14 */ -/* pc .req r15 */ - .text - .globl __udivsi3 - .type __udivsi3 ,function - .globl __aeabi_uidiv - .type __aeabi_uidiv ,function - .align 0 - __udivsi3: - __aeabi_uidiv: - cmp divisor, #0 - beq Ldiv0 - mov curbit, #1 - mov result, #0 - cmp dividend, divisor - bcc Lgot_result -Loop1: - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. - cmp divisor, #0x10000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #4 - movcc curbit, curbit, lsl #4 - bcc Loop1 -Lbignum: - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. - cmp divisor, #0x80000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #1 - movcc curbit, curbit, lsl #1 - bcc Lbignum -Loop3: - @ Test for possible subtractions, and note which bits - @ are done in the result. On the final pass, this may subtract - @ too much from the dividend, but the result will be ok, since the - @ "bit" will have been shifted out at the bottom. - cmp dividend, divisor - subcs dividend, dividend, divisor - orrcs result, result, curbit - cmp dividend, divisor, lsr #1 - subcs dividend, dividend, divisor, lsr #1 - orrcs result, result, curbit, lsr #1 - cmp dividend, divisor, lsr #2 - subcs dividend, dividend, divisor, lsr #2 - orrcs result, result, curbit, lsr #2 - cmp dividend, divisor, lsr #3 - subcs dividend, dividend, divisor, lsr #3 - orrcs result, result, curbit, lsr #3 - cmp dividend, #0 @ Early termination? - movnes curbit, curbit, lsr #4 @ No, any more bits to do? - movne divisor, divisor, lsr #4 - bne Loop3 -Lgot_result: - mov r0, result - mov pc, lr -Ldiv0: - str lr, [sp, #-4]! - bl __div0 (PLT) - mov r0, #0 @ about as wrong as it could be - ldmia sp!, {pc} - .size __udivsi3 , . - __udivsi3 - -ENTRY(__aeabi_uidivmod) - - stmfd sp!, {r0, r1, ip, lr} - bl __aeabi_uidiv - ldmfd sp!, {r1, r2, ip, lr} - mul r3, r0, r2 - sub r1, r1, r3 - mov pc, lr -ENDPROC(__aeabi_uidivmod) - -ENTRY(__aeabi_idivmod) - - stmfd sp!, {r0, r1, ip, lr} - bl __aeabi_idiv - ldmfd sp!, {r1, r2, ip, lr} - mul r3, r0, r2 - sub r1, r1, r3 - mov pc, lr -ENDPROC(__aeabi_idivmod) diff --git a/arch/arm/lib/_umodsi3.S b/arch/arm/lib/_umodsi3.S deleted file mode 100644 index b1667376c5..0000000000 --- a/arch/arm/lib/_umodsi3.S +++ /dev/null @@ -1,90 +0,0 @@ -#include <linux/linkage.h> - -/* # 1 "libgcc1.S" */ -@ libgcc1 routines for ARM cpu. -@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) -/* # 145 "libgcc1.S" */ -dividend .req r0 -divisor .req r1 -overdone .req r2 -curbit .req r3 -/* ip .req r12 */ -/* sp .req r13 */ -/* lr .req r14 */ -/* pc .req r15 */ - .text - .type __umodsi3 ,function - .align 0 - ENTRY(__umodsi3) - cmp divisor, #0 - beq Ldiv0 - mov curbit, #1 - cmp dividend, divisor - movcc pc, lr -Loop1: - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. - cmp divisor, #0x10000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #4 - movcc curbit, curbit, lsl #4 - bcc Loop1 -Lbignum: - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. - cmp divisor, #0x80000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #1 - movcc curbit, curbit, lsl #1 - bcc Lbignum -Loop3: - @ Test for possible subtractions. On the final pass, this may - @ subtract too much from the dividend, so keep track of which - @ subtractions are done, we can fix them up afterwards... - mov overdone, #0 - cmp dividend, divisor - subcs dividend, dividend, divisor - cmp dividend, divisor, lsr #1 - subcs dividend, dividend, divisor, lsr #1 - orrcs overdone, overdone, curbit, ror #1 - cmp dividend, divisor, lsr #2 - subcs dividend, dividend, divisor, lsr #2 - orrcs overdone, overdone, curbit, ror #2 - cmp dividend, divisor, lsr #3 - subcs dividend, dividend, divisor, lsr #3 - orrcs overdone, overdone, curbit, ror #3 - mov ip, curbit - cmp dividend, #0 @ Early termination? - movnes curbit, curbit, lsr #4 @ No, any more bits to do? - movne divisor, divisor, lsr #4 - bne Loop3 - @ Any subtractions that we should not have done will be recorded in - @ the top three bits of "overdone". Exactly which were not needed - @ are governed by the position of the bit, stored in ip. - @ If we terminated early, because dividend became zero, - @ then none of the below will match, since the bit in ip will not be - @ in the bottom nibble. - ands overdone, overdone, #0xe0000000 - moveq pc, lr @ No fixups needed - tst overdone, ip, ror #3 - addne dividend, dividend, divisor, lsr #3 - tst overdone, ip, ror #2 - addne dividend, dividend, divisor, lsr #2 - tst overdone, ip, ror #1 - addne dividend, dividend, divisor, lsr #1 - mov pc, lr -Ldiv0: - str lr, [sp, #-4]! - bl __div0 (PLT) - mov r0, #0 @ about as wrong as it could be - ldmia sp!, {pc} - .size __umodsi3 , . - __umodsi3 -/* # 320 "libgcc1.S" */ -/* # 421 "libgcc1.S" */ -/* # 433 "libgcc1.S" */ -/* # 456 "libgcc1.S" */ -/* # 500 "libgcc1.S" */ -/* # 580 "libgcc1.S" */ -ENDPROC(__umodsi3) diff --git a/arch/arm/lib/_ashldi3.S b/arch/arm/lib/ashldi3.S index 9c34c212cb..e9ec890881 100644 --- a/arch/arm/lib/_ashldi3.S +++ b/arch/arm/lib/ashldi3.S @@ -5,6 +5,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> #ifdef __ARMEB__ #define al r1 @@ -14,15 +15,20 @@ #define ah r1 #endif -.globl __ashldi3 -__ashldi3: +.pushsection .text.__ashldi3, "ax" +ENTRY(__ashldi3) ENTRY(__aeabi_llsl) subs r3, r2, #32 rsb ip, r2, #32 movmi ah, ah, lsl r2 movpl ah, al, lsl r3 - orrmi ah, ah, al, lsr ip + ARM( orrmi ah, ah, al, lsr ip ) + THUMB( lsrmi r3, al, ip ) + THUMB( orrmi ah, ah, r3 ) mov al, al, lsl r2 - mov pc, lr + ret lr + +ENDPROC(__ashldi3) ENDPROC(__aeabi_llsl) +.popsection diff --git a/arch/arm/lib/_ashrdi3.S b/arch/arm/lib/ashrdi3.S index c74fd64499..6e15774c0a 100644 --- a/arch/arm/lib/_ashrdi3.S +++ b/arch/arm/lib/ashrdi3.S @@ -5,6 +5,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> #ifdef __ARMEB__ #define al r1 @@ -14,15 +15,20 @@ #define ah r1 #endif -.globl __ashrdi3 -__ashrdi3: +.pushsection .text.__ashrdi3, "ax" +ENTRY(__ashrdi3) ENTRY(__aeabi_lasr) subs r3, r2, #32 rsb ip, r2, #32 movmi al, al, lsr r2 movpl al, ah, asr r3 - orrmi al, al, ah, lsl ip + ARM( orrmi al, al, ah, lsl ip ) + THUMB( lslmi r3, ah, ip ) + THUMB( orrmi al, al, r3 ) mov ah, ah, asr r2 - mov pc, lr + ret lr + +ENDPROC(__ashrdi3) ENDPROC(__aeabi_lasr) +.popsection diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S new file mode 100644 index 0000000000..b417db222d --- /dev/null +++ b/arch/arm/lib/div64.S @@ -0,0 +1,214 @@ +/* + * linux/arch/arm/lib/div64.S + * + * Optimized computation of 64-bit dividend / 32-bit divisor + * + * Author: Nicolas Pitre + * Created: Oct 5, 2003 + * Copyright: Monta Vista Software, Inc. + * + * SPDX-License-Identifier: GPL-2.0 + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> +#ifdef __UBOOT__ +#define UNWIND(x...) +#endif + +#ifdef __ARMEB__ +#define xh r0 +#define xl r1 +#define yh r2 +#define yl r3 +#else +#define xl r0 +#define xh r1 +#define yl r2 +#define yh r3 +#endif + +/* + * __do_div64: perform a division with 64-bit dividend and 32-bit divisor. + * + * Note: Calling convention is totally non standard for optimal code. + * This is meant to be used by do_div() from include/asm/div64.h only. + * + * Input parameters: + * xh-xl = dividend (clobbered) + * r4 = divisor (preserved) + * + * Output values: + * yh-yl = result + * xh = remainder + * + * Clobbered regs: xl, ip + */ + +.pushsection .text.__do_div64, "ax" +ENTRY(__do_div64) +UNWIND(.fnstart) + + @ Test for easy paths first. + subs ip, r4, #1 + bls 9f @ divisor is 0 or 1 + tst ip, r4 + beq 8f @ divisor is power of 2 + + @ See if we need to handle upper 32-bit result. + cmp xh, r4 + mov yh, #0 + blo 3f + + @ Align divisor with upper part of dividend. + @ The aligned divisor is stored in yl preserving the original. + @ The bit position is stored in ip. + +#if __LINUX_ARM_ARCH__ >= 5 + + clz yl, r4 + clz ip, xh + sub yl, yl, ip + mov ip, #1 + mov ip, ip, lsl yl + mov yl, r4, lsl yl + +#else + + mov yl, r4 + mov ip, #1 +1: cmp yl, #0x80000000 + cmpcc yl, xh + movcc yl, yl, lsl #1 + movcc ip, ip, lsl #1 + bcc 1b + +#endif + + @ The division loop for needed upper bit positions. + @ Break out early if dividend reaches 0. +2: cmp xh, yl + orrcs yh, yh, ip + subscs xh, xh, yl + movsne ip, ip, lsr #1 + mov yl, yl, lsr #1 + bne 2b + + @ See if we need to handle lower 32-bit result. +3: cmp xh, #0 + mov yl, #0 + cmpeq xl, r4 + movlo xh, xl + retlo lr + + @ The division loop for lower bit positions. + @ Here we shift remainer bits leftwards rather than moving the + @ divisor for comparisons, considering the carry-out bit as well. + mov ip, #0x80000000 +4: movs xl, xl, lsl #1 + adcs xh, xh, xh + beq 6f + cmpcc xh, r4 +5: orrcs yl, yl, ip + subcs xh, xh, r4 + movs ip, ip, lsr #1 + bne 4b + ret lr + + @ The top part of remainder became zero. If carry is set + @ (the 33th bit) this is a false positive so resume the loop. + @ Otherwise, if lower part is also null then we are done. +6: bcs 5b + cmp xl, #0 + reteq lr + + @ We still have remainer bits in the low part. Bring them up. + +#if __LINUX_ARM_ARCH__ >= 5 + + clz xh, xl @ we know xh is zero here so... + add xh, xh, #1 + mov xl, xl, lsl xh + mov ip, ip, lsr xh + +#else + +7: movs xl, xl, lsl #1 + mov ip, ip, lsr #1 + bcc 7b + +#endif + + @ Current remainder is now 1. It is worthless to compare with + @ divisor at this point since divisor can not be smaller than 3 here. + @ If possible, branch for another shift in the division loop. + @ If no bit position left then we are done. + movs ip, ip, lsr #1 + mov xh, #1 + bne 4b + ret lr + +8: @ Division by a power of 2: determine what that divisor order is + @ then simply shift values around + +#if __LINUX_ARM_ARCH__ >= 5 + + clz ip, r4 + rsb ip, ip, #31 + +#else + + mov yl, r4 + cmp r4, #(1 << 16) + mov ip, #0 + movhs yl, yl, lsr #16 + movhs ip, #16 + + cmp yl, #(1 << 8) + movhs yl, yl, lsr #8 + addhs ip, ip, #8 + + cmp yl, #(1 << 4) + movhs yl, yl, lsr #4 + addhs ip, ip, #4 + + cmp yl, #(1 << 2) + addhi ip, ip, #3 + addls ip, ip, yl, lsr #1 + +#endif + + mov yh, xh, lsr ip + mov yl, xl, lsr ip + rsb ip, ip, #32 + ARM( orr yl, yl, xh, lsl ip ) + THUMB( lsl xh, xh, ip ) + THUMB( orr yl, yl, xh ) + mov xh, xl, lsl ip + mov xh, xh, lsr ip + ret lr + + @ eq -> division by 1: obvious enough... +9: moveq yl, xl + moveq yh, xh + moveq xh, #0 + reteq lr +UNWIND(.fnend) + +UNWIND(.fnstart) +UNWIND(.pad #4) +UNWIND(.save {lr}) +Ldiv0_64: + @ Division by 0: + str lr, [sp, #-8]! + bl __div0 + + @ as wrong as it could be... + mov yl, #0 + mov yh, #0 + mov xh, #0 + ldr pc, [sp], #8 + +UNWIND(.fnend) +ENDPROC(__do_div64) +.popsection diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S new file mode 100644 index 0000000000..9bf93ceb14 --- /dev/null +++ b/arch/arm/lib/lib1funcs.S @@ -0,0 +1,429 @@ +/* + * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines + * + * Author: Nicolas Pitre <nico@fluxnic.net> + * - contributed to gcc-3.4 on Sep 30, 2003 + * - adapted for the Linux kernel on Oct 2, 2003 + */ + +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc. + + * SPDX-License-Identifier: GPL-2.0+ + */ + + +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * U-Boot compatibility bit, define empty UNWIND() macro as, since we + * do not support stack unwinding and define CONFIG_AEABI to make all + * of the functions available without diverging from Linux code. + */ +#ifdef __UBOOT__ +#define UNWIND(x...) +#define CONFIG_AEABI +#endif + +.macro ARM_DIV_BODY dividend, divisor, result, curbit + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \curbit, \divisor + clz \result, \dividend + sub \result, \curbit, \result + mov \curbit, #1 + mov \divisor, \divisor, lsl \result + mov \curbit, \curbit, lsl \result + mov \result, #0 + +#else + + @ Initially shift the divisor left 3 bits if possible, + @ set curbit accordingly. This allows for curbit to be located + @ at the left end of each 4 bit nibbles in the division loop + @ to save one loop in most cases. + tst \divisor, #0xe0000000 + moveq \divisor, \divisor, lsl #3 + moveq \curbit, #8 + movne \curbit, #1 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. +1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + movlo \curbit, \curbit, lsl #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + movlo \curbit, \curbit, lsl #1 + blo 1b + + mov \result, #0 + +#endif + + @ Division loop +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + orrhs \result, \result, \curbit + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + orrhs \result, \result, \curbit, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + orrhs \result, \result, \curbit, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + orrhs \result, \result, \curbit, lsr #3 + cmp \dividend, #0 @ Early termination? + movsne \curbit, \curbit, lsr #4 @ No, any more bits to do? + movne \divisor, \divisor, lsr #4 + bne 1b + +.endm + + +.macro ARM_DIV2_ORDER divisor, order + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \order, \divisor + rsb \order, \order, #31 + +#else + + cmp \divisor, #(1 << 16) + movhs \divisor, \divisor, lsr #16 + movhs \order, #16 + movlo \order, #0 + + cmp \divisor, #(1 << 8) + movhs \divisor, \divisor, lsr #8 + addhs \order, \order, #8 + + cmp \divisor, #(1 << 4) + movhs \divisor, \divisor, lsr #4 + addhs \order, \order, #4 + + cmp \divisor, #(1 << 2) + addhi \order, \order, #3 + addls \order, \order, \divisor, lsr #1 + +#endif + +.endm + + +.macro ARM_MOD_BODY dividend, divisor, order, spare + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \order, \divisor + clz \spare, \dividend + sub \order, \order, \spare + mov \divisor, \divisor, lsl \order + +#else + + mov \order, #0 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. +1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + addlo \order, \order, #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + addlo \order, \order, #1 + blo 1b + +#endif + + @ Perform all needed subtractions to keep only the reminder. + @ Do comparisons in batch of 4 first. + subs \order, \order, #3 @ yes, 3 is intended here + blt 2f + +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + cmp \dividend, #1 + mov \divisor, \divisor, lsr #4 + subsge \order, \order, #4 + bge 1b + + tst \order, #3 + teqne \dividend, #0 + beq 5f + + @ Either 1, 2 or 3 comparison/subtractions are left. +2: cmn \order, #2 + blt 4f + beq 3f + cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +3: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +4: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor +5: +.endm + + +.pushsection .text.__udivsi3, "ax" +ENTRY(__udivsi3) +ENTRY(__aeabi_uidiv) +UNWIND(.fnstart) + + subs r2, r1, #1 + reteq lr + bcc Ldiv0 + cmp r0, r1 + bls 11f + tst r1, r2 + beq 12f + + ARM_DIV_BODY r0, r1, r2, r3 + + mov r0, r2 + ret lr + +11: moveq r0, #1 + movne r0, #0 + ret lr + +12: ARM_DIV2_ORDER r1, r2 + + mov r0, r0, lsr r2 + ret lr + +UNWIND(.fnend) +ENDPROC(__udivsi3) +ENDPROC(__aeabi_uidiv) +.popsection + +.pushsection .text.__umodsi3, "ax" +ENTRY(__umodsi3) +UNWIND(.fnstart) + + subs r2, r1, #1 @ compare divisor with 1 + bcc Ldiv0 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + retls lr + + ARM_MOD_BODY r0, r1, r2, r3 + + ret lr + +UNWIND(.fnend) +ENDPROC(__umodsi3) +.popsection + +.pushsection .text.__divsi3, "ax" +ENTRY(__divsi3) +ENTRY(__aeabi_idiv) +UNWIND(.fnstart) + + cmp r1, #0 + eor ip, r0, r1 @ save the sign of the result. + beq Ldiv0 + rsbmi r1, r1, #0 @ loops below use unsigned. + subs r2, r1, #1 @ division by 1 or -1 ? + beq 10f + movs r3, r0 + rsbmi r3, r0, #0 @ positive dividend value + cmp r3, r1 + bls 11f + tst r1, r2 @ divisor is power of 2 ? + beq 12f + + ARM_DIV_BODY r3, r1, r0, r2 + + cmp ip, #0 + rsbmi r0, r0, #0 + ret lr + +10: teq ip, r0 @ same sign ? + rsbmi r0, r0, #0 + ret lr + +11: movlo r0, #0 + moveq r0, ip, asr #31 + orreq r0, r0, #1 + ret lr + +12: ARM_DIV2_ORDER r1, r2 + + cmp ip, #0 + mov r0, r3, lsr r2 + rsbmi r0, r0, #0 + ret lr + +UNWIND(.fnend) +ENDPROC(__divsi3) +ENDPROC(__aeabi_idiv) +.popsection + +.pushsection .text.__modsi3, "ax" +ENTRY(__modsi3) +UNWIND(.fnstart) + + cmp r1, #0 + beq Ldiv0 + rsbmi r1, r1, #0 @ loops below use unsigned. + movs ip, r0 @ preserve sign of dividend + rsbmi r0, r0, #0 @ if negative make positive + subs r2, r1, #1 @ compare divisor with 1 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + bls 10f + + ARM_MOD_BODY r0, r1, r2, r3 + +10: cmp ip, #0 + rsbmi r0, r0, #0 + ret lr + +UNWIND(.fnend) +ENDPROC(__modsi3) +.popsection + +#ifdef CONFIG_AEABI + +.pushsection .text.__aeabi_uidivmod, "ax" +ENTRY(__aeabi_uidivmod) +UNWIND(.fnstart) +UNWIND(.save {r0, r1, ip, lr} ) + + stmfd sp!, {r0, r1, ip, lr} + bl __aeabi_uidiv + ldmfd sp!, {r1, r2, ip, lr} + mul r3, r0, r2 + sub r1, r1, r3 + ret lr + +UNWIND(.fnend) +ENDPROC(__aeabi_uidivmod) +.popsection + +.pushsection .text.__aeabi_uidivmod, "ax" +ENTRY(__aeabi_idivmod) +UNWIND(.fnstart) +UNWIND(.save {r0, r1, ip, lr} ) + + stmfd sp!, {r0, r1, ip, lr} + bl __aeabi_idiv + ldmfd sp!, {r1, r2, ip, lr} + mul r3, r0, r2 + sub r1, r1, r3 + ret lr + +UNWIND(.fnend) +ENDPROC(__aeabi_idivmod) +.popsection + +#endif + +.pushsection .text.Ldiv0, "ax" +Ldiv0: +UNWIND(.fnstart) +UNWIND(.pad #4) +UNWIND(.save {lr}) + + str lr, [sp, #-8]! + bl __div0 + mov r0, #0 @ About as wrong as it could be. + ldr pc, [sp], #8 + +UNWIND(.fnend) +ENDPROC(Ldiv0) +.popsection + +.pushsection .text.__gnu_thumb1_case_sqi, "ax" +/* Thumb-1 specialities */ +#if defined(CONFIG_SYS_THUMB_BUILD) && !defined(CONFIG_HAS_THUMB2) +ENTRY(__gnu_thumb1_case_sqi) + push {r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r1, r1, #1 + ldrsb r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r1} + bx lr +ENDPROC(__gnu_thumb1_case_sqi) +.popsection + +_.pushsection .text.__gnu_thumb1_case_uqi, "ax" +ENTRY(__gnu_thumb1_case_uqi) + push {r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r1, r1, #1 + ldrb r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r1} + bx lr +ENDPROC(__gnu_thumb1_case_uqi) +.popsection + +.pushsection .text.__gnu_thumb1_case_shi, "ax" +ENTRY(__gnu_thumb1_case_shi) + push {r0, r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r0, r0, #1 + lsls r1, r1, #1 + ldrsh r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r0, r1} + bx lr +ENDPROC(__gnu_thumb1_case_shi) +.popsection + +.pushsection .text.__gnu_thumb1_case_uhi, "ax" +ENTRY(__gnu_thumb1_case_uhi) + push {r0, r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r0, r0, #1 + lsls r1, r1, #1 + ldrh r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r0, r1} + bx lr +ENDPROC(__gnu_thumb1_case_uhi) +.popsection +#endif diff --git a/arch/arm/lib/_lshrdi3.S b/arch/arm/lib/lshrdi3.S index 1f9b916464..ead33e53c7 100644 --- a/arch/arm/lib/_lshrdi3.S +++ b/arch/arm/lib/lshrdi3.S @@ -5,6 +5,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> #ifdef __ARMEB__ #define al r1 @@ -14,15 +15,20 @@ #define ah r1 #endif -.globl __lshrdi3 -__lshrdi3: +.pushsection .text.__lshldi3, "ax" +ENTRY(__lshrdi3) ENTRY(__aeabi_llsr) subs r3, r2, #32 rsb ip, r2, #32 movmi al, al, lsr r2 movpl al, ah, lsr r3 - orrmi al, al, ah, lsl ip + ARM( orrmi al, al, ah, lsl ip ) + THUMB( lslmi r3, ah, ip ) + THUMB( orrmi al, al, r3 ) mov ah, ah, lsr r2 - mov pc, lr + ret lr + +ENDPROC(__lshrdi3) ENDPROC(__aeabi_llsr) +.popsection diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S index 7d9fc0f9be..00602e9cf8 100644 --- a/arch/arm/lib/memcpy.S +++ b/arch/arm/lib/memcpy.S @@ -13,12 +13,6 @@ #include <linux/linkage.h> #include <asm/assembler.h> -#if defined(CONFIG_SYS_THUMB_BUILD) && !defined(MEMCPY_NO_THUMB_BUILD) -#define W(instr) instr.w -#else -#define W(instr) instr -#endif - #define LDR1W_SHIFT 0 #define STR1W_SHIFT 0 diff --git a/arch/arm/lib/muldi3.S b/arch/arm/lib/muldi3.S new file mode 100644 index 0000000000..d7c93e702e --- /dev/null +++ b/arch/arm/lib/muldi3.S @@ -0,0 +1,48 @@ +/* + * linux/arch/arm/lib/muldi3.S + * + * Author: Nicolas Pitre + * Created: Oct 19, 2005 + * Copyright: Monta Vista Software, Inc. + * + * SPDX-License-Identifier: GPL-2.0 + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + +#ifdef __ARMEB__ +#define xh r0 +#define xl r1 +#define yh r2 +#define yl r3 +#else +#define xl r0 +#define xh r1 +#define yl r2 +#define yh r3 +#endif + +.pushsection .text.__muldi3, "ax" +ENTRY(__muldi3) +ENTRY(__aeabi_lmul) + + mul xh, yl, xh + mla xh, xl, yh, xh + mov ip, xl, lsr #16 + mov yh, yl, lsr #16 + bic xl, xl, ip, lsl #16 + bic yl, yl, yh, lsl #16 + mla xh, yh, ip, xh + mul yh, xl, yh + mul xl, yl, xl + mul ip, yl, ip + adds xl, xl, yh, lsl #16 + adc xh, xh, yh, lsr #16 + adds xl, xl, ip, lsl #16 + adc xh, xh, ip, lsr #16 + ret lr + +ENDPROC(__muldi3) +ENDPROC(__aeabi_lmul) +.popsection diff --git a/arch/arm/lib/_uldivmod.S b/arch/arm/lib/uldivmod.S index 426c2f2406..724699658b 100644 --- a/arch/arm/lib/_uldivmod.S +++ b/arch/arm/lib/uldivmod.S @@ -9,10 +9,6 @@ #include <linux/linkage.h> #include <asm/assembler.h> -/* We don't use Thumb instructions for now */ -#define ARM(x...) x -#define THUMB(x...) - /* * A, Q = r0 + (r1 << 32) * B, R = r2 + (r3 << 32) @@ -37,7 +33,9 @@ THUMB( TMP .req r8 ) +.pushsection .text.__aeabi_uldivmod, "ax" ENTRY(__aeabi_uldivmod) + stmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) lr} @ Test if B == 0 orrs ip, B_0, B_1 @ Z set -> B == 0 @@ -226,7 +224,9 @@ THUMB( orrpl A_0, A_0, TMP ) @ Shift A to the right by the appropriate amount. rsb D_1, D_0, #32 mov Q_0, A_0, lsr D_0 - orr Q_0, A_1, lsl D_1 + ARM( orr Q_0, Q_0, A_1, lsl D_1 ) + THUMB( lsl A_1, D_1 ) + THUMB( orr Q_0, A_1 ) mov Q_1, A_1, lsr D_0 @ Move C to R mov R_0, C_0 @@ -243,3 +243,4 @@ L_div_by_0: mov R_1, #0 ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc} ENDPROC(__aeabi_uldivmod) +.popsection |