summaryrefslogtreecommitdiff
path: root/linux/arch/blackfin/lib/udivsi3.S
diff options
context:
space:
mode:
authorMichael J. Chudobiak <mjc@avtechpulse.com>2016-04-25 10:00:44 -0400
committerMichael J. Chudobiak <mjc@avtechpulse.com>2016-04-25 10:00:44 -0400
commita1df417e74aa6dae7352dc8cbb0ad471af5b7c69 (patch)
treec34b2311e37ea31db153c90cb8f4570374d05e78 /linux/arch/blackfin/lib/udivsi3.S
initial Olimex linux tree from Daniel, originally Feb 3, 2016
Diffstat (limited to 'linux/arch/blackfin/lib/udivsi3.S')
-rw-r--r--linux/arch/blackfin/lib/udivsi3.S277
1 files changed, 277 insertions, 0 deletions
diff --git a/linux/arch/blackfin/lib/udivsi3.S b/linux/arch/blackfin/lib/udivsi3.S
new file mode 100644
index 00000000..748a6a2e
--- /dev/null
+++ b/linux/arch/blackfin/lib/udivsi3.S
@@ -0,0 +1,277 @@
+/*
+ * Copyright 2004-2009 Analog Devices Inc.
+ *
+ * Licensed under the Clear BSD license or the GPL-2 (or later)
+ */
+
+#include <linux/linkage.h>
+
+#define CARRY AC0
+
+#ifdef CONFIG_ARITHMETIC_OPS_L1
+.section .l1.text
+#else
+.text
+#endif
+
+
+ENTRY(___udivsi3)
+
+ CC = R0 < R1 (IU); /* If X < Y, always return 0 */
+ IF CC JUMP .Lreturn_ident;
+
+ R2 = R1 << 16;
+ CC = R2 <= R0 (IU);
+ IF CC JUMP .Lidents;
+
+ R2 = R0 >> 31; /* if X is a 31-bit number */
+ R3 = R1 >> 15; /* and Y is a 15-bit number */
+ R2 = R2 | R3; /* then it's okay to use the DIVQ builtins (fallthrough to fast)*/
+ CC = R2;
+ IF CC JUMP .Ly_16bit;
+
+/* METHOD 1: FAST DIVQ
+ We know we have a 31-bit dividend, and 15-bit divisor so we can use the
+ simple divq approach (first setting AQ to 0 - implying unsigned division,
+ then 16 DIVQ's).
+*/
+
+ AQ = CC; /* Clear AQ (CC==0) */
+
+/* ISR States: When dividing two integers (32.0/16.0) using divide primitives,
+ we need to shift the dividend one bit to the left.
+ We have already checked that we have a 31-bit number so we are safe to do
+ that.
+*/
+ R0 <<= 1;
+ DIVQ(R0, R1); // 1
+ DIVQ(R0, R1); // 2
+ DIVQ(R0, R1); // 3
+ DIVQ(R0, R1); // 4
+ DIVQ(R0, R1); // 5
+ DIVQ(R0, R1); // 6
+ DIVQ(R0, R1); // 7
+ DIVQ(R0, R1); // 8
+ DIVQ(R0, R1); // 9
+ DIVQ(R0, R1); // 10
+ DIVQ(R0, R1); // 11
+ DIVQ(R0, R1); // 12
+ DIVQ(R0, R1); // 13
+ DIVQ(R0, R1); // 14
+ DIVQ(R0, R1); // 15
+ DIVQ(R0, R1); // 16
+ R0 = R0.L (Z);
+ RTS;
+
+.Ly_16bit:
+ /* We know that the upper 17 bits of Y might have bits set,
+ ** or that the sign bit of X might have a bit. If Y is a
+ ** 16-bit number, but not bigger, then we can use the builtins
+ ** with a post-divide correction.
+ ** R3 currently holds Y>>15, which means R3's LSB is the
+ ** bit we're interested in.
+ */
+
+ /* According to the ISR, to use the Divide primitives for
+ ** unsigned integer divide, the useable range is 31 bits
+ */
+ CC = ! BITTST(R0, 31);
+
+ /* IF condition is true we can scale our inputs and use the divide primitives,
+ ** with some post-adjustment
+ */
+ R3 += -1; /* if so, Y is 0x00008nnn */
+ CC &= AZ;
+
+ /* If condition is true we can scale our inputs and use the divide primitives,
+ ** with some post-adjustment
+ */
+ R3 = R1 >> 1; /* Pre-scaled divisor for primitive case */
+ R2 = R0 >> 16;
+
+ R2 = R3 - R2; /* shifted divisor < upper 16 bits of dividend */
+ CC &= CARRY;
+ IF CC JUMP .Lshift_and_correct;
+
+ /* Fall through to the identities */
+
+/* METHOD 2: identities and manual calculation
+ We are not able to use the divide primites, but may still catch some special
+ cases.
+*/
+.Lidents:
+ /* Test for common identities. Value to be returned is placed in R2. */
+ CC = R0 == 0; /* 0/Y => 0 */
+ IF CC JUMP .Lreturn_r0;
+ CC = R0 == R1; /* X==Y => 1 */
+ IF CC JUMP .Lreturn_ident;
+ CC = R1 == 1; /* X/1 => X */
+ IF CC JUMP .Lreturn_ident;
+
+ R2.L = ONES R1;
+ R2 = R2.L (Z);
+ CC = R2 == 1;
+ IF CC JUMP .Lpower_of_two;
+
+ [--SP] = (R7:5); /* Push registers R5-R7 */
+
+ /* Idents don't match. Go for the full operation. */
+
+
+ R6 = 2; /* assume we'll shift two */
+ R3 = 1;
+
+ P2 = R1;
+ /* If either R0 or R1 have sign set, */
+ /* divide them by two, and note it's */
+ /* been done. */
+ CC = R1 < 0;
+ R2 = R1 >> 1;
+ IF CC R1 = R2; /* Possibly-shifted R1 */
+ IF !CC R6 = R3; /* R1 doesn't, so at most 1 shifted */
+
+ P0 = 0;
+ R3 = -R1;
+ [--SP] = R3;
+ R2 = R0 >> 1;
+ R2 = R0 >> 1;
+ CC = R0 < 0;
+ IF CC P0 = R6; /* Number of values divided */
+ IF !CC R2 = R0; /* Shifted R0 */
+
+ /* P0 is 0, 1 (NR/=2) or 2 (NR/=2, DR/=2) */
+
+ /* r2 holds Copy dividend */
+ R3 = 0; /* Clear partial remainder */
+ R7 = 0; /* Initialise quotient bit */
+
+ P1 = 32; /* Set loop counter */
+ LSETUP(.Lulst, .Lulend) LC0 = P1; /* Set loop counter */
+.Lulst: R6 = R2 >> 31; /* R6 = sign bit of R2, for carry */
+ R2 = R2 << 1; /* Shift 64 bit dividend up by 1 bit */
+ R3 = R3 << 1 || R5 = [SP];
+ R3 = R3 | R6; /* Include any carry */
+ CC = R7 < 0; /* Check quotient(AQ) */
+ /* If AQ==0, we'll sub divisor */
+ IF CC R5 = R1; /* and if AQ==1, we'll add it. */
+ R3 = R3 + R5; /* Add/sub divsor to partial remainder */
+ R7 = R3 ^ R1; /* Generate next quotient bit */
+
+ R5 = R7 >> 31; /* Get AQ */
+ BITTGL(R5, 0); /* Invert it, to get what we'll shift */
+.Lulend: R2 = R2 + R5; /* and "shift" it in. */
+
+ CC = P0 == 0; /* Check how many inputs we shifted */
+ IF CC JUMP .Lno_mult; /* if none... */
+ R6 = R2 << 1;
+ CC = P0 == 1;
+ IF CC R2 = R6; /* if 1, Q = Q*2 */
+ IF !CC R1 = P2; /* if 2, restore stored divisor */
+
+ R3 = R2; /* Copy of R2 */
+ R3 *= R1; /* Q * divisor */
+ R5 = R0 - R3; /* Z = (dividend - Q * divisor) */
+ CC = R1 <= R5 (IU); /* Check if divisor <= Z? */
+ R6 = CC; /* if yes, R6 = 1 */
+ R2 = R2 + R6; /* if yes, add one to quotient(Q) */
+.Lno_mult:
+ SP += 4;
+ (R7:5) = [SP++]; /* Pop registers R5-R7 */
+ R0 = R2; /* Store quotient */
+ RTS;
+
+.Lreturn_ident:
+ CC = R0 < R1 (IU); /* If X < Y, always return 0 */
+ R2 = 0;
+ IF CC JUMP .Ltrue_return_ident;
+ R2 = -1 (X); /* X/0 => 0xFFFFFFFF */
+ CC = R1 == 0;
+ IF CC JUMP .Ltrue_return_ident;
+ R2 = -R2; /* R2 now 1 */
+ CC = R0 == R1; /* X==Y => 1 */
+ IF CC JUMP .Ltrue_return_ident;
+ R2 = R0; /* X/1 => X */
+ /*FALLTHRU*/
+
+.Ltrue_return_ident:
+ R0 = R2;
+.Lreturn_r0:
+ RTS;
+
+.Lpower_of_two:
+ /* Y has a single bit set, which means it's a power of two.
+ ** That means we can perform the division just by shifting
+ ** X to the right the appropriate number of bits
+ */
+
+ /* signbits returns the number of sign bits, minus one.
+ ** 1=>30, 2=>29, ..., 0x40000000=>0. Which means we need
+ ** to shift right n-signbits spaces. It also means 0x80000000
+ ** is a special case, because that *also* gives a signbits of 0
+ */
+
+ R2 = R0 >> 31;
+ CC = R1 < 0;
+ IF CC JUMP .Ltrue_return_ident;
+
+ R1.l = SIGNBITS R1;
+ R1 = R1.L (Z);
+ R1 += -30;
+ R0 = LSHIFT R0 by R1.L;
+ RTS;
+
+/* METHOD 3: PRESCALE AND USE THE DIVIDE PRIMITIVES WITH SOME POST-CORRECTION
+ Two scaling operations are required to use the divide primitives with a
+ divisor > 0x7FFFF.
+ Firstly (as in method 1) we need to shift the dividend 1 to the left for
+ integer division.
+ Secondly we need to shift both the divisor and dividend 1 to the right so
+ both are in range for the primitives.
+ The left/right shift of the dividend does nothing so we can skip it.
+*/
+.Lshift_and_correct:
+ R2 = R0;
+ // R3 is already R1 >> 1
+ CC=!CC;
+ AQ = CC; /* Clear AQ, got here with CC = 0 */
+ DIVQ(R2, R3); // 1
+ DIVQ(R2, R3); // 2
+ DIVQ(R2, R3); // 3
+ DIVQ(R2, R3); // 4
+ DIVQ(R2, R3); // 5
+ DIVQ(R2, R3); // 6
+ DIVQ(R2, R3); // 7
+ DIVQ(R2, R3); // 8
+ DIVQ(R2, R3); // 9
+ DIVQ(R2, R3); // 10
+ DIVQ(R2, R3); // 11
+ DIVQ(R2, R3); // 12
+ DIVQ(R2, R3); // 13
+ DIVQ(R2, R3); // 14
+ DIVQ(R2, R3); // 15
+ DIVQ(R2, R3); // 16
+
+ /* According to the Instruction Set Reference:
+ To divide by a divisor > 0x7FFF,
+ 1. prescale and perform divide to obtain quotient (Q) (done above),
+ 2. multiply quotient by unscaled divisor (result M)
+ 3. subtract the product from the divident to get an error (E = X - M)
+ 4. if E < divisor (Y) subtract 1, if E > divisor (Y) add 1, else return quotient (Q)
+ */
+ R3 = R2.L (Z); /* Q = X' / Y' */
+ R2 = R3; /* Preserve Q */
+ R2 *= R1; /* M = Q * Y */
+ R2 = R0 - R2; /* E = X - M */
+ R0 = R3; /* Copy Q into result reg */
+
+/* Correction: If result of the multiply is negative, we overflowed
+ and need to correct the result by subtracting 1 from the result.*/
+ R3 = 0xFFFF (Z);
+ R2 = R2 >> 16; /* E >> 16 */
+ CC = R2 == R3;
+ R3 = 1 ;
+ R1 = R0 - R3;
+ IF CC R0 = R1;
+ RTS;
+
+ENDPROC(___udivsi3)