summaryrefslogtreecommitdiff
path: root/arch/arc/lib
diff options
context:
space:
mode:
authorTom Rini <trini@konsulko.com>2018-03-23 09:31:24 -0400
committerTom Rini <trini@konsulko.com>2018-03-23 09:31:24 -0400
commit423effc04a195ce6a464eadadfa7f765bf786889 (patch)
tree7d4d3ccda5e088e4bbad8cffc3b6f965fb5c5179 /arch/arc/lib
parentb0af10443afcb9e0dcfe18a7b8a013b230df9e39 (diff)
parentf665c14f04d225597cd3cb0c6faa35ddc2a1b2df (diff)
Merge git://git.denx.de/u-boot-arc
Alexey: 1. Significantly rework cache-related functionality. In particular, that fixes coherency problems in some corner cases, allows us to enable and disable caches at run-time and still have a properly running system, and finally supports execution from real flash (before, we used to run from DDR from the very beginning). 2. Remove string routines implemented in assembly. That allows us to build and run U-Boot on a wide range of ARC cores with different configurations, i.e. whatever tuning is used on GCC's command line, we'll get code for the desired flavor of ARC. Otherwise, for each and every corner case we would need to add ifdefs in assembly code to accommodate missing instructions etc. 3. Make use of GCC's garbage collector, which helps to slim down the resulting image quite a bit. 4. Also, we may now disable U-Boot self-relocation for ARC if needed, either by platform or for debugging purposes.
Diffstat (limited to 'arch/arc/lib')
-rw-r--r--arch/arc/lib/Makefile7
-rw-r--r--arch/arc/lib/bootm.c55
-rw-r--r--arch/arc/lib/cache.c677
-rw-r--r--arch/arc/lib/init_helpers.c6
-rw-r--r--arch/arc/lib/memcmp.S123
-rw-r--r--arch/arc/lib/memcpy-700.S63
-rw-r--r--arch/arc/lib/memset.S62
-rw-r--r--arch/arc/lib/relocate.c6
-rw-r--r--arch/arc/lib/start.S28
-rw-r--r--arch/arc/lib/strchr-700.S141
-rw-r--r--arch/arc/lib/strcmp.S97
-rw-r--r--arch/arc/lib/strcpy-700.S67
-rw-r--r--arch/arc/lib/strlen.S80
13 files changed, 485 insertions, 927 deletions
diff --git a/arch/arc/lib/Makefile b/arch/arc/lib/Makefile
index 12097bf3be..6b7fb0fdff 100644
--- a/arch/arc/lib/Makefile
+++ b/arch/arc/lib/Makefile
@@ -10,13 +10,6 @@ obj-y += cache.o
obj-y += cpu.o
obj-y += interrupts.o
obj-y += relocate.o
-obj-y += strchr-700.o
-obj-y += strcmp.o
-obj-y += strcpy-700.o
-obj-y += strlen.o
-obj-y += memcmp.o
-obj-y += memcpy-700.o
-obj-y += memset.o
obj-y += reset.o
obj-y += ints_low.o
obj-y += init_helpers.o
diff --git a/arch/arc/lib/bootm.c b/arch/arc/lib/bootm.c
index 4d4acff239..4f04aad34a 100644
--- a/arch/arc/lib/bootm.c
+++ b/arch/arc/lib/bootm.c
@@ -4,6 +4,7 @@
* SPDX-License-Identifier: GPL-2.0+
*/
+#include <asm/cache.h>
#include <common.h>
DECLARE_GLOBAL_DATA_PTR;
@@ -40,41 +41,52 @@ void arch_lmb_reserve(struct lmb *lmb)
static int cleanup_before_linux(void)
{
disable_interrupts();
- flush_dcache_all();
- invalidate_icache_all();
+ sync_n_cleanup_cache_all();
return 0;
}
+__weak int board_prep_linux(bootm_headers_t *images) { return 0; }
+
/* Subcommand: PREP */
-static void boot_prep_linux(bootm_headers_t *images)
+static int boot_prep_linux(bootm_headers_t *images)
{
- if (image_setup_linux(images))
- hang();
+ int ret;
+
+ ret = image_setup_linux(images);
+ if (ret)
+ return ret;
+
+ return board_prep_linux(images);
}
-__weak void smp_set_core_boot_addr(unsigned long addr, int corenr) {}
-__weak void smp_kick_all_cpus(void) {}
+/* Generic implementation for single core CPU */
+__weak void board_jump_and_run(ulong entry, int zero, int arch, uint params)
+{
+ void (*kernel_entry)(int zero, int arch, uint params);
+
+ kernel_entry = (void (*)(int, int, uint))entry;
+
+ kernel_entry(zero, arch, params);
+}
/* Subcommand: GO */
static void boot_jump_linux(bootm_headers_t *images, int flag)
{
- void (*kernel_entry)(int zero, int arch, uint params);
+ ulong kernel_entry;
unsigned int r0, r2;
int fake = (flag & BOOTM_STATE_OS_FAKE_GO);
- kernel_entry = (void (*)(int, int, uint))images->ep;
+ kernel_entry = images->ep;
debug("## Transferring control to Linux (at address %08lx)...\n",
- (ulong) kernel_entry);
+ kernel_entry);
bootstage_mark(BOOTSTAGE_ID_RUN_OS);
printf("\nStarting kernel ...%s\n\n", fake ?
"(fake run for tracing)" : "");
bootstage_mark_name(BOOTSTAGE_ID_BOOTM_HANDOFF, "start_kernel");
- cleanup_before_linux();
-
if (IMAGE_ENABLE_OF_LIBFDT && images->ft_len) {
r0 = 2;
r2 = (unsigned int)images->ft_addr;
@@ -83,11 +95,10 @@ static void boot_jump_linux(bootm_headers_t *images, int flag)
r2 = (unsigned int)env_get("bootargs");
}
- if (!fake) {
- smp_set_core_boot_addr((unsigned long)kernel_entry, -1);
- smp_kick_all_cpus();
- kernel_entry(r0, 0, r2);
- }
+ cleanup_before_linux();
+
+ if (!fake)
+ board_jump_and_run(kernel_entry, r0, 0, r2);
}
int do_bootm_linux(int flag, int argc, char *argv[], bootm_headers_t *images)
@@ -96,17 +107,13 @@ int do_bootm_linux(int flag, int argc, char *argv[], bootm_headers_t *images)
if ((flag & BOOTM_STATE_OS_BD_T) || (flag & BOOTM_STATE_OS_CMDLINE))
return -1;
- if (flag & BOOTM_STATE_OS_PREP) {
- boot_prep_linux(images);
- return 0;
- }
+ if (flag & BOOTM_STATE_OS_PREP)
+ return boot_prep_linux(images);
if (flag & (BOOTM_STATE_OS_GO | BOOTM_STATE_OS_FAKE_GO)) {
boot_jump_linux(images, flag);
return 0;
}
- boot_prep_linux(images);
- boot_jump_linux(images, flag);
- return 0;
+ return -1;
}
diff --git a/arch/arc/lib/cache.c b/arch/arc/lib/cache.c
index 04f1d9d59b..8203fae145 100644
--- a/arch/arc/lib/cache.c
+++ b/arch/arc/lib/cache.c
@@ -10,8 +10,145 @@
#include <linux/kernel.h>
#include <linux/log2.h>
#include <asm/arcregs.h>
+#include <asm/arc-bcr.h>
#include <asm/cache.h>
+/*
+ * [ NOTE 1 ]:
+ * Data cache (L1 D$ or SL$) entire invalidate operation or data cache disable
+ * operation may result in unexpected behavior and data loss even if we flush
+ * data cache right before invalidation. That may happen if we store any context
+ * on stack (like we store BLINK register on stack before function call).
+ * BLINK register is the register where return address is automatically saved
+ * when we do function call with instructions like 'bl'.
+ *
+ * Here is a real example:
+ * We may hang in the following code because the BLINK register is stored on
+ * the stack in the invalidate_dcache_all() function.
+ *
+ * void flush_dcache_all() {
+ * __dc_entire_op(OP_FLUSH);
+ * // Other code //
+ * }
+ *
+ * void invalidate_dcache_all() {
+ * __dc_entire_op(OP_INV);
+ * // Other code //
+ * }
+ *
+ * void foo(void) {
+ * flush_dcache_all();
+ * invalidate_dcache_all();
+ * }
+ *
+ * Now let's see what really happens during that code execution:
+ *
+ * foo()
+ * |->> call flush_dcache_all
+ * [return address is saved to BLINK register]
+ * [push BLINK] (save to stack) ![point 1]
+ * |->> call __dc_entire_op(OP_FLUSH)
+ * [return address is saved to BLINK register]
+ * [flush L1 D$]
+ * return [jump to BLINK]
+ * <<------
+ * [other flush_dcache_all code]
+ * [pop BLINK] (get from stack)
+ * return [jump to BLINK]
+ * <<------
+ * |->> call invalidate_dcache_all
+ * [return address is saved to BLINK register]
+ * [push BLINK] (save to stack) ![point 2]
+ * |->> call __dc_entire_op(OP_INV)
+ * [return address is saved to BLINK register]
+ * [invalidate L1 D$] ![point 3]
+ * // Oops!!!
+ * // We lose return address from invalidate_dcache_all function:
+ * // we save it to stack and invalidate L1 D$ after that!
+ * return [jump to BLINK]
+ * <<------
+ * [other invalidate_dcache_all code]
+ * [pop BLINK] (get from stack)
+ * // we don't have this data in L1 dcache as we invalidated it in [point 3]
+ * // so we get it from next memory level (for example DDR memory)
+ * // but in the memory we have value which we save in [point 1], which
+ * // is return address from flush_dcache_all function (instead of
+ * // address from current invalidate_dcache_all function which we
+ * // saved in [point 2] !)
+ * return [jump to BLINK]
+ * <<------
+ * // As BLINK now points back to the call of invalidate_dcache_all in foo(),
+ * // we call it again and loop forever.
+ *
+ * Fortunately we may fix that by doing flush & invalidation of D$ with a
+ * single instruction (instead of a flush and invalidation instruction pair)
+ * and by forcing function inlining with the '__attribute__((always_inline))'
+ * gcc attribute to avoid any function call (and BLINK store) between cache
+ * flush and disable.
+ *
+ *
+ * [ NOTE 2 ]:
+ * As of today we only support the following cache configurations on ARC.
+ * Other configurations may exist in HW (for example, since version 3.0 HS
+ * supports SL$ (L2 system level cache) disable) but we don't support it in SW.
+ * Configuration 1:
+ * ______________________
+ * | |
+ * | ARC CPU |
+ * |______________________|
+ * ___|___ ___|___
+ * | | | |
+ * | L1 I$ | | L1 D$ |
+ * |_______| |_______|
+ * on/off on/off
+ * ___|______________|____
+ * | |
+ * | main memory |
+ * |______________________|
+ *
+ * Configuration 2:
+ * ______________________
+ * | |
+ * | ARC CPU |
+ * |______________________|
+ * ___|___ ___|___
+ * | | | |
+ * | L1 I$ | | L1 D$ |
+ * |_______| |_______|
+ * on/off on/off
+ * ___|______________|____
+ * | |
+ * | L2 (SL$) |
+ * |______________________|
+ * always must be on
+ * ___|______________|____
+ * | |
+ * | main memory |
+ * |______________________|
+ *
+ * Configuration 3:
+ * ______________________
+ * | |
+ * | ARC CPU |
+ * |______________________|
+ * ___|___ ___|___
+ * | | | |
+ * | L1 I$ | | L1 D$ |
+ * |_______| |_______|
+ * on/off must be on
+ * ___|______________|____ _______
+ * | | | |
+ * | L2 (SL$) |-----| IOC |
+ * |______________________| |_______|
+ * always must be on on/off
+ * ___|______________|____
+ * | |
+ * | main memory |
+ * |______________________|
+ */
+
+DECLARE_GLOBAL_DATA_PTR;
+
/* Bit values in IC_CTRL */
#define IC_CTRL_CACHE_DISABLE BIT(0)
@@ -19,11 +156,10 @@
#define DC_CTRL_CACHE_DISABLE BIT(0)
#define DC_CTRL_INV_MODE_FLUSH BIT(6)
#define DC_CTRL_FLUSH_STATUS BIT(8)
-#define CACHE_VER_NUM_MASK 0xF
-#define OP_INV 0x1
-#define OP_FLUSH 0x2
-#define OP_INV_IC 0x3
+#define OP_INV BIT(0)
+#define OP_FLUSH BIT(1)
+#define OP_FLUSH_N_INV (OP_FLUSH | OP_INV)
/* Bit val in SLC_CONTROL */
#define SLC_CTRL_DIS 0x001
@@ -31,55 +167,117 @@
#define SLC_CTRL_BUSY 0x100
#define SLC_CTRL_RGN_OP_INV 0x200
+#define CACHE_LINE_MASK (~(gd->arch.l1_line_sz - 1))
+
/*
- * By default that variable will fall into .bss section.
- * But .bss section is not relocated and so it will be initilized before
- * relocation but will be used after being zeroed.
+ * We don't want to use '__always_inline' macro here as it can be redefined
+ * to simple 'inline' in some cases which breaks stuff. See [ NOTE 1 ] for more
+ * details about the reasons we need to use always_inline functions.
*/
-int l1_line_sz __section(".data");
-bool dcache_exists __section(".data") = false;
-bool icache_exists __section(".data") = false;
-
-#define CACHE_LINE_MASK (~(l1_line_sz - 1))
-
-#ifdef CONFIG_ISA_ARCV2
-int slc_line_sz __section(".data");
-bool slc_exists __section(".data") = false;
-bool ioc_exists __section(".data") = false;
-bool pae_exists __section(".data") = false;
+#define inlined_cachefunc inline __attribute__((always_inline))
-/* To force enable IOC set ioc_enable to 'true' */
-bool ioc_enable __section(".data") = false;
+static inlined_cachefunc void __ic_entire_invalidate(void);
+static inlined_cachefunc void __dc_entire_op(const int cacheop);
-void read_decode_mmu_bcr(void)
+static inline bool pae_exists(void)
{
/* TODO: should we compare mmu version from BCR and from CONFIG? */
#if (CONFIG_ARC_MMU_VER >= 4)
- u32 tmp;
+ union bcr_mmu_4 mmu4;
- tmp = read_aux_reg(ARC_AUX_MMU_BCR);
+ mmu4.word = read_aux_reg(ARC_AUX_MMU_BCR);
- struct bcr_mmu_4 {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1,
- n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3;
-#else
- /* DTLB ITLB JES JE JA */
- unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2,
- pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8;
-#endif /* CONFIG_CPU_BIG_ENDIAN */
- } *mmu4;
+ if (mmu4.fields.pae)
+ return true;
+#endif /* (CONFIG_ARC_MMU_VER >= 4) */
- mmu4 = (struct bcr_mmu_4 *)&tmp;
+ return false;
+}
- pae_exists = !!mmu4->pae;
-#endif /* (CONFIG_ARC_MMU_VER >= 4) */
+static inlined_cachefunc bool icache_exists(void)
+{
+ union bcr_di_cache ibcr;
+
+ ibcr.word = read_aux_reg(ARC_BCR_IC_BUILD);
+ return !!ibcr.fields.ver;
}
-static void __slc_entire_op(const int op)
+static inlined_cachefunc bool icache_enabled(void)
+{
+ if (!icache_exists())
+ return false;
+
+ return !(read_aux_reg(ARC_AUX_IC_CTRL) & IC_CTRL_CACHE_DISABLE);
+}
+
+static inlined_cachefunc bool dcache_exists(void)
+{
+ union bcr_di_cache dbcr;
+
+ dbcr.word = read_aux_reg(ARC_BCR_DC_BUILD);
+ return !!dbcr.fields.ver;
+}
+
+static inlined_cachefunc bool dcache_enabled(void)
+{
+ if (!dcache_exists())
+ return false;
+
+ return !(read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_CACHE_DISABLE);
+}
+
+static inlined_cachefunc bool slc_exists(void)
+{
+ if (is_isa_arcv2()) {
+ union bcr_generic sbcr;
+
+ sbcr.word = read_aux_reg(ARC_BCR_SLC);
+ return !!sbcr.fields.ver;
+ }
+
+ return false;
+}
+
+static inlined_cachefunc bool slc_data_bypass(void)
+{
+ /*
+ * If L1 data cache is disabled SL$ is bypassed and all load/store
+ * requests are sent directly to main memory.
+ */
+ return !dcache_enabled();
+}
+
+static inline bool ioc_exists(void)
+{
+ if (is_isa_arcv2()) {
+ union bcr_clust_cfg cbcr;
+
+ cbcr.word = read_aux_reg(ARC_BCR_CLUSTER);
+ return cbcr.fields.c;
+ }
+
+ return false;
+}
+
+static inline bool ioc_enabled(void)
+{
+ /*
+ * We check only CONFIG option instead of IOC HW state check as IOC
+ * must be disabled by default.
+ */
+ if (is_ioc_enabled())
+ return ioc_exists();
+
+ return false;
+}
+
+static inlined_cachefunc void __slc_entire_op(const int op)
{
unsigned int ctrl;
+ if (!slc_exists())
+ return;
+
ctrl = read_aux_reg(ARC_AUX_SLC_CTRL);
if (!(op & OP_FLUSH)) /* i.e. OP_INV */
@@ -104,6 +302,14 @@ static void __slc_entire_op(const int op)
static void slc_upper_region_init(void)
{
/*
+ * The ARC_AUX_SLC_RGN_START1 and ARC_AUX_SLC_RGN_END1 registers exist
+ * only if PAE exists in the current HW, so we have to check pae_exists()
+ * before using them.
+ */
+ if (!pae_exists())
+ return;
+
+ /*
* ARC_AUX_SLC_RGN_END1 and ARC_AUX_SLC_RGN_START1 are always == 0
* as we don't use PAE40.
*/
@@ -113,9 +319,14 @@ static void slc_upper_region_init(void)
static void __slc_rgn_op(unsigned long paddr, unsigned long sz, const int op)
{
+#ifdef CONFIG_ISA_ARCV2
+
unsigned int ctrl;
unsigned long end;
+ if (!slc_exists())
+ return;
+
/*
* The Region Flush operation is specified by CTRL.RGN_OP[11..9]
* - b'000 (default) is Flush,
@@ -142,7 +353,7 @@ static void __slc_rgn_op(unsigned long paddr, unsigned long sz, const int op)
* END needs to be setup before START (latter triggers the operation)
* END can't be same as START, so add (l2_line_sz - 1) to sz
*/
- end = paddr + sz + slc_line_sz - 1;
+ end = paddr + sz + gd->arch.slc_line_sz - 1;
/*
* Upper addresses (ARC_AUX_SLC_RGN_END1 and ARC_AUX_SLC_RGN_START1)
@@ -156,85 +367,82 @@ static void __slc_rgn_op(unsigned long paddr, unsigned long sz, const int op)
read_aux_reg(ARC_AUX_SLC_CTRL);
while (read_aux_reg(ARC_AUX_SLC_CTRL) & SLC_CTRL_BUSY);
-}
+
#endif /* CONFIG_ISA_ARCV2 */
+}
+
+static void arc_ioc_setup(void)
+{
+ /* IOC Aperture start is equal to DDR start */
+ unsigned int ap_base = CONFIG_SYS_SDRAM_BASE;
+ /* IOC Aperture size is equal to DDR size */
+ long ap_size = CONFIG_SYS_SDRAM_SIZE;
+
+ /* Unsupported configuration. See [ NOTE 2 ] for more details. */
+ if (!slc_exists())
+ panic("Try to enable IOC but SLC is not present");
+
+ /* Unsupported configuration. See [ NOTE 2 ] for more details. */
+ if (!dcache_enabled())
+ panic("Try to enable IOC but L1 D$ is disabled");
+
+ if (!is_power_of_2(ap_size) || ap_size < 4096)
+ panic("IOC Aperture size must be power of 2 and bigger 4Kib");
+
+ /* IOC Aperture start must be aligned to the size of the aperture */
+ if (ap_base % ap_size != 0)
+ panic("IOC Aperture start must be aligned to the size of the aperture");
+
+ flush_n_invalidate_dcache_all();
+
+ /*
+ * IOC Aperture size decoded as 2 ^ (SIZE + 2) KB,
+ * so setting 0x11 implies 512M, 0x12 implies 1G...
+ */
+ write_aux_reg(ARC_AUX_IO_COH_AP0_SIZE,
+ order_base_2(ap_size / 1024) - 2);
+
+ write_aux_reg(ARC_AUX_IO_COH_AP0_BASE, ap_base >> 12);
+ write_aux_reg(ARC_AUX_IO_COH_PARTIAL, 1);
+ write_aux_reg(ARC_AUX_IO_COH_ENABLE, 1);
+}
-#ifdef CONFIG_ISA_ARCV2
static void read_decode_cache_bcr_arcv2(void)
{
- union {
- struct {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int pad:24, way:2, lsz:2, sz:4;
-#else
- unsigned int sz:4, lsz:2, way:2, pad:24;
-#endif
- } fields;
- unsigned int word;
- } slc_cfg;
-
- union {
- struct {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int pad:24, ver:8;
-#else
- unsigned int ver:8, pad:24;
-#endif
- } fields;
- unsigned int word;
- } sbcr;
+#ifdef CONFIG_ISA_ARCV2
- sbcr.word = read_aux_reg(ARC_BCR_SLC);
- if (sbcr.fields.ver) {
+ union bcr_slc_cfg slc_cfg;
+
+ if (slc_exists()) {
slc_cfg.word = read_aux_reg(ARC_AUX_SLC_CONFIG);
- slc_exists = true;
- slc_line_sz = (slc_cfg.fields.lsz == 0) ? 128 : 64;
- }
+ gd->arch.slc_line_sz = (slc_cfg.fields.lsz == 0) ? 128 : 64;
- union {
- struct bcr_clust_cfg {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8;
-#else
- unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7;
-#endif
- } fields;
- unsigned int word;
- } cbcr;
+ /*
+ * We don't support configuration where L1 I$ or L1 D$ is
+ * absent but SL$ exists. See [ NOTE 2 ] for more details.
+ */
+ if (!icache_exists() || !dcache_exists())
+ panic("Unsupported cache configuration: SLC exists but one of L1 caches is absent");
+ }
- cbcr.word = read_aux_reg(ARC_BCR_CLUSTER);
- if (cbcr.fields.c && ioc_enable)
- ioc_exists = true;
+#endif /* CONFIG_ISA_ARCV2 */
}
-#endif
void read_decode_cache_bcr(void)
{
int dc_line_sz = 0, ic_line_sz = 0;
-
- union {
- struct {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
-#else
- unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
-#endif
- } fields;
- unsigned int word;
- } ibcr, dbcr;
+ union bcr_di_cache ibcr, dbcr;
ibcr.word = read_aux_reg(ARC_BCR_IC_BUILD);
if (ibcr.fields.ver) {
- icache_exists = true;
- l1_line_sz = ic_line_sz = 8 << ibcr.fields.line_len;
+ gd->arch.l1_line_sz = ic_line_sz = 8 << ibcr.fields.line_len;
if (!ic_line_sz)
panic("Instruction exists but line length is 0\n");
}
dbcr.word = read_aux_reg(ARC_BCR_DC_BUILD);
if (dbcr.fields.ver) {
- dcache_exists = true;
- l1_line_sz = dc_line_sz = 16 << dbcr.fields.line_len;
+ gd->arch.l1_line_sz = dc_line_sz = 16 << dbcr.fields.line_len;
if (!dc_line_sz)
panic("Data cache exists but line length is 0\n");
}
@@ -247,109 +455,79 @@ void cache_init(void)
{
read_decode_cache_bcr();
-#ifdef CONFIG_ISA_ARCV2
- read_decode_cache_bcr_arcv2();
-
- if (ioc_exists) {
- /* IOC Aperture start is equal to DDR start */
- unsigned int ap_base = CONFIG_SYS_SDRAM_BASE;
- /* IOC Aperture size is equal to DDR size */
- long ap_size = CONFIG_SYS_SDRAM_SIZE;
-
- flush_dcache_all();
- invalidate_dcache_all();
+ if (is_isa_arcv2())
+ read_decode_cache_bcr_arcv2();
- if (!is_power_of_2(ap_size) || ap_size < 4096)
- panic("IOC Aperture size must be power of 2 and bigger 4Kib");
-
- /*
- * IOC Aperture size decoded as 2 ^ (SIZE + 2) KB,
- * so setting 0x11 implies 512M, 0x12 implies 1G...
- */
- write_aux_reg(ARC_AUX_IO_COH_AP0_SIZE,
- order_base_2(ap_size / 1024) - 2);
-
- /* IOC Aperture start must be aligned to the size of the aperture */
- if (ap_base % ap_size != 0)
- panic("IOC Aperture start must be aligned to the size of the aperture");
-
- write_aux_reg(ARC_AUX_IO_COH_AP0_BASE, ap_base >> 12);
- write_aux_reg(ARC_AUX_IO_COH_PARTIAL, 1);
- write_aux_reg(ARC_AUX_IO_COH_ENABLE, 1);
- }
+ if (is_isa_arcv2() && ioc_enabled())
+ arc_ioc_setup();
- read_decode_mmu_bcr();
-
- /*
- * ARC_AUX_SLC_RGN_START1 and ARC_AUX_SLC_RGN_END1 register exist
- * only if PAE exists in current HW. So we had to check pae_exist
- * before using them.
- */
- if (slc_exists && pae_exists)
+ if (is_isa_arcv2() && slc_exists())
slc_upper_region_init();
-#endif /* CONFIG_ISA_ARCV2 */
}
int icache_status(void)
{
- if (!icache_exists)
- return 0;
-
- if (read_aux_reg(ARC_AUX_IC_CTRL) & IC_CTRL_CACHE_DISABLE)
- return 0;
- else
- return 1;
+ return icache_enabled();
}
void icache_enable(void)
{
- if (icache_exists)
+ if (icache_exists())
write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) &
~IC_CTRL_CACHE_DISABLE);
}
void icache_disable(void)
{
- if (icache_exists)
- write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) |
- IC_CTRL_CACHE_DISABLE);
+ if (!icache_exists())
+ return;
+
+ __ic_entire_invalidate();
+
+ write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) |
+ IC_CTRL_CACHE_DISABLE);
}
-void invalidate_icache_all(void)
+/* IC supports only invalidation */
+static inlined_cachefunc void __ic_entire_invalidate(void)
{
+ if (!icache_enabled())
+ return;
+
/* Any write to IC_IVIC register triggers invalidation of entire I$ */
- if (icache_status()) {
- write_aux_reg(ARC_AUX_IC_IVIC, 1);
- /*
- * As per ARC HS databook (see chapter 5.3.3.2)
- * it is required to add 3 NOPs after each write to IC_IVIC.
- */
- __builtin_arc_nop();
- __builtin_arc_nop();
- __builtin_arc_nop();
- read_aux_reg(ARC_AUX_IC_CTRL); /* blocks */
- }
+ write_aux_reg(ARC_AUX_IC_IVIC, 1);
+ /*
+ * As per ARC HS databook (see chapter 5.3.3.2)
+ * it is required to add 3 NOPs after each write to IC_IVIC.
+ */
+ __builtin_arc_nop();
+ __builtin_arc_nop();
+ __builtin_arc_nop();
+ read_aux_reg(ARC_AUX_IC_CTRL); /* blocks */
+}
-#ifdef CONFIG_ISA_ARCV2
- if (slc_exists)
+void invalidate_icache_all(void)
+{
+ __ic_entire_invalidate();
+
+ /*
+ * If SL$ is bypassed for data it is used only for instructions,
+ * so we need to invalidate it too.
+ * TODO: HS 3.0 supports SLC disable so we need to check slc
+ * enable/disable status here.
+ */
+ if (is_isa_arcv2() && slc_data_bypass())
__slc_entire_op(OP_INV);
-#endif
}
int dcache_status(void)
{
- if (!dcache_exists)
- return 0;
-
- if (read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_CACHE_DISABLE)
- return 0;
- else
- return 1;
+ return dcache_enabled();
}
void dcache_enable(void)
{
- if (!dcache_exists)
+ if (!dcache_exists())
return;
write_aux_reg(ARC_AUX_DC_CTRL, read_aux_reg(ARC_AUX_DC_CTRL) &
@@ -358,83 +536,77 @@ void dcache_enable(void)
void dcache_disable(void)
{
- if (!dcache_exists)
+ if (!dcache_exists())
return;
+ __dc_entire_op(OP_FLUSH_N_INV);
+
+ /*
+ * As SLC will be bypassed for data after L1 D$ disable we need to
+ * flush it first before L1 D$ disable. Also we invalidate SLC to
+ * avoid any inconsistent data problems after enabling L1 D$ again with
+ * dcache_enable function.
+ */
+ if (is_isa_arcv2())
+ __slc_entire_op(OP_FLUSH_N_INV);
+
write_aux_reg(ARC_AUX_DC_CTRL, read_aux_reg(ARC_AUX_DC_CTRL) |
DC_CTRL_CACHE_DISABLE);
}
-#ifndef CONFIG_SYS_DCACHE_OFF
-/*
- * Common Helper for Line Operations on {I,D}-Cache
- */
-static inline void __cache_line_loop(unsigned long paddr, unsigned long sz,
- const int cacheop)
+/* Common Helper for Line Operations on D-cache */
+static inline void __dcache_line_loop(unsigned long paddr, unsigned long sz,
+ const int cacheop)
{
unsigned int aux_cmd;
-#if (CONFIG_ARC_MMU_VER == 3)
- unsigned int aux_tag;
-#endif
int num_lines;
- if (cacheop == OP_INV_IC) {
- aux_cmd = ARC_AUX_IC_IVIL;
-#if (CONFIG_ARC_MMU_VER == 3)
- aux_tag = ARC_AUX_IC_PTAG;
-#endif
- } else {
- /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
- aux_cmd = cacheop & OP_INV ? ARC_AUX_DC_IVDL : ARC_AUX_DC_FLDL;
-#if (CONFIG_ARC_MMU_VER == 3)
- aux_tag = ARC_AUX_DC_PTAG;
-#endif
- }
+ /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
+ aux_cmd = cacheop & OP_INV ? ARC_AUX_DC_IVDL : ARC_AUX_DC_FLDL;
sz += paddr & ~CACHE_LINE_MASK;
paddr &= CACHE_LINE_MASK;
- num_lines = DIV_ROUND_UP(sz, l1_line_sz);
+ num_lines = DIV_ROUND_UP(sz, gd->arch.l1_line_sz);
while (num_lines-- > 0) {
#if (CONFIG_ARC_MMU_VER == 3)
- write_aux_reg(aux_tag, paddr);
+ write_aux_reg(ARC_AUX_DC_PTAG, paddr);
#endif
write_aux_reg(aux_cmd, paddr);
- paddr += l1_line_sz;
+ paddr += gd->arch.l1_line_sz;
}
}
-static unsigned int __before_dc_op(const int op)
+static inlined_cachefunc void __before_dc_op(const int op)
{
- unsigned int reg;
+ unsigned int ctrl;
- if (op == OP_INV) {
- /*
- * IM is set by default and implies Flush-n-inv
- * Clear it here for vanilla inv
- */
- reg = read_aux_reg(ARC_AUX_DC_CTRL);
- write_aux_reg(ARC_AUX_DC_CTRL, reg & ~DC_CTRL_INV_MODE_FLUSH);
- }
+ ctrl = read_aux_reg(ARC_AUX_DC_CTRL);
- return reg;
+ /* IM bit implies flush-n-inv, instead of vanilla inv */
+ if (op == OP_INV)
+ ctrl &= ~DC_CTRL_INV_MODE_FLUSH;
+ else
+ ctrl |= DC_CTRL_INV_MODE_FLUSH;
+
+ write_aux_reg(ARC_AUX_DC_CTRL, ctrl);
}
-static void __after_dc_op(const int op, unsigned int reg)
+static inlined_cachefunc void __after_dc_op(const int op)
{
if (op & OP_FLUSH) /* flush / flush-n-inv both wait */
while (read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_FLUSH_STATUS);
-
- /* Switch back to default Invalidate mode */
- if (op == OP_INV)
- write_aux_reg(ARC_AUX_DC_CTRL, reg | DC_CTRL_INV_MODE_FLUSH);
}
-static inline void __dc_entire_op(const int cacheop)
+static inlined_cachefunc void __dc_entire_op(const int cacheop)
{
int aux;
- unsigned int ctrl_reg = __before_dc_op(cacheop);
+
+ if (!dcache_enabled())
+ return;
+
+ __before_dc_op(cacheop);
if (cacheop & OP_INV) /* Inv or flush-n-inv use same cmd reg */
aux = ARC_AUX_DC_IVDC;
@@ -443,36 +615,36 @@ static inline void __dc_entire_op(const int cacheop)
write_aux_reg(aux, 0x1);
- __after_dc_op(cacheop, ctrl_reg);
+ __after_dc_op(cacheop);
}
static inline void __dc_line_op(unsigned long paddr, unsigned long sz,
const int cacheop)
{
- unsigned int ctrl_reg = __before_dc_op(cacheop);
+ if (!dcache_enabled())
+ return;
- __cache_line_loop(paddr, sz, cacheop);
- __after_dc_op(cacheop, ctrl_reg);
+ __before_dc_op(cacheop);
+ __dcache_line_loop(paddr, sz, cacheop);
+ __after_dc_op(cacheop);
}
-#else
-#define __dc_entire_op(cacheop)
-#define __dc_line_op(paddr, sz, cacheop)
-#endif /* !CONFIG_SYS_DCACHE_OFF */
void invalidate_dcache_range(unsigned long start, unsigned long end)
{
if (start >= end)
return;
-#ifdef CONFIG_ISA_ARCV2
- if (!ioc_exists)
-#endif
+ /*
+ * ARCv1 -> call __dc_line_op
+ * ARCv2 && L1 D$ disabled -> nothing
+ * ARCv2 && L1 D$ enabled && IOC enabled -> nothing
+ * ARCv2 && L1 D$ enabled && no IOC -> call __dc_line_op; call __slc_rgn_op
+ */
+ if (!is_isa_arcv2() || !ioc_enabled())
__dc_line_op(start, end - start, OP_INV);
-#ifdef CONFIG_ISA_ARCV2
- if (slc_exists && !ioc_exists)
+ if (is_isa_arcv2() && !ioc_enabled() && !slc_data_bypass())
__slc_rgn_op(start, end - start, OP_INV);
-#endif
}
void flush_dcache_range(unsigned long start, unsigned long end)
@@ -480,15 +652,17 @@ void flush_dcache_range(unsigned long start, unsigned long end)
if (start >= end)
return;
-#ifdef CONFIG_ISA_ARCV2
- if (!ioc_exists)
-#endif
+ /*
+ * ARCv1 -> call __dc_line_op
+ * ARCv2 && L1 D$ disabled -> nothing
+ * ARCv2 && L1 D$ enabled && IOC enabled -> nothing
+ * ARCv2 && L1 D$ enabled && no IOC -> call __dc_line_op; call __slc_rgn_op
+ */
+ if (!is_isa_arcv2() || !ioc_enabled())
__dc_line_op(start, end - start, OP_FLUSH);
-#ifdef CONFIG_ISA_ARCV2
- if (slc_exists && !ioc_exists)
+ if (is_isa_arcv2() && !ioc_enabled() && !slc_data_bypass())
__slc_rgn_op(start, end - start, OP_FLUSH);
-#endif
}
void flush_cache(unsigned long start, unsigned long size)
@@ -496,22 +670,47 @@ void flush_cache(unsigned long start, unsigned long size)
flush_dcache_range(start, start + size);
}
-void invalidate_dcache_all(void)
+/*
+ * As invalidate_dcache_all() is not used in generic U-Boot code and as we
+ * don't need it in arch/arc code alone (invalidate without flush) we implement
+ * flush_n_invalidate_dcache_all (flush and invalidate in 1 operation) because
+ * it's much safer. See [ NOTE 1 ] for more details.
+ */
+void flush_n_invalidate_dcache_all(void)
{
- __dc_entire_op(OP_INV);
+ __dc_entire_op(OP_FLUSH_N_INV);
-#ifdef CONFIG_ISA_ARCV2
- if (slc_exists)
- __slc_entire_op(OP_INV);
-#endif
+ if (is_isa_arcv2() && !slc_data_bypass())
+ __slc_entire_op(OP_FLUSH_N_INV);
}
void flush_dcache_all(void)
{
__dc_entire_op(OP_FLUSH);
-#ifdef CONFIG_ISA_ARCV2
- if (slc_exists)
+ if (is_isa_arcv2() && !slc_data_bypass())
__slc_entire_op(OP_FLUSH);
-#endif
+}
+
+/*
+ * This function cleans up all caches (and therefore syncs the I/D caches). It
+ * can be used for cleanup before Linux launch or to sync caches during
+ * relocation.
+ */
+void sync_n_cleanup_cache_all(void)
+{
+ __dc_entire_op(OP_FLUSH_N_INV);
+
+ /*
+ * If SL$ is bypassed for data it is used only for instructions,
+ * and we shouldn't flush it. So invalidate it instead of flush_n_inv.
+ */
+ if (is_isa_arcv2()) {
+ if (slc_data_bypass())
+ __slc_entire_op(OP_INV);
+ else
+ __slc_entire_op(OP_FLUSH_N_INV);
+ }
+
+ __ic_entire_invalidate();
}
diff --git a/arch/arc/lib/init_helpers.c b/arch/arc/lib/init_helpers.c
index dbc8d68ffb..435fe96ef4 100644
--- a/arch/arc/lib/init_helpers.c
+++ b/arch/arc/lib/init_helpers.c
@@ -4,14 +4,14 @@
* SPDX-License-Identifier: GPL-2.0+
*/
+#include <asm/cache.h>
#include <common.h>
DECLARE_GLOBAL_DATA_PTR;
int init_cache_f_r(void)
{
-#ifndef CONFIG_SYS_DCACHE_OFF
- flush_dcache_all();
-#endif
+ sync_n_cleanup_cache_all();
+
return 0;
}
diff --git a/arch/arc/lib/memcmp.S b/arch/arc/lib/memcmp.S
deleted file mode 100644
index 87bccab51d..0000000000
--- a/arch/arc/lib/memcmp.S
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
- *
- * SPDX-License-Identifier: GPL-2.0+
- */
-
-#ifdef __LITTLE_ENDIAN__
-#define WORD2 r2
-#define SHIFT r3
-#else /* __BIG_ENDIAN__ */
-#define WORD2 r3
-#define SHIFT r2
-#endif /* _ENDIAN__ */
-
-.global memcmp
-.align 4
-memcmp:
- or %r12, %r0, %r1
- asl_s %r12, %r12, 30
- sub %r3, %r2, 1
- brls %r2, %r12, .Lbytewise
- ld %r4, [%r0, 0]
- ld %r5, [%r1, 0]
- lsr.f %lp_count, %r3, 3
- lpne .Loop_end
- ld_s WORD2, [%r0, 4]
- ld_s %r12, [%r1, 4]
- brne %r4, %r5, .Leven
- ld.a %r4, [%r0, 8]
- ld.a %r5, [%r1, 8]
- brne WORD2, %r12, .Lodd
- nop
-.Loop_end:
- asl_s SHIFT, SHIFT, 3
- bhs_s .Last_cmp
- brne %r4, %r5, .Leven
- ld %r4, [%r0, 4]
- ld %r5, [%r1, 4]
-#ifdef __LITTLE_ENDIAN__
- nop_s
- /* one more load latency cycle */
-.Last_cmp:
- xor %r0, %r4, %r5
- bset %r0, %r0, SHIFT
- sub_s %r1, %r0, 1
- bic_s %r1, %r1, %r0
- norm %r1, %r1
- b.d .Leven_cmp
- and %r1, %r1, 24
-.Leven:
- xor %r0, %r4, %r5
- sub_s %r1, %r0, 1
- bic_s %r1, %r1, %r0
- norm %r1, %r1
- /* slow track insn */
- and %r1, %r1, 24
-.Leven_cmp:
- asl %r2, %r4, %r1
- asl %r12, %r5, %r1
- lsr_s %r2, %r2, 1
- lsr_s %r12, %r12, 1
- j_s.d [%blink]
- sub %r0, %r2, %r12
- .balign 4
-.Lodd:
- xor %r0, WORD2, %r12
- sub_s %r1, %r0, 1
- bic_s %r1, %r1, %r0
- norm %r1, %r1
- /* slow track insn */
- and %r1, %r1, 24
- asl_s %r2, %r2, %r1
- asl_s %r12, %r12, %r1
- lsr_s %r2, %r2, 1
- lsr_s %r12, %r12, 1
- j_s.d [%blink]
- sub %r0, %r2, %r12
-#else /* __BIG_ENDIAN__ */
-.Last_cmp:
- neg_s SHIFT, SHIFT
- lsr %r4, %r4, SHIFT
- lsr %r5, %r5, SHIFT
- /* slow track insn */
-.Leven:
- sub.f %r0, %r4, %r5
- mov.ne %r0, 1
- j_s.d [%blink]
- bset.cs %r0, %r0, 31
-.Lodd:
- cmp_s WORD2, %r12
-
- mov_s %r0, 1
- j_s.d [%blink]
- bset.cs %r0, %r0, 31
-#endif /* _ENDIAN__ */
- .balign 4
-.Lbytewise:
- breq %r2, 0, .Lnil
- ldb %r4, [%r0, 0]
- ldb %r5, [%r1, 0]
- lsr.f %lp_count, %r3
- lpne .Lbyte_end
- ldb_s %r3, [%r0, 1]
- ldb %r12, [%r1, 1]
- brne %r4, %r5, .Lbyte_even
- ldb.a %r4, [%r0, 2]
- ldb.a %r5, [%r1, 2]
- brne %r3, %r12, .Lbyte_odd
- nop
-.Lbyte_end:
- bcc .Lbyte_even
- brne %r4, %r5, .Lbyte_even
- ldb_s %r3, [%r0, 1]
- ldb_s %r12, [%r1, 1]
-.Lbyte_odd:
- j_s.d [%blink]
- sub %r0, %r3, %r12
-.Lbyte_even:
- j_s.d [%blink]
- sub %r0, %r4, %r5
-.Lnil:
- j_s.d [%blink]
- mov %r0, 0
diff --git a/arch/arc/lib/memcpy-700.S b/arch/arc/lib/memcpy-700.S
deleted file mode 100644
index 51dd73ab8f..0000000000
--- a/arch/arc/lib/memcpy-700.S
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
- *
- * SPDX-License-Identifier: GPL-2.0+
- */
-
-.global memcpy
-.align 4
-memcpy:
- or %r3, %r0, %r1
- asl_s %r3, %r3, 30
- mov_s %r5, %r0
- brls.d %r2, %r3, .Lcopy_bytewise
- sub.f %r3, %r2, 1
- ld_s %r12, [%r1, 0]
- asr.f %lp_count, %r3, 3
- bbit0.d %r3, 2, .Lnox4
- bmsk_s %r2, %r2, 1
- st.ab %r12, [%r5, 4]
- ld.a %r12, [%r1, 4]
-.Lnox4:
- lppnz .Lendloop
- ld_s %r3, [%r1, 4]
- st.ab %r12, [%r5, 4]
- ld.a %r12, [%r1, 8]
- st.ab %r3, [%r5, 4]
-.Lendloop:
- breq %r2, 0, .Last_store
- ld %r3, [%r5, 0]
-#ifdef __LITTLE_ENDIAN__
- add3 %r2, -1, %r2
- /* uses long immediate */
- xor_s %r12, %r12, %r3
- bmsk %r12, %r12, %r2
- xor_s %r12, %r12, %r3
-#else /* __BIG_ENDIAN__ */
- sub3 %r2, 31, %r2
- /* uses long immediate */
- xor_s %r3, %r3, %r12
- bmsk %r3, %r3, %r2
- xor_s %r12, %r12, %r3
-#endif /* _ENDIAN__ */
-.Last_store:
- j_s.d [%blink]
- st %r12, [%r5, 0]
-
- .balign 4
-.Lcopy_bytewise:
- jcs [%blink]
- ldb_s %r12, [%r1, 0]
- lsr.f %lp_count, %r3
- bhs_s .Lnox1
- stb.ab %r12, [%r5, 1]
- ldb.a %r12, [%r1, 1]
-.Lnox1:
- lppnz .Lendbloop
- ldb_s %r3, [%r1, 1]
- stb.ab %r12, [%r5, 1]
- ldb.a %r12, [%r1, 2]
- stb.ab %r3, [%r5, 1]
-.Lendbloop:
- j_s.d [%blink]
- stb %r12, [%r5, 0]
diff --git a/arch/arc/lib/memset.S b/arch/arc/lib/memset.S
deleted file mode 100644
index 017e8af0e8..0000000000
--- a/arch/arc/lib/memset.S
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
- *
- * SPDX-License-Identifier: GPL-2.0+
- */
-
-#define SMALL 7 /* Must be at least 6 to deal with alignment/loop issues. */
-
-.global memset
-.align 4
-memset:
- mov_s %r4, %r0
- or %r12, %r0, %r2
- bmsk.f %r12, %r12, 1
- extb_s %r1, %r1
- asl %r3, %r1, 8
- beq.d .Laligned
- or_s %r1, %r1, %r3
- brls %r2, SMALL, .Ltiny
- add %r3, %r2, %r0
- stb %r1, [%r3, -1]
- bclr_s %r3, %r3, 0
- stw %r1, [%r3, -2]
- bmsk.f %r12, %r0, 1
- add_s %r2, %r2, %r12
- sub.ne %r2, %r2, 4
- stb.ab %r1, [%r4, 1]
- and %r4, %r4, -2
- stw.ab %r1, [%r4, 2]
- and %r4, %r4, -4
-
- .balign 4
-.Laligned:
- asl %r3, %r1, 16
- lsr.f %lp_count, %r2, 2
- or_s %r1, %r1, %r3
- lpne .Loop_end
- st.ab %r1, [%r4, 4]
-.Loop_end:
- j_s [%blink]
-
- .balign 4
-.Ltiny:
- mov.f %lp_count, %r2
- lpne .Ltiny_end
- stb.ab %r1, [%r4, 1]
-.Ltiny_end:
- j_s [%blink]
-
-/*
- * memzero: @r0 = mem, @r1 = size_t
- * memset: @r0 = mem, @r1 = char, @r2 = size_t
- */
-
-.global memzero
-.align 4
-memzero:
- /* adjust bzero args to memset args */
- mov %r2, %r1
- mov %r1, 0
- /* tail call so need to tinker with blink */
- b memset
diff --git a/arch/arc/lib/relocate.c b/arch/arc/lib/relocate.c
index 7802f40545..96b4bd3d8f 100644
--- a/arch/arc/lib/relocate.c
+++ b/arch/arc/lib/relocate.c
@@ -17,6 +17,9 @@ int copy_uboot_to_ram(void)
{
size_t len = (size_t)&__image_copy_end - (size_t)&__image_copy_start;
+ if (gd->flags & GD_FLG_SKIP_RELOC)
+ return 0;
+
memcpy((void *)gd->relocaddr, (void *)&__image_copy_start, len);
return 0;
@@ -40,6 +43,9 @@ int do_elf_reloc_fixups(void)
Elf32_Rela *re_src = (Elf32_Rela *)(&__rel_dyn_start);
Elf32_Rela *re_end = (Elf32_Rela *)(&__rel_dyn_end);
+ if (gd->flags & GD_FLG_SKIP_RELOC)
+ return 0;
+
debug("Section .rela.dyn is located at %08x-%08x\n",
(unsigned int)re_src, (unsigned int)re_end);
diff --git a/arch/arc/lib/start.S b/arch/arc/lib/start.S
index 0d72fe71d4..c78dd001d8 100644
--- a/arch/arc/lib/start.S
+++ b/arch/arc/lib/start.S
@@ -10,26 +10,6 @@
#include <asm/arcregs.h>
ENTRY(_start)
-; ARCompact devices are not supposed to be SMP so master/slave check
-; makes no sense.
-#ifdef CONFIG_ISA_ARCV2
- ; Non-masters will be halted immediately, they might be kicked later
- ; by platform code right before passing control to the Linux kernel
- ; in bootm.c:boot_jump_linux().
- lr r5, [identity]
- lsr r5, r5, 8
- bmsk r5, r5, 7
- cmp r5, 0
- mov.nz r0, r5
- bz .Lmaster_proceed
- flag 1
- nop
- nop
- nop
-
-.Lmaster_proceed:
-#endif
-
/* Setup interrupt vector base that matches "__text_start" */
sr __ivt_start, [ARC_AUX_INTR_VEC_BASE]
@@ -98,7 +78,13 @@ ENTRY(_start)
/* Zero the one and only argument of "board_init_f" */
mov_s %r0, 0
- j board_init_f
+ bl board_init_f
+
+ /* We only get here if relocation is disabled by GD_FLG_SKIP_RELOC */
+ /* Make sure we don't lose GD overwritten by zero new GD */
+ mov %r0, %r25
+ mov %r1, 0
+ bl board_init_r
ENDPROC(_start)
/*
diff --git a/arch/arc/lib/strchr-700.S b/arch/arc/lib/strchr-700.S
deleted file mode 100644
index 55fcc9fb00..0000000000
--- a/arch/arc/lib/strchr-700.S
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
- *
- * SPDX-License-Identifier: GPL-2.0+
- */
-
-/*
- * ARC700 has a relatively long pipeline and branch prediction, so we want
- * to avoid branches that are hard to predict. On the other hand, the
- * presence of the norm instruction makes it easier to operate on whole
- * words branch-free.
- */
-
-.global strchr
-.align 4
-strchr:
- extb_s %r1, %r1
- asl %r5, %r1, 8
- bmsk %r2, %r0, 1
- or %r5, %r5, %r1
- mov_s %r3, 0x01010101
- breq.d %r2, %r0, .Laligned
- asl %r4, %r5, 16
- sub_s %r0, %r0, %r2
- asl %r7, %r2, 3
- ld_s %r2, [%r0]
-#ifdef __LITTLE_ENDIAN__
- asl %r7, %r3, %r7
-#else /* __BIG_ENDIAN__ */
- lsr %r7, %r3, %r7
-#endif /* _ENDIAN__ */
- or %r5, %r5, %r4
- ror %r4, %r3
- sub %r12, %r2, %r7
- bic_s %r12, %r12, %r2
- and %r12, %r12, %r4
- brne.d %r12, 0, .Lfound0_ua
- xor %r6, %r2, %r5
- ld.a %r2, [%r0, 4]
- sub %r12, %r6, %r7
- bic %r12, %r12, %r6
-#ifdef __LITTLE_ENDIAN__
- and %r7, %r12, %r4
- /* For speed, we want this branch to be unaligned. */
- breq %r7, 0, .Loop
- /* Likewise this one */
- b .Lfound_char
-#else /* __BIG_ENDIAN__ */
- and %r12, %r12, %r4
- /* For speed, we want this branch to be unaligned. */
- breq %r12, 0, .Loop
- lsr_s %r12, %r12, 7
- bic %r2, %r7, %r6
- b.d .Lfound_char_b
- and_s %r2, %r2, %r12
-#endif /* _ENDIAN__ */
- /* We require this code address to be unaligned for speed... */
-.Laligned:
- ld_s %r2, [%r0]
- or %r5, %r5, %r4
- ror %r4, %r3
- /* ... so that this code address is aligned, for itself and ... */
-.Loop:
- sub %r12, %r2, %r3
- bic_s %r12, %r12, %r2
- and %r12, %r12, %r4
- brne.d %r12, 0, .Lfound0
- xor %r6, %r2, %r5
- ld.a %r2, [%r0, 4]
- sub %r12, %r6, %r3
- bic %r12, %r12, %r6
- and %r7, %r12, %r4
- breq %r7, 0, .Loop
- /*
- *... so that this branch is unaligned.
- * Found searched-for character.
- * r0 has already advanced to next word.
- */
-#ifdef __LITTLE_ENDIAN__
- /*
- * We only need the information about the first matching byte
- * (i.e. the least significant matching byte) to be exact,
- * hence there is no problem with carry effects.
- */
-.Lfound_char:
- sub %r3, %r7, 1
- bic %r3, %r3, %r7
- norm %r2, %r3
- sub_s %r0, %r0, 1
- asr_s %r2, %r2, 3
- j.d [%blink]
- sub_s %r0, %r0, %r2
-
- .balign 4
-.Lfound0_ua:
- mov %r3, %r7
-.Lfound0:
- sub %r3, %r6, %r3
- bic %r3, %r3, %r6
- and %r2, %r3, %r4
- or_s %r12, %r12, %r2
- sub_s %r3, %r12, 1
- bic_s %r3, %r3, %r12
- norm %r3, %r3
- add_s %r0, %r0, 3
- asr_s %r12, %r3, 3
- asl.f 0, %r2, %r3
- sub_s %r0, %r0, %r12
- j_s.d [%blink]
- mov.pl %r0, 0
-#else /* __BIG_ENDIAN__ */
-.Lfound_char:
- lsr %r7, %r7, 7
-
- bic %r2, %r7, %r6
-.Lfound_char_b:
- norm %r2, %r2
- sub_s %r0, %r0, 4
- asr_s %r2, %r2, 3
- j.d [%blink]
- add_s %r0, %r0, %r2
-
-.Lfound0_ua:
- mov_s %r3, %r7
-.Lfound0:
- asl_s %r2, %r2, 7
- or %r7, %r6, %r4
- bic_s %r12, %r12, %r2
- sub %r2, %r7, %r3
- or %r2, %r2, %r6
- bic %r12, %r2, %r12
- bic.f %r3, %r4, %r12
- norm %r3, %r3
-
- add.pl %r3, %r3, 1
- asr_s %r12, %r3, 3
- asl.f 0, %r2, %r3
- add_s %r0, %r0, %r12
- j_s.d [%blink]
- mov.mi %r0, 0
-#endif /* _ENDIAN__ */
diff --git a/arch/arc/lib/strcmp.S b/arch/arc/lib/strcmp.S
deleted file mode 100644
index 8cb7d2f18c..0000000000
--- a/arch/arc/lib/strcmp.S
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
- *
- * SPDX-License-Identifier: GPL-2.0+
- */
-
-/*
- * This is optimized primarily for the ARC700.
- * It would be possible to speed up the loops by one cycle / word
- * respective one cycle / byte by forcing double source 1 alignment, unrolling
- * by a factor of two, and speculatively loading the second word / byte of
- * source 1; however, that would increase the overhead for loop setup / finish,
- * and strcmp might often terminate early.
- */
-
-.global strcmp
-.align 4
-strcmp:
- or %r2, %r0, %r1
- bmsk_s %r2, %r2, 1
- brne %r2, 0, .Lcharloop
- mov_s %r12, 0x01010101
- ror %r5, %r12
-.Lwordloop:
- ld.ab %r2, [%r0, 4]
- ld.ab %r3, [%r1, 4]
- nop_s
- sub %r4, %r2, %r12
- bic %r4, %r4, %r2
- and %r4, %r4, %r5
- brne %r4, 0, .Lfound0
- breq %r2 ,%r3, .Lwordloop
-#ifdef __LITTLE_ENDIAN__
- xor %r0, %r2, %r3 /* mask for difference */
- sub_s %r1, %r0, 1
- bic_s %r0, %r0, %r1 /* mask for least significant difference bit */
- sub %r1, %r5, %r0
- xor %r0, %r5, %r1 /* mask for least significant difference byte */
- and_s %r2, %r2, %r0
- and_s %r3, %r3, %r0
-#endif /* _ENDIAN__ */
- cmp_s %r2, %r3
- mov_s %r0, 1
- j_s.d [%blink]
- bset.lo %r0, %r0, 31
-
- .balign 4
-#ifdef __LITTLE_ENDIAN__
-.Lfound0:
- xor %r0, %r2, %r3 /* mask for difference */
- or %r0, %r0, %r4 /* or in zero indicator */
- sub_s %r1, %r0, 1
- bic_s %r0, %r0, %r1 /* mask for least significant difference bit */
- sub %r1, %r5, %r0
- xor %r0, %r5, %r1 /* mask for least significant difference byte */
- and_s %r2, %r2, %r0
- and_s %r3, %r3, %r0
- sub.f %r0, %r2, %r3
- mov.hi %r0, 1
- j_s.d [%blink]
- bset.lo %r0, %r0, 31
-#else /* __BIG_ENDIAN__ */
- /*
- * The zero-detection above can mis-detect 0x01 bytes as zeroes
- * because of carry-propagateion from a lower significant zero byte.
- * We can compensate for this by checking that bit0 is zero.
- * This compensation is not necessary in the step where we
- * get a low estimate for r2, because in any affected bytes
- * we already have 0x00 or 0x01, which will remain unchanged
- * when bit 7 is cleared.
- */
- .balign 4
-.Lfound0:
- lsr %r0, %r4, 8
- lsr_s %r1, %r2
- bic_s %r2, %r2, %r0 /* get low estimate for r2 and get ... */
- bic_s %r0, %r0, %r1 /* <this is the adjusted mask for zeros> */
- or_s %r3, %r3, %r0 /* ... high estimate r3 so that r2 > r3 will */
- cmp_s %r3, %r2 /* ... be independent of trailing garbage */
- or_s %r2, %r2, %r0 /* likewise for r3 > r2 */
- bic_s %r3, %r3, %r0
- rlc %r0, 0 /* r0 := r2 > r3 ? 1 : 0 */
- cmp_s %r2, %r3
- j_s.d [%blink]
- bset.lo %r0, %r0, 31
-#endif /* _ENDIAN__ */
-
- .balign 4
-.Lcharloop:
- ldb.ab %r2,[%r0,1]
- ldb.ab %r3,[%r1,1]
- nop_s
- breq %r2, 0, .Lcmpend
- breq %r2, %r3, .Lcharloop
-.Lcmpend:
- j_s.d [%blink]
- sub %r0, %r2, %r3
diff --git a/arch/arc/lib/strcpy-700.S b/arch/arc/lib/strcpy-700.S
deleted file mode 100644
index 41bb53e501..0000000000
--- a/arch/arc/lib/strcpy-700.S
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
- *
- * SPDX-License-Identifier: GPL-2.0+
- */
-
-/*
- * If dst and src are 4 byte aligned, copy 8 bytes at a time.
- * If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
- * it 8 byte aligned. Thus, we can do a little read-ahead, without
- * dereferencing a cache line that we should not touch.
- * Note that short and long instructions have been scheduled to avoid
- * branch stalls.
- * The beq_s to r3z could be made unaligned & long to avoid a stall
- * there, but it is not likely to be taken often, and it would also be likely
- * to cost an unaligned mispredict at the next call.
- */
-
-.global strcpy
-.align 4
-strcpy:
- or %r2, %r0, %r1
- bmsk_s %r2, %r2, 1
- brne.d %r2, 0, charloop
- mov_s %r10, %r0
- ld_s %r3, [%r1, 0]
- mov %r8, 0x01010101
- bbit0.d %r1, 2, loop_start
- ror %r12, %r8
- sub %r2, %r3, %r8
- bic_s %r2, %r2, %r3
- tst_s %r2,%r12
- bne r3z
- mov_s %r4,%r3
- .balign 4
-loop:
- ld.a %r3, [%r1, 4]
- st.ab %r4, [%r10, 4]
-loop_start:
- ld.a %r4, [%r1, 4]
- sub %r2, %r3, %r8
- bic_s %r2, %r2, %r3
- tst_s %r2, %r12
- bne_s r3z
- st.ab %r3, [%r10, 4]
- sub %r2, %r4, %r8
- bic %r2, %r2, %r4
- tst %r2, %r12
- beq loop
- mov_s %r3, %r4
-#ifdef __LITTLE_ENDIAN__
-r3z: bmsk.f %r1, %r3, 7
- lsr_s %r3, %r3, 8
-#else /* __BIG_ENDIAN__ */
-r3z: lsr.f %r1, %r3, 24
- asl_s %r3, %r3, 8
-#endif /* _ENDIAN__ */
- bne.d r3z
- stb.ab %r1, [%r10, 1]
- j_s [%blink]
-
- .balign 4
-charloop:
- ldb.ab %r3, [%r1, 1]
- brne.d %r3, 0, charloop
- stb.ab %r3, [%r10, 1]
- j [%blink]
diff --git a/arch/arc/lib/strlen.S b/arch/arc/lib/strlen.S
deleted file mode 100644
index 666e22c0d5..0000000000
--- a/arch/arc/lib/strlen.S
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved.
- *
- * SPDX-License-Identifier: GPL-2.0+
- */
-
-.global strlen
-.align 4
-strlen:
- or %r3, %r0, 7
- ld %r2, [%r3, -7]
- ld.a %r6, [%r3, -3]
- mov %r4, 0x01010101
- /* uses long immediate */
-#ifdef __LITTLE_ENDIAN__
- asl_s %r1, %r0, 3
- btst_s %r0, 2
- asl %r7, %r4, %r1
- ror %r5, %r4
- sub %r1, %r2, %r7
- bic_s %r1, %r1, %r2
- mov.eq %r7, %r4
- sub %r12, %r6, %r7
- bic %r12, %r12, %r6
- or.eq %r12, %r12, %r1
- and %r12, %r12, %r5
- brne %r12, 0, .Learly_end
-#else /* __BIG_ENDIAN__ */
- ror %r5, %r4
- btst_s %r0, 2
- mov_s %r1, 31
- sub3 %r7, %r1, %r0
- sub %r1, %r2, %r4
- bic_s %r1, %r1, %r2
- bmsk %r1, %r1, %r7
- sub %r12, %r6, %r4
- bic %r12, %r12, %r6
- bmsk.ne %r12, %r12, %r7
- or.eq %r12, %r12, %r1
- and %r12, %r12, %r5
- brne %r12, 0, .Learly_end
-#endif /* _ENDIAN__ */
-
-.Loop:
- ld_s %r2, [%r3, 4]
- ld.a %r6, [%r3, 8]
- /* stall for load result */
- sub %r1, %r2, %r4
- bic_s %r1, %r1, %r2
- sub %r12, %r6, %r4
- bic %r12, %r12, %r6
- or %r12, %r12, %r1
- and %r12, %r12, %r5
- breq %r12, 0, .Loop
-.Lend:
- and.f %r1, %r1, %r5
- sub.ne %r3, %r3, 4
- mov.eq %r1, %r12
-#ifdef __LITTLE_ENDIAN__
- sub_s %r2, %r1, 1
- bic_s %r2, %r2, %r1
- norm %r1, %r2
- sub_s %r0, %r0, 3
- lsr_s %r1, %r1, 3
- sub %r0, %r3, %r0
- j_s.d [%blink]
- sub %r0, %r0, %r1
-#else /* __BIG_ENDIAN__ */
- lsr_s %r1, %r1, 7
- mov.eq %r2, %r6
- bic_s %r1, %r1, %r2
- norm %r1, %r1
- sub %r0, %r3, %r0
- lsr_s %r1, %r1, 3
- j_s.d [%blink]
- add %r0, %r0, %r1
-#endif /* _ENDIAN */
-.Learly_end:
- b.d .Lend
- sub_s.ne %r1, %r1, %r1