-rw-r--r-- | arch/arc/Kconfig                   |  18
-rw-r--r-- | arch/arc/config.mk                 |   3
-rw-r--r-- | arch/arc/include/asm/arc-bcr.h     |  77
-rw-r--r-- | arch/arc/include/asm/arcregs.h     |  11
-rw-r--r-- | arch/arc/include/asm/cache.h       |   7
-rw-r--r-- | arch/arc/include/asm/global_data.h |   6
-rw-r--r-- | arch/arc/include/asm/io.h          |   8
-rw-r--r-- | arch/arc/include/asm/string.h      |  26
-rw-r--r-- | arch/arc/lib/Makefile              |   7
-rw-r--r-- | arch/arc/lib/bootm.c               |  55
-rw-r--r-- | arch/arc/lib/cache.c               | 677
-rw-r--r-- | arch/arc/lib/init_helpers.c        |   6
-rw-r--r-- | arch/arc/lib/memcmp.S              | 123
-rw-r--r-- | arch/arc/lib/memcpy-700.S          |  63
-rw-r--r-- | arch/arc/lib/memset.S              |  62
-rw-r--r-- | arch/arc/lib/relocate.c            |   6
-rw-r--r-- | arch/arc/lib/start.S               |  28
-rw-r--r-- | arch/arc/lib/strchr-700.S          | 141
-rw-r--r-- | arch/arc/lib/strcmp.S              |  97
-rw-r--r-- | arch/arc/lib/strcpy-700.S          |  67
-rw-r--r-- | arch/arc/lib/strlen.S              |  80
-rw-r--r-- | board/synopsys/axs10x/axs10x.c     |  12
-rw-r--r-- | board/synopsys/hsdk/hsdk.c         |  11
-rw-r--r-- | common/board_f.c                   |   3
24 files changed, 635 insertions, 959 deletions
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index e3f9db7b29..aee15d5353 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -116,6 +116,24 @@ config SYS_DCACHE_OFF bool "Do not use Data Cache" default n +menuconfig ARC_DBG + bool "ARC debugging" + default n + +if ARC_DBG + +config ARC_DBG_IOC_ENABLE + bool "Enable IO coherency unit" + depends on CPU_ARCHS38 + default n + help + Enable IO coherency unit to debug problems with caches and + DMA peripherals. + NOTE: as of today linux will not work properly if this option + is enabled in u-boot! + +endif + choice prompt "Target select" default TARGET_AXS103 diff --git a/arch/arc/config.mk b/arch/arc/config.mk index 3ed0c282ba..d040454d1a 100644 --- a/arch/arc/config.mk +++ b/arch/arc/config.mk @@ -51,9 +51,10 @@ PLATFORM_CPPFLAGS += -mcpu=archs endif PLATFORM_CPPFLAGS += -ffixed-r25 -D__ARC__ -gdwarf-2 -mno-sdata +PLATFORM_RELFLAGS += -ffunction-sections -fdata-sections # Needed for relocation -LDFLAGS_FINAL += -pie +LDFLAGS_FINAL += -pie --gc-sections # Load address for standalone apps CONFIG_STANDALONE_LOAD_ADDR ?= 0x82000000 diff --git a/arch/arc/include/asm/arc-bcr.h b/arch/arc/include/asm/arc-bcr.h new file mode 100644 index 0000000000..823906d946 --- /dev/null +++ b/arch/arc/include/asm/arc-bcr.h @@ -0,0 +1,77 @@ +/* + * ARC Build Configuration Registers, with encoded hardware config + * + * Copyright (C) 2018 Synopsys + * Author: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com> + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#ifndef __ARC_BCR_H +#define __ARC_BCR_H +#ifndef __ASSEMBLY__ + +#include <config.h> + +union bcr_di_cache { + struct { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:12, line_len:4, sz:4, config:4, ver:8; +#else + unsigned int ver:8, config:4, sz:4, line_len:4, pad:12; +#endif + } fields; + unsigned int word; +}; + +union bcr_slc_cfg { + struct { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:24, way:2, lsz:2, sz:4; +#else + unsigned int sz:4, lsz:2, way:2, pad:24; +#endif + } fields; + unsigned int word; +}; + +union bcr_generic { + struct { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:24, ver:8; +#else + unsigned int ver:8, pad:24; +#endif + } fields; + unsigned int word; +}; + +union bcr_clust_cfg { + struct { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8; +#else + unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7; +#endif + } fields; + unsigned int word; +}; + +union bcr_mmu_4 { + struct { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1, + n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3; +#else + /* DTLB ITLB JES JE JA */ + unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2, + pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8; +#endif + } fields; + unsigned int word; +}; + +#endif /* __ASSEMBLY__ */ +#endif /* __ARC_BCR_H */ diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 67f416305d..3a513149f5 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -8,6 +8,7 @@ #define _ASM_ARC_ARCREGS_H #include <asm/cache.h> +#include <config.h> /* * ARC architecture has additional address space - auxiliary registers. 
@@ -88,6 +89,16 @@ /* ARCNUM [15:8] - field to identify each core in a multi-core system */ #define CPU_ID_GET() ((read_aux_reg(ARC_AUX_IDENTITY) & 0xFF00) >> 8) + +static const inline int is_isa_arcv2(void) +{ + return IS_ENABLED(CONFIG_ISA_ARCV2); +} + +static const inline int is_isa_arcompact(void) +{ + return IS_ENABLED(CONFIG_ISA_ARCOMPACT); +} #endif /* __ASSEMBLY__ */ #endif /* _ASM_ARC_ARCREGS_H */ diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index d26d9fb18d..2269183615 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -30,6 +30,13 @@ #ifndef __ASSEMBLY__ void cache_init(void); +void flush_n_invalidate_dcache_all(void); +void sync_n_cleanup_cache_all(void); + +static const inline int is_ioc_enabled(void) +{ + return IS_ENABLED(CONFIG_ARC_DBG_IOC_ENABLE); +} #endif /* __ASSEMBLY__ */ diff --git a/arch/arc/include/asm/global_data.h b/arch/arc/include/asm/global_data.h index f0242f1ad6..43e1343095 100644 --- a/arch/arc/include/asm/global_data.h +++ b/arch/arc/include/asm/global_data.h @@ -7,9 +7,15 @@ #ifndef __ASM_ARC_GLOBAL_DATA_H #define __ASM_ARC_GLOBAL_DATA_H +#include <config.h> + #ifndef __ASSEMBLY__ /* Architecture-specific global data */ struct arch_global_data { + int l1_line_sz; +#if defined(CONFIG_ISA_ARCV2) + int slc_line_sz; +#endif }; #endif /* __ASSEMBLY__ */ diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h index a12303bc73..060cdf637b 100644 --- a/arch/arc/include/asm/io.h +++ b/arch/arc/include/asm/io.h @@ -10,7 +10,7 @@ #include <linux/types.h> #include <asm/byteorder.h> -#ifdef CONFIG_ISA_ARCV2 +#ifdef __ARCHS__ /* * ARCv2 based HS38 cores are in-order issue, but still weakly ordered @@ -42,12 +42,12 @@ #define mb() asm volatile("sync\n" : : : "memory") #endif -#ifdef CONFIG_ISA_ARCV2 +#ifdef __ARCHS__ #define __iormb() rmb() #define __iowmb() wmb() #else -#define __iormb() do { } while (0) -#define __iowmb() do { } while (0) +#define __iormb() asm volatile("" : : : "memory") +#define __iowmb() asm volatile("" : : : "memory") #endif static inline void sync(void) diff --git a/arch/arc/include/asm/string.h b/arch/arc/include/asm/string.h index 909129c333..8b13789179 100644 --- a/arch/arc/include/asm/string.h +++ b/arch/arc/include/asm/string.h @@ -1,27 +1 @@ -/* - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. All rights reserved. 
- * - * SPDX-License-Identifier: GPL-2.0+ - */ -#ifndef __ASM_ARC_STRING_H -#define __ASM_ARC_STRING_H - -#define __HAVE_ARCH_MEMSET -#define __HAVE_ARCH_MEMCPY -#define __HAVE_ARCH_MEMCMP -#define __HAVE_ARCH_STRCHR -#define __HAVE_ARCH_STRCPY -#define __HAVE_ARCH_STRCMP -#define __HAVE_ARCH_STRLEN - -extern void *memset(void *ptr, int, __kernel_size_t); -extern void *memcpy(void *, const void *, __kernel_size_t); -extern void memzero(void *ptr, __kernel_size_t n); -extern int memcmp(const void *, const void *, __kernel_size_t); -extern char *strchr(const char *s, int c); -extern char *strcpy(char *dest, const char *src); -extern int strcmp(const char *cs, const char *ct); -extern __kernel_size_t strlen(const char *); - -#endif /* __ASM_ARC_STRING_H */ diff --git a/arch/arc/lib/Makefile b/arch/arc/lib/Makefile index 12097bf3be..6b7fb0fdff 100644 --- a/arch/arc/lib/Makefile +++ b/arch/arc/lib/Makefile @@ -10,13 +10,6 @@ obj-y += cache.o obj-y += cpu.o obj-y += interrupts.o obj-y += relocate.o -obj-y += strchr-700.o -obj-y += strcmp.o -obj-y += strcpy-700.o -obj-y += strlen.o -obj-y += memcmp.o -obj-y += memcpy-700.o -obj-y += memset.o obj-y += reset.o obj-y += ints_low.o obj-y += init_helpers.o diff --git a/arch/arc/lib/bootm.c b/arch/arc/lib/bootm.c index 4d4acff239..4f04aad34a 100644 --- a/arch/arc/lib/bootm.c +++ b/arch/arc/lib/bootm.c @@ -4,6 +4,7 @@ * SPDX-License-Identifier: GPL-2.0+ */ +#include <asm/cache.h> #include <common.h> DECLARE_GLOBAL_DATA_PTR; @@ -40,41 +41,52 @@ void arch_lmb_reserve(struct lmb *lmb) static int cleanup_before_linux(void) { disable_interrupts(); - flush_dcache_all(); - invalidate_icache_all(); + sync_n_cleanup_cache_all(); return 0; } +__weak int board_prep_linux(bootm_headers_t *images) { return 0; } + /* Subcommand: PREP */ -static void boot_prep_linux(bootm_headers_t *images) +static int boot_prep_linux(bootm_headers_t *images) { - if (image_setup_linux(images)) - hang(); + int ret; + + ret = image_setup_linux(images); + if (ret) + return ret; + + return board_prep_linux(images); } -__weak void smp_set_core_boot_addr(unsigned long addr, int corenr) {} -__weak void smp_kick_all_cpus(void) {} +/* Generic implementation for single core CPU */ +__weak void board_jump_and_run(ulong entry, int zero, int arch, uint params) +{ + void (*kernel_entry)(int zero, int arch, uint params); + + kernel_entry = (void (*)(int, int, uint))entry; + + kernel_entry(zero, arch, params); +} /* Subcommand: GO */ static void boot_jump_linux(bootm_headers_t *images, int flag) { - void (*kernel_entry)(int zero, int arch, uint params); + ulong kernel_entry; unsigned int r0, r2; int fake = (flag & BOOTM_STATE_OS_FAKE_GO); - kernel_entry = (void (*)(int, int, uint))images->ep; + kernel_entry = images->ep; debug("## Transferring control to Linux (at address %08lx)...\n", - (ulong) kernel_entry); + kernel_entry); bootstage_mark(BOOTSTAGE_ID_RUN_OS); printf("\nStarting kernel ...%s\n\n", fake ? 
"(fake run for tracing)" : ""); bootstage_mark_name(BOOTSTAGE_ID_BOOTM_HANDOFF, "start_kernel"); - cleanup_before_linux(); - if (IMAGE_ENABLE_OF_LIBFDT && images->ft_len) { r0 = 2; r2 = (unsigned int)images->ft_addr; @@ -83,11 +95,10 @@ static void boot_jump_linux(bootm_headers_t *images, int flag) r2 = (unsigned int)env_get("bootargs"); } - if (!fake) { - smp_set_core_boot_addr((unsigned long)kernel_entry, -1); - smp_kick_all_cpus(); - kernel_entry(r0, 0, r2); - } + cleanup_before_linux(); + + if (!fake) + board_jump_and_run(kernel_entry, r0, 0, r2); } int do_bootm_linux(int flag, int argc, char *argv[], bootm_headers_t *images) @@ -96,17 +107,13 @@ int do_bootm_linux(int flag, int argc, char *argv[], bootm_headers_t *images) if ((flag & BOOTM_STATE_OS_BD_T) || (flag & BOOTM_STATE_OS_CMDLINE)) return -1; - if (flag & BOOTM_STATE_OS_PREP) { - boot_prep_linux(images); - return 0; - } + if (flag & BOOTM_STATE_OS_PREP) + return boot_prep_linux(images); if (flag & (BOOTM_STATE_OS_GO | BOOTM_STATE_OS_FAKE_GO)) { boot_jump_linux(images, flag); return 0; } - boot_prep_linux(images); - boot_jump_linux(images, flag); - return 0; + return -1; } diff --git a/arch/arc/lib/cache.c b/arch/arc/lib/cache.c index 04f1d9d59b..8203fae145 100644 --- a/arch/arc/lib/cache.c +++ b/arch/arc/lib/cache.c @@ -10,8 +10,145 @@ #include <linux/kernel.h> #include <linux/log2.h> #include <asm/arcregs.h> +#include <asm/arc-bcr.h> #include <asm/cache.h> +/* + * [ NOTE 1 ]: + * Data cache (L1 D$ or SL$) entire invalidate operation or data cache disable + * operation may result in unexpected behavior and data loss even if we flush + * data cache right before invalidation. That may happens if we store any context + * on stack (like we store BLINK register on stack before function call). + * BLINK register is the register where return address is automatically saved + * when we do function call with instructions like 'bl'. + * + * There is the real example: + * We may hang in the next code as we store any BLINK register on stack in + * invalidate_dcache_all() function. + * + * void flush_dcache_all() { + * __dc_entire_op(OP_FLUSH); + * // Other code // + * } + * + * void invalidate_dcache_all() { + * __dc_entire_op(OP_INV); + * // Other code // + * } + * + * void foo(void) { + * flush_dcache_all(); + * invalidate_dcache_all(); + * } + * + * Now let's see what really happens during that code execution: + * + * foo() + * |->> call flush_dcache_all + * [return address is saved to BLINK register] + * [push BLINK] (save to stack) ![point 1] + * |->> call __dc_entire_op(OP_FLUSH) + * [return address is saved to BLINK register] + * [flush L1 D$] + * return [jump to BLINK] + * <<------ + * [other flush_dcache_all code] + * [pop BLINK] (get from stack) + * return [jump to BLINK] + * <<------ + * |->> call invalidate_dcache_all + * [return address is saved to BLINK register] + * [push BLINK] (save to stack) ![point 2] + * |->> call __dc_entire_op(OP_FLUSH) + * [return address is saved to BLINK register] + * [invalidate L1 D$] ![point 3] + * // Oops!!! + * // We lose return address from invalidate_dcache_all function: + * // we save it to stack and invalidate L1 D$ after that! 
+ * return [jump to BLINK] + * <<------ + * [other invalidate_dcache_all code] + * [pop BLINK] (get from stack) + * // we don't have this data in L1 dcache as we invalidated it in [point 3] + * // so we get it from next memory level (for example DDR memory) + * // but in the memory we have value which we save in [point 1], which + * // is return address from flush_dcache_all function (instead of + * // address from current invalidate_dcache_all function which we + * // saved in [point 2] !) + * return [jump to BLINK] + * <<------ + * // As BLINK points to invalidate_dcache_all, we call it again and + * // loop forever. + * + * Fortunately we may fix that by using flush & invalidation of D$ with a single + * one instruction (instead of flush and invalidation instructions pair) and + * enabling force function inline with '__attribute__((always_inline))' gcc + * attribute to avoid any function call (and BLINK store) between cache flush + * and disable. + * + * + * [ NOTE 2 ]: + * As of today we only support the following cache configurations on ARC. + * Other configurations may exist in HW (for example, since version 3.0 HS + * supports SL$ (L2 system level cache) disable) but we don't support it in SW. + * Configuration 1: + * ______________________ + * | | + * | ARC CPU | + * |______________________| + * ___|___ ___|___ + * | | | | + * | L1 I$ | | L1 D$ | + * |_______| |_______| + * on/off on/off + * ___|______________|____ + * | | + * | main memory | + * |______________________| + * + * Configuration 2: + * ______________________ + * | | + * | ARC CPU | + * |______________________| + * ___|___ ___|___ + * | | | | + * | L1 I$ | | L1 D$ | + * |_______| |_______| + * on/off on/off + * ___|______________|____ + * | | + * | L2 (SL$) | + * |______________________| + * always must be on + * ___|______________|____ + * | | + * | main memory | + * |______________________| + * + * Configuration 3: + * ______________________ + * | | + * | ARC CPU | + * |______________________| + * ___|___ ___|___ + * | | | | + * | L1 I$ | | L1 D$ | + * |_______| |_______| + * on/off must be on + * ___|______________|____ _______ + * | | | | + * | L2 (SL$) |-----| IOC | + * |______________________| |_______| + * always must be on on/off + * ___|______________|____ + * | | + * | main memory | + * |______________________| + */ + +DECLARE_GLOBAL_DATA_PTR; + /* Bit values in IC_CTRL */ #define IC_CTRL_CACHE_DISABLE BIT(0) @@ -19,11 +156,10 @@ #define DC_CTRL_CACHE_DISABLE BIT(0) #define DC_CTRL_INV_MODE_FLUSH BIT(6) #define DC_CTRL_FLUSH_STATUS BIT(8) -#define CACHE_VER_NUM_MASK 0xF -#define OP_INV 0x1 -#define OP_FLUSH 0x2 -#define OP_INV_IC 0x3 +#define OP_INV BIT(0) +#define OP_FLUSH BIT(1) +#define OP_FLUSH_N_INV (OP_FLUSH | OP_INV) /* Bit val in SLC_CONTROL */ #define SLC_CTRL_DIS 0x001 @@ -31,55 +167,117 @@ #define SLC_CTRL_BUSY 0x100 #define SLC_CTRL_RGN_OP_INV 0x200 +#define CACHE_LINE_MASK (~(gd->arch.l1_line_sz - 1)) + /* - * By default that variable will fall into .bss section. - * But .bss section is not relocated and so it will be initilized before - * relocation but will be used after being zeroed. + * We don't want to use '__always_inline' macro here as it can be redefined + * to simple 'inline' in some cases which breaks stuff. See [ NOTE 1 ] for more + * details about the reasons we need to use always_inline functions. 
*/ -int l1_line_sz __section(".data"); -bool dcache_exists __section(".data") = false; -bool icache_exists __section(".data") = false; - -#define CACHE_LINE_MASK (~(l1_line_sz - 1)) - -#ifdef CONFIG_ISA_ARCV2 -int slc_line_sz __section(".data"); -bool slc_exists __section(".data") = false; -bool ioc_exists __section(".data") = false; -bool pae_exists __section(".data") = false; +#define inlined_cachefunc inline __attribute__((always_inline)) -/* To force enable IOC set ioc_enable to 'true' */ -bool ioc_enable __section(".data") = false; +static inlined_cachefunc void __ic_entire_invalidate(void); +static inlined_cachefunc void __dc_entire_op(const int cacheop); -void read_decode_mmu_bcr(void) +static inline bool pae_exists(void) { /* TODO: should we compare mmu version from BCR and from CONFIG? */ #if (CONFIG_ARC_MMU_VER >= 4) - u32 tmp; + union bcr_mmu_4 mmu4; - tmp = read_aux_reg(ARC_AUX_MMU_BCR); + mmu4.word = read_aux_reg(ARC_AUX_MMU_BCR); - struct bcr_mmu_4 { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1, - n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3; -#else - /* DTLB ITLB JES JE JA */ - unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2, - pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8; -#endif /* CONFIG_CPU_BIG_ENDIAN */ - } *mmu4; + if (mmu4.fields.pae) + return true; +#endif /* (CONFIG_ARC_MMU_VER >= 4) */ - mmu4 = (struct bcr_mmu_4 *)&tmp; + return false; +} - pae_exists = !!mmu4->pae; -#endif /* (CONFIG_ARC_MMU_VER >= 4) */ +static inlined_cachefunc bool icache_exists(void) +{ + union bcr_di_cache ibcr; + + ibcr.word = read_aux_reg(ARC_BCR_IC_BUILD); + return !!ibcr.fields.ver; } -static void __slc_entire_op(const int op) +static inlined_cachefunc bool icache_enabled(void) +{ + if (!icache_exists()) + return false; + + return !(read_aux_reg(ARC_AUX_IC_CTRL) & IC_CTRL_CACHE_DISABLE); +} + +static inlined_cachefunc bool dcache_exists(void) +{ + union bcr_di_cache dbcr; + + dbcr.word = read_aux_reg(ARC_BCR_DC_BUILD); + return !!dbcr.fields.ver; +} + +static inlined_cachefunc bool dcache_enabled(void) +{ + if (!dcache_exists()) + return false; + + return !(read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_CACHE_DISABLE); +} + +static inlined_cachefunc bool slc_exists(void) +{ + if (is_isa_arcv2()) { + union bcr_generic sbcr; + + sbcr.word = read_aux_reg(ARC_BCR_SLC); + return !!sbcr.fields.ver; + } + + return false; +} + +static inlined_cachefunc bool slc_data_bypass(void) +{ + /* + * If L1 data cache is disabled SL$ is bypassed and all load/store + * requests are sent directly to main memory. + */ + return !dcache_enabled(); +} + +static inline bool ioc_exists(void) +{ + if (is_isa_arcv2()) { + union bcr_clust_cfg cbcr; + + cbcr.word = read_aux_reg(ARC_BCR_CLUSTER); + return cbcr.fields.c; + } + + return false; +} + +static inline bool ioc_enabled(void) +{ + /* + * We check only CONFIG option instead of IOC HW state check as IOC + * must be disabled by default. + */ + if (is_ioc_enabled()) + return ioc_exists(); + + return false; +} + +static inlined_cachefunc void __slc_entire_op(const int op) { unsigned int ctrl; + if (!slc_exists()) + return; + ctrl = read_aux_reg(ARC_AUX_SLC_CTRL); if (!(op & OP_FLUSH)) /* i.e. OP_INV */ @@ -104,6 +302,14 @@ static void __slc_entire_op(const int op) static void slc_upper_region_init(void) { /* + * ARC_AUX_SLC_RGN_START1 and ARC_AUX_SLC_RGN_END1 register exist + * only if PAE exists in current HW. So we had to check pae_exist + * before using them. 
+ */ + if (!pae_exists()) + return; + + /* * ARC_AUX_SLC_RGN_END1 and ARC_AUX_SLC_RGN_START1 are always == 0 * as we don't use PAE40. */ @@ -113,9 +319,14 @@ static void slc_upper_region_init(void) static void __slc_rgn_op(unsigned long paddr, unsigned long sz, const int op) { +#ifdef CONFIG_ISA_ARCV2 + unsigned int ctrl; unsigned long end; + if (!slc_exists()) + return; + /* * The Region Flush operation is specified by CTRL.RGN_OP[11..9] * - b'000 (default) is Flush, @@ -142,7 +353,7 @@ static void __slc_rgn_op(unsigned long paddr, unsigned long sz, const int op) * END needs to be setup before START (latter triggers the operation) * END can't be same as START, so add (l2_line_sz - 1) to sz */ - end = paddr + sz + slc_line_sz - 1; + end = paddr + sz + gd->arch.slc_line_sz - 1; /* * Upper addresses (ARC_AUX_SLC_RGN_END1 and ARC_AUX_SLC_RGN_START1) @@ -156,85 +367,82 @@ static void __slc_rgn_op(unsigned long paddr, unsigned long sz, const int op) read_aux_reg(ARC_AUX_SLC_CTRL); while (read_aux_reg(ARC_AUX_SLC_CTRL) & SLC_CTRL_BUSY); -} + #endif /* CONFIG_ISA_ARCV2 */ +} + +static void arc_ioc_setup(void) +{ + /* IOC Aperture start is equal to DDR start */ + unsigned int ap_base = CONFIG_SYS_SDRAM_BASE; + /* IOC Aperture size is equal to DDR size */ + long ap_size = CONFIG_SYS_SDRAM_SIZE; + + /* Unsupported configuration. See [ NOTE 2 ] for more details. */ + if (!slc_exists()) + panic("Try to enable IOC but SLC is not present"); + + /* Unsupported configuration. See [ NOTE 2 ] for more details. */ + if (!dcache_enabled()) + panic("Try to enable IOC but L1 D$ is disabled"); + + if (!is_power_of_2(ap_size) || ap_size < 4096) + panic("IOC Aperture size must be power of 2 and bigger 4Kib"); + + /* IOC Aperture start must be aligned to the size of the aperture */ + if (ap_base % ap_size != 0) + panic("IOC Aperture start must be aligned to the size of the aperture"); + + flush_n_invalidate_dcache_all(); + + /* + * IOC Aperture size decoded as 2 ^ (SIZE + 2) KB, + * so setting 0x11 implies 512M, 0x12 implies 1G... + */ + write_aux_reg(ARC_AUX_IO_COH_AP0_SIZE, + order_base_2(ap_size / 1024) - 2); + + write_aux_reg(ARC_AUX_IO_COH_AP0_BASE, ap_base >> 12); + write_aux_reg(ARC_AUX_IO_COH_PARTIAL, 1); + write_aux_reg(ARC_AUX_IO_COH_ENABLE, 1); +} -#ifdef CONFIG_ISA_ARCV2 static void read_decode_cache_bcr_arcv2(void) { - union { - struct { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad:24, way:2, lsz:2, sz:4; -#else - unsigned int sz:4, lsz:2, way:2, pad:24; -#endif - } fields; - unsigned int word; - } slc_cfg; - - union { - struct { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad:24, ver:8; -#else - unsigned int ver:8, pad:24; -#endif - } fields; - unsigned int word; - } sbcr; +#ifdef CONFIG_ISA_ARCV2 - sbcr.word = read_aux_reg(ARC_BCR_SLC); - if (sbcr.fields.ver) { + union bcr_slc_cfg slc_cfg; + + if (slc_exists()) { slc_cfg.word = read_aux_reg(ARC_AUX_SLC_CONFIG); - slc_exists = true; - slc_line_sz = (slc_cfg.fields.lsz == 0) ? 128 : 64; - } + gd->arch.slc_line_sz = (slc_cfg.fields.lsz == 0) ? 128 : 64; - union { - struct bcr_clust_cfg { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8; -#else - unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7; -#endif - } fields; - unsigned int word; - } cbcr; + /* + * We don't support configuration where L1 I$ or L1 D$ is + * absent but SL$ exists. See [ NOTE 2 ] for more details. 
+ */ + if (!icache_exists() || !dcache_exists()) + panic("Unsupported cache configuration: SLC exists but one of L1 caches is absent"); + } - cbcr.word = read_aux_reg(ARC_BCR_CLUSTER); - if (cbcr.fields.c && ioc_enable) - ioc_exists = true; +#endif /* CONFIG_ISA_ARCV2 */ } -#endif void read_decode_cache_bcr(void) { int dc_line_sz = 0, ic_line_sz = 0; - - union { - struct { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad:12, line_len:4, sz:4, config:4, ver:8; -#else - unsigned int ver:8, config:4, sz:4, line_len:4, pad:12; -#endif - } fields; - unsigned int word; - } ibcr, dbcr; + union bcr_di_cache ibcr, dbcr; ibcr.word = read_aux_reg(ARC_BCR_IC_BUILD); if (ibcr.fields.ver) { - icache_exists = true; - l1_line_sz = ic_line_sz = 8 << ibcr.fields.line_len; + gd->arch.l1_line_sz = ic_line_sz = 8 << ibcr.fields.line_len; if (!ic_line_sz) panic("Instruction exists but line length is 0\n"); } dbcr.word = read_aux_reg(ARC_BCR_DC_BUILD); if (dbcr.fields.ver) { - dcache_exists = true; - l1_line_sz = dc_line_sz = 16 << dbcr.fields.line_len; + gd->arch.l1_line_sz = dc_line_sz = 16 << dbcr.fields.line_len; if (!dc_line_sz) panic("Data cache exists but line length is 0\n"); } @@ -247,109 +455,79 @@ void cache_init(void) { read_decode_cache_bcr(); -#ifdef CONFIG_ISA_ARCV2 - read_decode_cache_bcr_arcv2(); - - if (ioc_exists) { - /* IOC Aperture start is equal to DDR start */ - unsigned int ap_base = CONFIG_SYS_SDRAM_BASE; - /* IOC Aperture size is equal to DDR size */ - long ap_size = CONFIG_SYS_SDRAM_SIZE; - - flush_dcache_all(); - invalidate_dcache_all(); + if (is_isa_arcv2()) + read_decode_cache_bcr_arcv2(); - if (!is_power_of_2(ap_size) || ap_size < 4096) - panic("IOC Aperture size must be power of 2 and bigger 4Kib"); - - /* - * IOC Aperture size decoded as 2 ^ (SIZE + 2) KB, - * so setting 0x11 implies 512M, 0x12 implies 1G... - */ - write_aux_reg(ARC_AUX_IO_COH_AP0_SIZE, - order_base_2(ap_size / 1024) - 2); - - /* IOC Aperture start must be aligned to the size of the aperture */ - if (ap_base % ap_size != 0) - panic("IOC Aperture start must be aligned to the size of the aperture"); - - write_aux_reg(ARC_AUX_IO_COH_AP0_BASE, ap_base >> 12); - write_aux_reg(ARC_AUX_IO_COH_PARTIAL, 1); - write_aux_reg(ARC_AUX_IO_COH_ENABLE, 1); - } + if (is_isa_arcv2() && ioc_enabled()) + arc_ioc_setup(); - read_decode_mmu_bcr(); - - /* - * ARC_AUX_SLC_RGN_START1 and ARC_AUX_SLC_RGN_END1 register exist - * only if PAE exists in current HW. So we had to check pae_exist - * before using them. 
- */ - if (slc_exists && pae_exists) + if (is_isa_arcv2() && slc_exists()) slc_upper_region_init(); -#endif /* CONFIG_ISA_ARCV2 */ } int icache_status(void) { - if (!icache_exists) - return 0; - - if (read_aux_reg(ARC_AUX_IC_CTRL) & IC_CTRL_CACHE_DISABLE) - return 0; - else - return 1; + return icache_enabled(); } void icache_enable(void) { - if (icache_exists) + if (icache_exists()) write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) & ~IC_CTRL_CACHE_DISABLE); } void icache_disable(void) { - if (icache_exists) - write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) | - IC_CTRL_CACHE_DISABLE); + if (!icache_exists()) + return; + + __ic_entire_invalidate(); + + write_aux_reg(ARC_AUX_IC_CTRL, read_aux_reg(ARC_AUX_IC_CTRL) | + IC_CTRL_CACHE_DISABLE); } -void invalidate_icache_all(void) +/* IC supports only invalidation */ +static inlined_cachefunc void __ic_entire_invalidate(void) { + if (!icache_enabled()) + return; + /* Any write to IC_IVIC register triggers invalidation of entire I$ */ - if (icache_status()) { - write_aux_reg(ARC_AUX_IC_IVIC, 1); - /* - * As per ARC HS databook (see chapter 5.3.3.2) - * it is required to add 3 NOPs after each write to IC_IVIC. - */ - __builtin_arc_nop(); - __builtin_arc_nop(); - __builtin_arc_nop(); - read_aux_reg(ARC_AUX_IC_CTRL); /* blocks */ - } + write_aux_reg(ARC_AUX_IC_IVIC, 1); + /* + * As per ARC HS databook (see chapter 5.3.3.2) + * it is required to add 3 NOPs after each write to IC_IVIC. + */ + __builtin_arc_nop(); + __builtin_arc_nop(); + __builtin_arc_nop(); + read_aux_reg(ARC_AUX_IC_CTRL); /* blocks */ +} -#ifdef CONFIG_ISA_ARCV2 - if (slc_exists) +void invalidate_icache_all(void) +{ + __ic_entire_invalidate(); + + /* + * If SL$ is bypassed for data it is used only for instructions, + * so we need to invalidate it too. + * TODO: HS 3.0 supports SLC disable so we need to check slc + * enable/disable status here. + */ + if (is_isa_arcv2() && slc_data_bypass()) __slc_entire_op(OP_INV); -#endif } int dcache_status(void) { - if (!dcache_exists) - return 0; - - if (read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_CACHE_DISABLE) - return 0; - else - return 1; + return dcache_enabled(); } void dcache_enable(void) { - if (!dcache_exists) + if (!dcache_exists()) return; write_aux_reg(ARC_AUX_DC_CTRL, read_aux_reg(ARC_AUX_DC_CTRL) & @@ -358,83 +536,77 @@ void dcache_enable(void) void dcache_disable(void) { - if (!dcache_exists) + if (!dcache_exists()) return; + __dc_entire_op(OP_FLUSH_N_INV); + + /* + * As SLC will be bypassed for data after L1 D$ disable we need to + * flush it first before L1 D$ disable. Also we invalidate SLC to + * avoid any inconsistent data problems after enabling L1 D$ again with + * dcache_enable function. + */ + if (is_isa_arcv2()) + __slc_entire_op(OP_FLUSH_N_INV); + write_aux_reg(ARC_AUX_DC_CTRL, read_aux_reg(ARC_AUX_DC_CTRL) | DC_CTRL_CACHE_DISABLE); } -#ifndef CONFIG_SYS_DCACHE_OFF -/* - * Common Helper for Line Operations on {I,D}-Cache - */ -static inline void __cache_line_loop(unsigned long paddr, unsigned long sz, - const int cacheop) +/* Common Helper for Line Operations on D-cache */ +static inline void __dcache_line_loop(unsigned long paddr, unsigned long sz, + const int cacheop) { unsigned int aux_cmd; -#if (CONFIG_ARC_MMU_VER == 3) - unsigned int aux_tag; -#endif int num_lines; - if (cacheop == OP_INV_IC) { - aux_cmd = ARC_AUX_IC_IVIL; -#if (CONFIG_ARC_MMU_VER == 3) - aux_tag = ARC_AUX_IC_PTAG; -#endif - } else { - /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ - aux_cmd = cacheop & OP_INV ? 
ARC_AUX_DC_IVDL : ARC_AUX_DC_FLDL; -#if (CONFIG_ARC_MMU_VER == 3) - aux_tag = ARC_AUX_DC_PTAG; -#endif - } + /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ + aux_cmd = cacheop & OP_INV ? ARC_AUX_DC_IVDL : ARC_AUX_DC_FLDL; sz += paddr & ~CACHE_LINE_MASK; paddr &= CACHE_LINE_MASK; - num_lines = DIV_ROUND_UP(sz, l1_line_sz); + num_lines = DIV_ROUND_UP(sz, gd->arch.l1_line_sz); while (num_lines-- > 0) { #if (CONFIG_ARC_MMU_VER == 3) - write_aux_reg(aux_tag, paddr); + write_aux_reg(ARC_AUX_DC_PTAG, paddr); #endif write_aux_reg(aux_cmd, paddr); - paddr += l1_line_sz; + paddr += gd->arch.l1_line_sz; } } -static unsigned int __before_dc_op(const int op) +static inlined_cachefunc void __before_dc_op(const int op) { - unsigned int reg; + unsigned int ctrl; - if (op == OP_INV) { - /* - * IM is set by default and implies Flush-n-inv - * Clear it here for vanilla inv - */ - reg = read_aux_reg(ARC_AUX_DC_CTRL); - write_aux_reg(ARC_AUX_DC_CTRL, reg & ~DC_CTRL_INV_MODE_FLUSH); - } + ctrl = read_aux_reg(ARC_AUX_DC_CTRL); - return reg; + /* IM bit implies flush-n-inv, instead of vanilla inv */ + if (op == OP_INV) + ctrl &= ~DC_CTRL_INV_MODE_FLUSH; + else + ctrl |= DC_CTRL_INV_MODE_FLUSH; + + write_aux_reg(ARC_AUX_DC_CTRL, ctrl); } -static void __after_dc_op(const int op, unsigned int reg) +static inlined_cachefunc void __after_dc_op(const int op) { if (op & OP_FLUSH) /* flush / flush-n-inv both wait */ while (read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_FLUSH_STATUS); - - /* Switch back to default Invalidate mode */ - if (op == OP_INV) - write_aux_reg(ARC_AUX_DC_CTRL, reg | DC_CTRL_INV_MODE_FLUSH); } -static inline void __dc_entire_op(const int cacheop) +static inlined_cachefunc void __dc_entire_op(const int cacheop) { int aux; - unsigned int ctrl_reg = __before_dc_op(cacheop); + + if (!dcache_enabled()) + return; + + __before_dc_op(cacheop); if (cacheop & OP_INV) /* Inv or flush-n-inv use same cmd reg */ aux = ARC_AUX_DC_IVDC; @@ -443,36 +615,36 @@ static inline void __dc_entire_op(const int cacheop) write_aux_reg(aux, 0x1); - __after_dc_op(cacheop, ctrl_reg); + __after_dc_op(cacheop); } static inline void __dc_line_op(unsigned long paddr, unsigned long sz, const int cacheop) { - unsigned int ctrl_reg = __before_dc_op(cacheop); + if (!dcache_enabled()) + return; - __cache_line_loop(paddr, sz, cacheop); - __after_dc_op(cacheop, ctrl_reg); + __before_dc_op(cacheop); + __dcache_line_loop(paddr, sz, cacheop); + __after_dc_op(cacheop); } -#else -#define __dc_entire_op(cacheop) -#define __dc_line_op(paddr, sz, cacheop) -#endif /* !CONFIG_SYS_DCACHE_OFF */ void invalidate_dcache_range(unsigned long start, unsigned long end) { if (start >= end) return; -#ifdef CONFIG_ISA_ARCV2 - if (!ioc_exists) -#endif + /* + * ARCv1 -> call __dc_line_op + * ARCv2 && L1 D$ disabled -> nothing + * ARCv2 && L1 D$ enabled && IOC enabled -> nothing + * ARCv2 && L1 D$ enabled && no IOC -> call __dc_line_op; call __slc_rgn_op + */ + if (!is_isa_arcv2() || !ioc_enabled()) __dc_line_op(start, end - start, OP_INV); -#ifdef CONFIG_ISA_ARCV2 - if (slc_exists && !ioc_exists) + if (is_isa_arcv2() && !ioc_enabled() && !slc_data_bypass()) __slc_rgn_op(start, end - start, OP_INV); -#endif } void flush_dcache_range(unsigned long start, unsigned long end) @@ -480,15 +652,17 @@ void flush_dcache_range(unsigned long start, unsigned long end) if (start >= end) return; -#ifdef CONFIG_ISA_ARCV2 - if (!ioc_exists) -#endif + /* + * ARCv1 -> call __dc_line_op + * ARCv2 && L1 D$ disabled -> nothing + * ARCv2 && L1 D$ enabled && IOC enabled -> 
nothing + * ARCv2 && L1 D$ enabled && no IOC -> call __dc_line_op; call __slc_rgn_op + */ + if (!is_isa_arcv2() || !ioc_enabled()) __dc_line_op(start, end - start, OP_FLUSH); -#ifdef CONFIG_ISA_ARCV2 - if (slc_exists && !ioc_exists) + if (is_isa_arcv2() && !ioc_enabled() && !slc_data_bypass()) __slc_rgn_op(start, end - start, OP_FLUSH); -#endif } void flush_cache(unsigned long start, unsigned long size) @@ -496,22 +670,47 @@ void flush_cache(unsigned long start, unsigned long size) flush_dcache_range(start, start + size); } -void invalidate_dcache_all(void) +/* + * As invalidate_dcache_all() is not used in generic U-Boot code and as we + * don't need it in arch/arc code alone (invalidate without flush) we implement + * flush_n_invalidate_dcache_all (flush and invalidate in 1 operation) because + * it's much safer. See [ NOTE 1 ] for more details. + */ +void flush_n_invalidate_dcache_all(void) { - __dc_entire_op(OP_INV); + __dc_entire_op(OP_FLUSH_N_INV); -#ifdef CONFIG_ISA_ARCV2 - if (slc_exists) - __slc_entire_op(OP_INV); -#endif + if (is_isa_arcv2() && !slc_data_bypass()) + __slc_entire_op(OP_FLUSH_N_INV); } void flush_dcache_all(void) { __dc_entire_op(OP_FLUSH); -#ifdef CONFIG_ISA_ARCV2 - if (slc_exists) + if (is_isa_arcv2() && !slc_data_bypass()) __slc_entire_op(OP_FLUSH); -#endif +} + +/* + * This is function to cleanup all caches (and therefore sync I/D caches) which + * can be used for cleanup before linux launch or to sync caches during + * relocation. + */ +void sync_n_cleanup_cache_all(void) +{ + __dc_entire_op(OP_FLUSH_N_INV); + + /* + * If SL$ is bypassed for data it is used only for instructions, + * and we shouldn't flush it. So invalidate it instead of flush_n_inv. + */ + if (is_isa_arcv2()) { + if (slc_data_bypass()) + __slc_entire_op(OP_INV); + else + __slc_entire_op(OP_FLUSH_N_INV); + } + + __ic_entire_invalidate(); } diff --git a/arch/arc/lib/init_helpers.c b/arch/arc/lib/init_helpers.c index dbc8d68ffb..435fe96ef4 100644 --- a/arch/arc/lib/init_helpers.c +++ b/arch/arc/lib/init_helpers.c @@ -4,14 +4,14 @@ * SPDX-License-Identifier: GPL-2.0+ */ +#include <asm/cache.h> #include <common.h> DECLARE_GLOBAL_DATA_PTR; int init_cache_f_r(void) { -#ifndef CONFIG_SYS_DCACHE_OFF - flush_dcache_all(); -#endif + sync_n_cleanup_cache_all(); + return 0; } diff --git a/arch/arc/lib/memcmp.S b/arch/arc/lib/memcmp.S deleted file mode 100644 index 87bccab51d..0000000000 --- a/arch/arc/lib/memcmp.S +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved. 
- * - * SPDX-License-Identifier: GPL-2.0+ - */ - -#ifdef __LITTLE_ENDIAN__ -#define WORD2 r2 -#define SHIFT r3 -#else /* __BIG_ENDIAN__ */ -#define WORD2 r3 -#define SHIFT r2 -#endif /* _ENDIAN__ */ - -.global memcmp -.align 4 -memcmp: - or %r12, %r0, %r1 - asl_s %r12, %r12, 30 - sub %r3, %r2, 1 - brls %r2, %r12, .Lbytewise - ld %r4, [%r0, 0] - ld %r5, [%r1, 0] - lsr.f %lp_count, %r3, 3 - lpne .Loop_end - ld_s WORD2, [%r0, 4] - ld_s %r12, [%r1, 4] - brne %r4, %r5, .Leven - ld.a %r4, [%r0, 8] - ld.a %r5, [%r1, 8] - brne WORD2, %r12, .Lodd - nop -.Loop_end: - asl_s SHIFT, SHIFT, 3 - bhs_s .Last_cmp - brne %r4, %r5, .Leven - ld %r4, [%r0, 4] - ld %r5, [%r1, 4] -#ifdef __LITTLE_ENDIAN__ - nop_s - /* one more load latency cycle */ -.Last_cmp: - xor %r0, %r4, %r5 - bset %r0, %r0, SHIFT - sub_s %r1, %r0, 1 - bic_s %r1, %r1, %r0 - norm %r1, %r1 - b.d .Leven_cmp - and %r1, %r1, 24 -.Leven: - xor %r0, %r4, %r5 - sub_s %r1, %r0, 1 - bic_s %r1, %r1, %r0 - norm %r1, %r1 - /* slow track insn */ - and %r1, %r1, 24 -.Leven_cmp: - asl %r2, %r4, %r1 - asl %r12, %r5, %r1 - lsr_s %r2, %r2, 1 - lsr_s %r12, %r12, 1 - j_s.d [%blink] - sub %r0, %r2, %r12 - .balign 4 -.Lodd: - xor %r0, WORD2, %r12 - sub_s %r1, %r0, 1 - bic_s %r1, %r1, %r0 - norm %r1, %r1 - /* slow track insn */ - and %r1, %r1, 24 - asl_s %r2, %r2, %r1 - asl_s %r12, %r12, %r1 - lsr_s %r2, %r2, 1 - lsr_s %r12, %r12, 1 - j_s.d [%blink] - sub %r0, %r2, %r12 -#else /* __BIG_ENDIAN__ */ -.Last_cmp: - neg_s SHIFT, SHIFT - lsr %r4, %r4, SHIFT - lsr %r5, %r5, SHIFT - /* slow track insn */ -.Leven: - sub.f %r0, %r4, %r5 - mov.ne %r0, 1 - j_s.d [%blink] - bset.cs %r0, %r0, 31 -.Lodd: - cmp_s WORD2, %r12 - - mov_s %r0, 1 - j_s.d [%blink] - bset.cs %r0, %r0, 31 -#endif /* _ENDIAN__ */ - .balign 4 -.Lbytewise: - breq %r2, 0, .Lnil - ldb %r4, [%r0, 0] - ldb %r5, [%r1, 0] - lsr.f %lp_count, %r3 - lpne .Lbyte_end - ldb_s %r3, [%r0, 1] - ldb %r12, [%r1, 1] - brne %r4, %r5, .Lbyte_even - ldb.a %r4, [%r0, 2] - ldb.a %r5, [%r1, 2] - brne %r3, %r12, .Lbyte_odd - nop -.Lbyte_end: - bcc .Lbyte_even - brne %r4, %r5, .Lbyte_even - ldb_s %r3, [%r0, 1] - ldb_s %r12, [%r1, 1] -.Lbyte_odd: - j_s.d [%blink] - sub %r0, %r3, %r12 -.Lbyte_even: - j_s.d [%blink] - sub %r0, %r4, %r5 -.Lnil: - j_s.d [%blink] - mov %r0, 0 diff --git a/arch/arc/lib/memcpy-700.S b/arch/arc/lib/memcpy-700.S deleted file mode 100644 index 51dd73ab8f..0000000000 --- a/arch/arc/lib/memcpy-700.S +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved. 
- * - * SPDX-License-Identifier: GPL-2.0+ - */ - -.global memcpy -.align 4 -memcpy: - or %r3, %r0, %r1 - asl_s %r3, %r3, 30 - mov_s %r5, %r0 - brls.d %r2, %r3, .Lcopy_bytewise - sub.f %r3, %r2, 1 - ld_s %r12, [%r1, 0] - asr.f %lp_count, %r3, 3 - bbit0.d %r3, 2, .Lnox4 - bmsk_s %r2, %r2, 1 - st.ab %r12, [%r5, 4] - ld.a %r12, [%r1, 4] -.Lnox4: - lppnz .Lendloop - ld_s %r3, [%r1, 4] - st.ab %r12, [%r5, 4] - ld.a %r12, [%r1, 8] - st.ab %r3, [%r5, 4] -.Lendloop: - breq %r2, 0, .Last_store - ld %r3, [%r5, 0] -#ifdef __LITTLE_ENDIAN__ - add3 %r2, -1, %r2 - /* uses long immediate */ - xor_s %r12, %r12, %r3 - bmsk %r12, %r12, %r2 - xor_s %r12, %r12, %r3 -#else /* __BIG_ENDIAN__ */ - sub3 %r2, 31, %r2 - /* uses long immediate */ - xor_s %r3, %r3, %r12 - bmsk %r3, %r3, %r2 - xor_s %r12, %r12, %r3 -#endif /* _ENDIAN__ */ -.Last_store: - j_s.d [%blink] - st %r12, [%r5, 0] - - .balign 4 -.Lcopy_bytewise: - jcs [%blink] - ldb_s %r12, [%r1, 0] - lsr.f %lp_count, %r3 - bhs_s .Lnox1 - stb.ab %r12, [%r5, 1] - ldb.a %r12, [%r1, 1] -.Lnox1: - lppnz .Lendbloop - ldb_s %r3, [%r1, 1] - stb.ab %r12, [%r5, 1] - ldb.a %r12, [%r1, 2] - stb.ab %r3, [%r5, 1] -.Lendbloop: - j_s.d [%blink] - stb %r12, [%r5, 0] diff --git a/arch/arc/lib/memset.S b/arch/arc/lib/memset.S deleted file mode 100644 index 017e8af0e8..0000000000 --- a/arch/arc/lib/memset.S +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved. - * - * SPDX-License-Identifier: GPL-2.0+ - */ - -#define SMALL 7 /* Must be at least 6 to deal with alignment/loop issues. */ - -.global memset -.align 4 -memset: - mov_s %r4, %r0 - or %r12, %r0, %r2 - bmsk.f %r12, %r12, 1 - extb_s %r1, %r1 - asl %r3, %r1, 8 - beq.d .Laligned - or_s %r1, %r1, %r3 - brls %r2, SMALL, .Ltiny - add %r3, %r2, %r0 - stb %r1, [%r3, -1] - bclr_s %r3, %r3, 0 - stw %r1, [%r3, -2] - bmsk.f %r12, %r0, 1 - add_s %r2, %r2, %r12 - sub.ne %r2, %r2, 4 - stb.ab %r1, [%r4, 1] - and %r4, %r4, -2 - stw.ab %r1, [%r4, 2] - and %r4, %r4, -4 - - .balign 4 -.Laligned: - asl %r3, %r1, 16 - lsr.f %lp_count, %r2, 2 - or_s %r1, %r1, %r3 - lpne .Loop_end - st.ab %r1, [%r4, 4] -.Loop_end: - j_s [%blink] - - .balign 4 -.Ltiny: - mov.f %lp_count, %r2 - lpne .Ltiny_end - stb.ab %r1, [%r4, 1] -.Ltiny_end: - j_s [%blink] - -/* - * memzero: @r0 = mem, @r1 = size_t - * memset: @r0 = mem, @r1 = char, @r2 = size_t - */ - -.global memzero -.align 4 -memzero: - /* adjust bzero args to memset args */ - mov %r2, %r1 - mov %r1, 0 - /* tail call so need to tinker with blink */ - b memset diff --git a/arch/arc/lib/relocate.c b/arch/arc/lib/relocate.c index 7802f40545..96b4bd3d8f 100644 --- a/arch/arc/lib/relocate.c +++ b/arch/arc/lib/relocate.c @@ -17,6 +17,9 @@ int copy_uboot_to_ram(void) { size_t len = (size_t)&__image_copy_end - (size_t)&__image_copy_start; + if (gd->flags & GD_FLG_SKIP_RELOC) + return 0; + memcpy((void *)gd->relocaddr, (void *)&__image_copy_start, len); return 0; @@ -40,6 +43,9 @@ int do_elf_reloc_fixups(void) Elf32_Rela *re_src = (Elf32_Rela *)(&__rel_dyn_start); Elf32_Rela *re_end = (Elf32_Rela *)(&__rel_dyn_end); + if (gd->flags & GD_FLG_SKIP_RELOC) + return 0; + debug("Section .rela.dyn is located at %08x-%08x\n", (unsigned int)re_src, (unsigned int)re_end); diff --git a/arch/arc/lib/start.S b/arch/arc/lib/start.S index 0d72fe71d4..c78dd001d8 100644 --- a/arch/arc/lib/start.S +++ b/arch/arc/lib/start.S @@ -10,26 +10,6 @@ #include <asm/arcregs.h> ENTRY(_start) -; ARCompact devices are not supposed to be SMP so master/slave check -; makes no sense. 
-#ifdef CONFIG_ISA_ARCV2 - ; Non-masters will be halted immediately, they might be kicked later - ; by platform code right before passing control to the Linux kernel - ; in bootm.c:boot_jump_linux(). - lr r5, [identity] - lsr r5, r5, 8 - bmsk r5, r5, 7 - cmp r5, 0 - mov.nz r0, r5 - bz .Lmaster_proceed - flag 1 - nop - nop - nop - -.Lmaster_proceed: -#endif - /* Setup interrupt vector base that matches "__text_start" */ sr __ivt_start, [ARC_AUX_INTR_VEC_BASE] @@ -98,7 +78,13 @@ ENTRY(_start) /* Zero the one and only argument of "board_init_f" */ mov_s %r0, 0 - j board_init_f + bl board_init_f + + /* We only get here if relocation is disabled by GD_FLG_SKIP_RELOC */ + /* Make sure we don't lose GD overwritten by zero new GD */ + mov %r0, %r25 + mov %r1, 0 + bl board_init_r ENDPROC(_start) /* diff --git a/arch/arc/lib/strchr-700.S b/arch/arc/lib/strchr-700.S deleted file mode 100644 index 55fcc9fb00..0000000000 --- a/arch/arc/lib/strchr-700.S +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved. - * - * SPDX-License-Identifier: GPL-2.0+ - */ - -/* - * ARC700 has a relatively long pipeline and branch prediction, so we want - * to avoid branches that are hard to predict. On the other hand, the - * presence of the norm instruction makes it easier to operate on whole - * words branch-free. - */ - -.global strchr -.align 4 -strchr: - extb_s %r1, %r1 - asl %r5, %r1, 8 - bmsk %r2, %r0, 1 - or %r5, %r5, %r1 - mov_s %r3, 0x01010101 - breq.d %r2, %r0, .Laligned - asl %r4, %r5, 16 - sub_s %r0, %r0, %r2 - asl %r7, %r2, 3 - ld_s %r2, [%r0] -#ifdef __LITTLE_ENDIAN__ - asl %r7, %r3, %r7 -#else /* __BIG_ENDIAN__ */ - lsr %r7, %r3, %r7 -#endif /* _ENDIAN__ */ - or %r5, %r5, %r4 - ror %r4, %r3 - sub %r12, %r2, %r7 - bic_s %r12, %r12, %r2 - and %r12, %r12, %r4 - brne.d %r12, 0, .Lfound0_ua - xor %r6, %r2, %r5 - ld.a %r2, [%r0, 4] - sub %r12, %r6, %r7 - bic %r12, %r12, %r6 -#ifdef __LITTLE_ENDIAN__ - and %r7, %r12, %r4 - /* For speed, we want this branch to be unaligned. */ - breq %r7, 0, .Loop - /* Likewise this one */ - b .Lfound_char -#else /* __BIG_ENDIAN__ */ - and %r12, %r12, %r4 - /* For speed, we want this branch to be unaligned. */ - breq %r12, 0, .Loop - lsr_s %r12, %r12, 7 - bic %r2, %r7, %r6 - b.d .Lfound_char_b - and_s %r2, %r2, %r12 -#endif /* _ENDIAN__ */ - /* We require this code address to be unaligned for speed... */ -.Laligned: - ld_s %r2, [%r0] - or %r5, %r5, %r4 - ror %r4, %r3 - /* ... so that this code address is aligned, for itself and ... */ -.Loop: - sub %r12, %r2, %r3 - bic_s %r12, %r12, %r2 - and %r12, %r12, %r4 - brne.d %r12, 0, .Lfound0 - xor %r6, %r2, %r5 - ld.a %r2, [%r0, 4] - sub %r12, %r6, %r3 - bic %r12, %r12, %r6 - and %r7, %r12, %r4 - breq %r7, 0, .Loop - /* - *... so that this branch is unaligned. - * Found searched-for character. - * r0 has already advanced to next word. - */ -#ifdef __LITTLE_ENDIAN__ - /* - * We only need the information about the first matching byte - * (i.e. the least significant matching byte) to be exact, - * hence there is no problem with carry effects. 
- */ -.Lfound_char: - sub %r3, %r7, 1 - bic %r3, %r3, %r7 - norm %r2, %r3 - sub_s %r0, %r0, 1 - asr_s %r2, %r2, 3 - j.d [%blink] - sub_s %r0, %r0, %r2 - - .balign 4 -.Lfound0_ua: - mov %r3, %r7 -.Lfound0: - sub %r3, %r6, %r3 - bic %r3, %r3, %r6 - and %r2, %r3, %r4 - or_s %r12, %r12, %r2 - sub_s %r3, %r12, 1 - bic_s %r3, %r3, %r12 - norm %r3, %r3 - add_s %r0, %r0, 3 - asr_s %r12, %r3, 3 - asl.f 0, %r2, %r3 - sub_s %r0, %r0, %r12 - j_s.d [%blink] - mov.pl %r0, 0 -#else /* __BIG_ENDIAN__ */ -.Lfound_char: - lsr %r7, %r7, 7 - - bic %r2, %r7, %r6 -.Lfound_char_b: - norm %r2, %r2 - sub_s %r0, %r0, 4 - asr_s %r2, %r2, 3 - j.d [%blink] - add_s %r0, %r0, %r2 - -.Lfound0_ua: - mov_s %r3, %r7 -.Lfound0: - asl_s %r2, %r2, 7 - or %r7, %r6, %r4 - bic_s %r12, %r12, %r2 - sub %r2, %r7, %r3 - or %r2, %r2, %r6 - bic %r12, %r2, %r12 - bic.f %r3, %r4, %r12 - norm %r3, %r3 - - add.pl %r3, %r3, 1 - asr_s %r12, %r3, 3 - asl.f 0, %r2, %r3 - add_s %r0, %r0, %r12 - j_s.d [%blink] - mov.mi %r0, 0 -#endif /* _ENDIAN__ */ diff --git a/arch/arc/lib/strcmp.S b/arch/arc/lib/strcmp.S deleted file mode 100644 index 8cb7d2f18c..0000000000 --- a/arch/arc/lib/strcmp.S +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved. - * - * SPDX-License-Identifier: GPL-2.0+ - */ - -/* - * This is optimized primarily for the ARC700. - * It would be possible to speed up the loops by one cycle / word - * respective one cycle / byte by forcing double source 1 alignment, unrolling - * by a factor of two, and speculatively loading the second word / byte of - * source 1; however, that would increase the overhead for loop setup / finish, - * and strcmp might often terminate early. - */ - -.global strcmp -.align 4 -strcmp: - or %r2, %r0, %r1 - bmsk_s %r2, %r2, 1 - brne %r2, 0, .Lcharloop - mov_s %r12, 0x01010101 - ror %r5, %r12 -.Lwordloop: - ld.ab %r2, [%r0, 4] - ld.ab %r3, [%r1, 4] - nop_s - sub %r4, %r2, %r12 - bic %r4, %r4, %r2 - and %r4, %r4, %r5 - brne %r4, 0, .Lfound0 - breq %r2 ,%r3, .Lwordloop -#ifdef __LITTLE_ENDIAN__ - xor %r0, %r2, %r3 /* mask for difference */ - sub_s %r1, %r0, 1 - bic_s %r0, %r0, %r1 /* mask for least significant difference bit */ - sub %r1, %r5, %r0 - xor %r0, %r5, %r1 /* mask for least significant difference byte */ - and_s %r2, %r2, %r0 - and_s %r3, %r3, %r0 -#endif /* _ENDIAN__ */ - cmp_s %r2, %r3 - mov_s %r0, 1 - j_s.d [%blink] - bset.lo %r0, %r0, 31 - - .balign 4 -#ifdef __LITTLE_ENDIAN__ -.Lfound0: - xor %r0, %r2, %r3 /* mask for difference */ - or %r0, %r0, %r4 /* or in zero indicator */ - sub_s %r1, %r0, 1 - bic_s %r0, %r0, %r1 /* mask for least significant difference bit */ - sub %r1, %r5, %r0 - xor %r0, %r5, %r1 /* mask for least significant difference byte */ - and_s %r2, %r2, %r0 - and_s %r3, %r3, %r0 - sub.f %r0, %r2, %r3 - mov.hi %r0, 1 - j_s.d [%blink] - bset.lo %r0, %r0, 31 -#else /* __BIG_ENDIAN__ */ - /* - * The zero-detection above can mis-detect 0x01 bytes as zeroes - * because of carry-propagateion from a lower significant zero byte. - * We can compensate for this by checking that bit0 is zero. - * This compensation is not necessary in the step where we - * get a low estimate for r2, because in any affected bytes - * we already have 0x00 or 0x01, which will remain unchanged - * when bit 7 is cleared. - */ - .balign 4 -.Lfound0: - lsr %r0, %r4, 8 - lsr_s %r1, %r2 - bic_s %r2, %r2, %r0 /* get low estimate for r2 and get ... */ - bic_s %r0, %r0, %r1 /* <this is the adjusted mask for zeros> */ - or_s %r3, %r3, %r0 /* ... 
high estimate r3 so that r2 > r3 will */ - cmp_s %r3, %r2 /* ... be independent of trailing garbage */ - or_s %r2, %r2, %r0 /* likewise for r3 > r2 */ - bic_s %r3, %r3, %r0 - rlc %r0, 0 /* r0 := r2 > r3 ? 1 : 0 */ - cmp_s %r2, %r3 - j_s.d [%blink] - bset.lo %r0, %r0, 31 -#endif /* _ENDIAN__ */ - - .balign 4 -.Lcharloop: - ldb.ab %r2,[%r0,1] - ldb.ab %r3,[%r1,1] - nop_s - breq %r2, 0, .Lcmpend - breq %r2, %r3, .Lcharloop -.Lcmpend: - j_s.d [%blink] - sub %r0, %r2, %r3 diff --git a/arch/arc/lib/strcpy-700.S b/arch/arc/lib/strcpy-700.S deleted file mode 100644 index 41bb53e501..0000000000 --- a/arch/arc/lib/strcpy-700.S +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved. - * - * SPDX-License-Identifier: GPL-2.0+ - */ - -/* - * If dst and src are 4 byte aligned, copy 8 bytes at a time. - * If the src is 4, but not 8 byte aligned, we first read 4 bytes to get - * it 8 byte aligned. Thus, we can do a little read-ahead, without - * dereferencing a cache line that we should not touch. - * Note that short and long instructions have been scheduled to avoid - * branch stalls. - * The beq_s to r3z could be made unaligned & long to avoid a stall - * there, but it is not likely to be taken often, and it would also be likely - * to cost an unaligned mispredict at the next call. - */ - -.global strcpy -.align 4 -strcpy: - or %r2, %r0, %r1 - bmsk_s %r2, %r2, 1 - brne.d %r2, 0, charloop - mov_s %r10, %r0 - ld_s %r3, [%r1, 0] - mov %r8, 0x01010101 - bbit0.d %r1, 2, loop_start - ror %r12, %r8 - sub %r2, %r3, %r8 - bic_s %r2, %r2, %r3 - tst_s %r2,%r12 - bne r3z - mov_s %r4,%r3 - .balign 4 -loop: - ld.a %r3, [%r1, 4] - st.ab %r4, [%r10, 4] -loop_start: - ld.a %r4, [%r1, 4] - sub %r2, %r3, %r8 - bic_s %r2, %r2, %r3 - tst_s %r2, %r12 - bne_s r3z - st.ab %r3, [%r10, 4] - sub %r2, %r4, %r8 - bic %r2, %r2, %r4 - tst %r2, %r12 - beq loop - mov_s %r3, %r4 -#ifdef __LITTLE_ENDIAN__ -r3z: bmsk.f %r1, %r3, 7 - lsr_s %r3, %r3, 8 -#else /* __BIG_ENDIAN__ */ -r3z: lsr.f %r1, %r3, 24 - asl_s %r3, %r3, 8 -#endif /* _ENDIAN__ */ - bne.d r3z - stb.ab %r1, [%r10, 1] - j_s [%blink] - - .balign 4 -charloop: - ldb.ab %r3, [%r1, 1] - brne.d %r3, 0, charloop - stb.ab %r3, [%r10, 1] - j [%blink] diff --git a/arch/arc/lib/strlen.S b/arch/arc/lib/strlen.S deleted file mode 100644 index 666e22c0d5..0000000000 --- a/arch/arc/lib/strlen.S +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (C) 2004, 2007-2010, 2011-2014 Synopsys, Inc. All rights reserved. 
- *
- * SPDX-License-Identifier: GPL-2.0+
- */
-
-.global strlen
-.align 4
-strlen:
-	or %r3, %r0, 7
-	ld %r2, [%r3, -7]
-	ld.a %r6, [%r3, -3]
-	mov %r4, 0x01010101
-	/* uses long immediate */
-#ifdef __LITTLE_ENDIAN__
-	asl_s %r1, %r0, 3
-	btst_s %r0, 2
-	asl %r7, %r4, %r1
-	ror %r5, %r4
-	sub %r1, %r2, %r7
-	bic_s %r1, %r1, %r2
-	mov.eq %r7, %r4
-	sub %r12, %r6, %r7
-	bic %r12, %r12, %r6
-	or.eq %r12, %r12, %r1
-	and %r12, %r12, %r5
-	brne %r12, 0, .Learly_end
-#else /* __BIG_ENDIAN__ */
-	ror %r5, %r4
-	btst_s %r0, 2
-	mov_s %r1, 31
-	sub3 %r7, %r1, %r0
-	sub %r1, %r2, %r4
-	bic_s %r1, %r1, %r2
-	bmsk %r1, %r1, %r7
-	sub %r12, %r6, %r4
-	bic %r12, %r12, %r6
-	bmsk.ne %r12, %r12, %r7
-	or.eq %r12, %r12, %r1
-	and %r12, %r12, %r5
-	brne %r12, 0, .Learly_end
-#endif /* _ENDIAN__ */
-
-.Loop:
-	ld_s %r2, [%r3, 4]
-	ld.a %r6, [%r3, 8]
-	/* stall for load result */
-	sub %r1, %r2, %r4
-	bic_s %r1, %r1, %r2
-	sub %r12, %r6, %r4
-	bic %r12, %r12, %r6
-	or %r12, %r12, %r1
-	and %r12, %r12, %r5
-	breq %r12, 0, .Loop
-.Lend:
-	and.f %r1, %r1, %r5
-	sub.ne %r3, %r3, 4
-	mov.eq %r1, %r12
-#ifdef __LITTLE_ENDIAN__
-	sub_s %r2, %r1, 1
-	bic_s %r2, %r2, %r1
-	norm %r1, %r2
-	sub_s %r0, %r0, 3
-	lsr_s %r1, %r1, 3
-	sub %r0, %r3, %r0
-	j_s.d [%blink]
-	sub %r0, %r0, %r1
-#else /* __BIG_ENDIAN__ */
-	lsr_s %r1, %r1, 7
-	mov.eq %r2, %r6
-	bic_s %r1, %r1, %r2
-	norm %r1, %r1
-	sub %r0, %r3, %r0
-	lsr_s %r1, %r1, 3
-	j_s.d [%blink]
-	add %r0, %r0, %r1
-#endif /* _ENDIAN */
-.Learly_end:
-	b.d .Lend
-	sub_s.ne %r1, %r1, %r1
diff --git a/board/synopsys/axs10x/axs10x.c b/board/synopsys/axs10x/axs10x.c
index e6b69da3da..18f7666b15 100644
--- a/board/synopsys/axs10x/axs10x.c
+++ b/board/synopsys/axs10x/axs10x.c
@@ -47,6 +47,18 @@ int board_early_init_f(void)
 }
 
 #ifdef CONFIG_ISA_ARCV2
+
+void board_jump_and_run(ulong entry, int zero, int arch, uint params)
+{
+	void (*kernel_entry)(int zero, int arch, uint params);
+
+	kernel_entry = (void (*)(int, int, uint))entry;
+
+	smp_set_core_boot_addr(entry, -1);
+	smp_kick_all_cpus();
+	kernel_entry(zero, arch, params);
+}
+
 #define RESET_VECTOR_ADDR 0x0
 
 void smp_set_core_boot_addr(unsigned long addr, int corenr)
diff --git a/board/synopsys/hsdk/hsdk.c b/board/synopsys/hsdk/hsdk.c
index 7641978a7b..5b3a063b69 100644
--- a/board/synopsys/hsdk/hsdk.c
+++ b/board/synopsys/hsdk/hsdk.c
@@ -58,6 +58,17 @@ int board_mmc_init(bd_t *bis)
 	return 0;
 }
 
+void board_jump_and_run(ulong entry, int zero, int arch, uint params)
+{
+	void (*kernel_entry)(int zero, int arch, uint params);
+
+	kernel_entry = (void (*)(int, int, uint))entry;
+
+	smp_set_core_boot_addr(entry, -1);
+	smp_kick_all_cpus();
+	kernel_entry(zero, arch, params);
+}
+
 #define RESET_VECTOR_ADDR 0x0
 
 void smp_set_core_boot_addr(unsigned long addr, int corenr)
diff --git a/common/board_f.c b/common/board_f.c
index 62588c5bad..ae8bdb7c5c 100644
--- a/common/board_f.c
+++ b/common/board_f.c
@@ -900,7 +900,8 @@ void board_init_f(ulong boot_flags)
 		hang();
 
 #if !defined(CONFIG_ARM) && !defined(CONFIG_SANDBOX) && \
-	!defined(CONFIG_EFI_APP) && !CONFIG_IS_ENABLED(X86_64)
+	!defined(CONFIG_EFI_APP) && !CONFIG_IS_ENABLED(X86_64) && \
+	!defined(CONFIG_ARC)
 	/* NOTREACHED - jump_to_copy() does not return */
 	hang();
 #endif
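
What follows are a few standalone sketches of the generic techniques this patch leans on. None of them are U-Boot code; every name invented purely for illustration is flagged as such.

The patch converts most #ifdef CONFIG_ISA_ARCV2 regions into plain C conditionals built on IS_ENABLED() (see is_isa_arcv2() in arcregs.h), so the disabled branch is still parsed and type-checked before the compiler discards it as dead code. A self-contained approximation, where IS_ENABLED_ISA_ARCV2 is a simplified stand-in for the real Kconfig-generated IS_ENABLED() machinery:

#include <stdio.h>

#define CONFIG_ISA_ARCV2 1	/* normally generated from Kconfig */

#ifdef CONFIG_ISA_ARCV2
#define IS_ENABLED_ISA_ARCV2 1	/* simplified stand-in for IS_ENABLED() */
#else
#define IS_ENABLED_ISA_ARCV2 0
#endif

static inline int is_isa_arcv2(void)
{
	return IS_ENABLED_ISA_ARCV2;
}

int main(void)
{
	/*
	 * Unlike an #ifdef, the branch not taken is still compiled and
	 * type-checked; it is simply eliminated as dead code afterwards.
	 */
	if (is_isa_arcv2())
		puts("ARCv2 code path");
	else
		puts("ARCompact code path");

	return 0;
}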
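
The new arch/arc/include/asm/arc-bcr.h gathers the Build Configuration Register layouts into shared unions of bitfields: a raw AUX register is read once into .word and then decoded through .fields, replacing the ad-hoc structs previously declared inside each function. A host-runnable sketch of the decode step from read_decode_cache_bcr(); the register value is invented, and only the little-endian layout is shown (bitfield ordering is compiler- and endianness-dependent, which is exactly why the header carries both variants):

#include <stdio.h>

/* Little-endian layout of union bcr_di_cache from this patch */
union bcr_di_cache {
	struct {
		unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
	} fields;
	unsigned int word;
};

int main(void)
{
	/* Invented raw ARC_BCR_DC_BUILD value, for illustration only */
	union bcr_di_cache dbcr = { .word = 0x00021204 };

	if (!dbcr.fields.ver) {
		puts("no D$ present");
		return 0;
	}

	/* Same decode as read_decode_cache_bcr(): line = 16 << line_len */
	printf("D$ ver %u, line length %d bytes\n",
	       (unsigned)dbcr.fields.ver, 16 << dbcr.fields.line_len);

	return 0;
}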
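
In bootm.c the hard-wired smp_set_core_boot_addr()/smp_kick_all_cpus() calls give way to a __weak board_jump_and_run(): single-core targets inherit the generic jump, while axs10x and hsdk link in strong overrides that kick the secondary cores first. A minimal demonstration of that weak/strong linkage (GCC/Clang on ELF assumed; fake_kernel is an invented stand-in for the kernel entry point, and the sketch assumes function pointers round-trip through unsigned long as on typical flat-memory targets):

#include <stdio.h>

typedef unsigned long ulong;
typedef unsigned int uint;

/* Generic single-core implementation, as in arch/arc/lib/bootm.c */
__attribute__((weak)) void board_jump_and_run(ulong entry, int zero,
					      int arch, uint params)
{
	void (*kernel_entry)(int zero, int arch, uint params);

	kernel_entry = (void (*)(int, int, uint))entry;
	kernel_entry(zero, arch, params);
}

/* Invented payload standing in for the kernel entry point */
static void fake_kernel(int zero, int arch, uint params)
{
	printf("'kernel' entered: r0=%d r1=%d r2=0x%x\n", zero, arch, params);
}

int main(void)
{
	/*
	 * An SMP board would provide a strong board_jump_and_run() that
	 * calls its smp_* helpers before jumping, overriding this one.
	 */
	board_jump_and_run((ulong)fake_kernel, 2, 0, 0x1000);

	return 0;
}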
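
The long [ NOTE 1 ] comment in cache.c boils down to two rules: never split the entire-D$ flush from the invalidate (use one flush-n-inv operation), and never allow a real function call, and hence a BLINK spill to the stack, between them. The patch enforces the second rule with a forced-inline attribute. The pattern, reduced to a compilable skeleton, with the cache operation itself stubbed out since the real one is privileged ARC aux-register work:

/* Forced inlining: no call, so no BLINK spill onto a doomed cache line */
#define inlined_cachefunc inline __attribute__((always_inline))

#define OP_INV		(1 << 0)
#define OP_FLUSH	(1 << 1)
#define OP_FLUSH_N_INV	(OP_FLUSH | OP_INV)

/* Stub: the real __dc_entire_op() programs DC_CTRL and ARC_AUX_DC_IVDC */
static inlined_cachefunc void dc_entire_op(const int op)
{
	(void)op;
}

void dcache_disable_path(void)
{
	/*
	 * One combined flush-n-inv, inlined into the caller. Calling
	 * flush() and then invalidate() as two separate real functions
	 * is what loops forever in the [ NOTE 1 ] scenario.
	 */
	dc_entire_op(OP_FLUSH_N_INV);
}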
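
arc_ioc_setup() encodes the IOC aperture size as 2 ^ (SIZE + 2) KiB, which is where order_base_2(ap_size / 1024) - 2 comes from, and panics unless the size is a power of two of at least 4 KiB with a size-aligned base. The same arithmetic and checks as a host-runnable sketch (the base and size values are invented; ilog2_exact() stands in for U-Boot's order_base_2(), with which it coincides for exact powers of two):

#include <stdio.h>

static int is_power_of_2(unsigned long n)
{
	return n && !(n & (n - 1));
}

/* log2 for exact powers of two, standing in for order_base_2() */
static unsigned int ilog2_exact(unsigned long n)
{
	unsigned int r = 0;

	while (n >>= 1)
		r++;
	return r;
}

int main(void)
{
	unsigned long ap_base = 0x80000000;	   /* assumed DDR base */
	unsigned long ap_size = 512 * 1024 * 1024; /* assumed DDR size */

	if (!is_power_of_2(ap_size) || ap_size < 4096) {
		fputs("aperture size must be a power of 2, >= 4 KiB\n", stderr);
		return 1;
	}
	if (ap_base % ap_size) {
		fputs("aperture base must be aligned to its size\n", stderr);
		return 1;
	}

	/* 2 ^ (SIZE + 2) KiB: 512 MiB -> 0x11, 1 GiB -> 0x12, ... */
	printf("ARC_AUX_IO_COH_AP0_SIZE = 0x%x\n",
	       ilog2_exact(ap_size / 1024) - 2);
	printf("ARC_AUX_IO_COH_AP0_BASE = 0x%lx\n", ap_base >> 12);

	return 0;
}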
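
Finally, __dcache_line_loop() rounds every request to whole cache lines: it widens the byte count by the start address's offset within its line, aligns the start down, and issues one aux-register write per line, DIV_ROUND_UP(sz, line) times. The address arithmetic on its own, with the line size and address range invented for illustration:

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned long l1_line_sz = 64;		    /* assumed L1 line size */
	unsigned long line_mask = ~(l1_line_sz - 1); /* CACHE_LINE_MASK */
	unsigned long paddr = 0x80001234, sz = 200;  /* arbitrary range */
	int num_lines;

	/* Grow the size by the misalignment, then align the start down */
	sz += paddr & ~line_mask;
	paddr &= line_mask;
	num_lines = DIV_ROUND_UP(sz, l1_line_sz);

	printf("op on %d lines starting at 0x%lx\n", num_lines, paddr);
	while (num_lines-- > 0) {
		/* the real loop writes paddr to ARC_AUX_DC_{IVDL,FLDL} here */
		paddr += l1_line_sz;
	}
	printf("first line past the range: 0x%lx\n", paddr);

	return 0;
}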