From 41cada4d2499705b321ab650891e76088d330a37 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Tue, 16 Jan 2018 19:20:26 +0300 Subject: ARC: ARCv2: Cache: Fixed operation without IOC The previous SLC management implementation is broken. It seems it was never sufficiently tested, probably because most of the time IOC was used instead (i.e. no manual cache operations were done). Now if we disable IOC in U-Boot we'll get a lot of errors while using DMA-enabled peripherals. This time we fix it by replacing the broken per-line SLC operations with region operations, as it is done in the Linux kernel (we took it from v4.14 which is the latest stable as of today). Among other things this implementation might be a bit faster because instead of iterating over each and every cache line we take care of the entire region in one go. Main changes: * Replaced __slc_line_op (per line operations) by __slc_rgn_op (region operations). * Reworked __slc_entire_op to get rid of __after_slc_op and __before_slc_op functions. Note that the flush fix (flush only instead of flush-n-inv when OP_FLUSH is used, see [1] for more details) is already incorporated here. * Added SLC invalidation to invalidate_icache_all(). * Added (start >= end) check to invalidate_dcache_range() and flush_dcache_range() as some buggy drivers pass region start == end. * Added read-out of MMU BCR so we may know if PAE40 exists in HW and then act on the particular AUX regs accordingly. 
[1] http://lists.infradead.org/pipermail/linux-snps-arc/2018-January/003357.html Signed-off-by: Eugeniy Paltsev Signed-off-by: Alexey Brodkin --- arch/arc/lib/cache.c | 185 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 116 insertions(+), 69 deletions(-) (limited to 'arch/arc/lib') diff --git a/arch/arc/lib/cache.c b/arch/arc/lib/cache.c index 1073e1570f..a6bbe3ce5d 100644 --- a/arch/arc/lib/cache.c +++ b/arch/arc/lib/cache.c @@ -20,12 +20,17 @@ #define DC_CTRL_INV_MODE_FLUSH (1 << 6) #define DC_CTRL_FLUSH_STATUS (1 << 8) #define CACHE_VER_NUM_MASK 0xF -#define SLC_CTRL_SB (1 << 2) #define OP_INV 0x1 #define OP_FLUSH 0x2 #define OP_INV_IC 0x3 +/* Bit val in SLC_CONTROL */ +#define SLC_CTRL_DIS 0x001 +#define SLC_CTRL_IM 0x040 +#define SLC_CTRL_BUSY 0x100 +#define SLC_CTRL_RGN_OP_INV 0x200 + /* * By default that variable will fall into .bss section. * But .bss section is not relocated and so it will be initilized before @@ -41,88 +46,115 @@ bool icache_exists __section(".data") = false; int slc_line_sz __section(".data"); bool slc_exists __section(".data") = false; bool ioc_exists __section(".data") = false; +bool pae_exists __section(".data") = false; -static unsigned int __before_slc_op(const int op) +void read_decode_mmu_bcr(void) { - unsigned int reg = reg; + /* TODO: should we compare mmu version from BCR and from CONFIG? 
*/ +#if (CONFIG_ARC_MMU_VER >= 4) + u32 tmp; - if (op == OP_INV) { - /* - * IM is set by default and implies Flush-n-inv - * Clear it here for vanilla inv - */ - reg = read_aux_reg(ARC_AUX_SLC_CTRL); - write_aux_reg(ARC_AUX_SLC_CTRL, reg & ~DC_CTRL_INV_MODE_FLUSH); - } + tmp = read_aux_reg(ARC_AUX_MMU_BCR); - return reg; -} + struct bcr_mmu_4 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1, + n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3; +#else + /* DTLB ITLB JES JE JA */ + unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2, + pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8; +#endif /* CONFIG_CPU_BIG_ENDIAN */ + } *mmu4; -static void __after_slc_op(const int op, unsigned int reg) -{ - if (op & OP_FLUSH) { /* flush / flush-n-inv both wait */ - /* - * Make sure "busy" bit reports correct status, - * see STAR 9001165532 - */ - read_aux_reg(ARC_AUX_SLC_CTRL); - while (read_aux_reg(ARC_AUX_SLC_CTRL) & - DC_CTRL_FLUSH_STATUS) - ; - } + mmu4 = (struct bcr_mmu_4 *)&tmp; - /* Switch back to default Invalidate mode */ - if (op == OP_INV) - write_aux_reg(ARC_AUX_SLC_CTRL, reg | DC_CTRL_INV_MODE_FLUSH); + pae_exists = !!mmu4->pae; +#endif /* (CONFIG_ARC_MMU_VER >= 4) */ } -static inline void __slc_line_loop(unsigned long paddr, unsigned long sz, - const int op) +static void __slc_entire_op(const int op) { - unsigned int aux_cmd; - int num_lines; + unsigned int ctrl; + + ctrl = read_aux_reg(ARC_AUX_SLC_CTRL); -#define SLC_LINE_MASK (~(slc_line_sz - 1)) + if (!(op & OP_FLUSH)) /* i.e. OP_INV */ + ctrl &= ~SLC_CTRL_IM; /* clear IM: Disable flush before Inv */ + else + ctrl |= SLC_CTRL_IM; - aux_cmd = op & OP_INV ? 
ARC_AUX_SLC_IVDL : ARC_AUX_SLC_FLDL; + write_aux_reg(ARC_AUX_SLC_CTRL, ctrl); - sz += paddr & ~SLC_LINE_MASK; - paddr &= SLC_LINE_MASK; + if (op & OP_INV) /* Inv or flush-n-inv use same cmd reg */ + write_aux_reg(ARC_AUX_SLC_INVALIDATE, 0x1); + else + write_aux_reg(ARC_AUX_SLC_FLUSH, 0x1); - num_lines = DIV_ROUND_UP(sz, slc_line_sz); + /* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */ + read_aux_reg(ARC_AUX_SLC_CTRL); - while (num_lines-- > 0) { - write_aux_reg(aux_cmd, paddr); - paddr += slc_line_sz; - } + /* Important to wait for flush to complete */ + while (read_aux_reg(ARC_AUX_SLC_CTRL) & SLC_CTRL_BUSY); } -static inline void __slc_entire_op(const int cacheop) +static void slc_upper_region_init(void) { - int aux; - unsigned int ctrl_reg = __before_slc_op(cacheop); + /* + * ARC_AUX_SLC_RGN_END1 and ARC_AUX_SLC_RGN_START1 are always == 0 + * as we don't use PAE40. + */ + write_aux_reg(ARC_AUX_SLC_RGN_END1, 0); + write_aux_reg(ARC_AUX_SLC_RGN_START1, 0); +} - if (cacheop & OP_INV) /* Inv or flush-n-inv use same cmd reg */ - aux = ARC_AUX_SLC_INVALIDATE; +static void __slc_rgn_op(unsigned long paddr, unsigned long sz, const int op) +{ + unsigned int ctrl; + unsigned long end; + + /* + * The Region Flush operation is specified by CTRL.RGN_OP[11..9] + * - b'000 (default) is Flush, + * - b'001 is Invalidate if CTRL.IM == 0 + * - b'001 is Flush-n-Invalidate if CTRL.IM == 1 + */ + ctrl = read_aux_reg(ARC_AUX_SLC_CTRL); + + /* Don't rely on default value of IM bit */ + if (!(op & OP_FLUSH)) /* i.e. 
OP_INV */ + ctrl &= ~SLC_CTRL_IM; /* clear IM: Disable flush before Inv */ else - aux = ARC_AUX_SLC_FLUSH; + ctrl |= SLC_CTRL_IM; - write_aux_reg(aux, 0x1); + if (op & OP_INV) + ctrl |= SLC_CTRL_RGN_OP_INV; /* Inv or flush-n-inv */ + else + ctrl &= ~SLC_CTRL_RGN_OP_INV; - __after_slc_op(cacheop, ctrl_reg); -} + write_aux_reg(ARC_AUX_SLC_CTRL, ctrl); -static inline void __slc_line_op(unsigned long paddr, unsigned long sz, - const int cacheop) -{ - unsigned int ctrl_reg = __before_slc_op(cacheop); - __slc_line_loop(paddr, sz, cacheop); - __after_slc_op(cacheop, ctrl_reg); + /* + * Lower bits are ignored, no need to clip + * END needs to be setup before START (latter triggers the operation) + * END can't be same as START, so add (l2_line_sz - 1) to sz + */ + end = paddr + sz + slc_line_sz - 1; + + /* + * Upper addresses (ARC_AUX_SLC_RGN_END1 and ARC_AUX_SLC_RGN_START1) + * are always == 0 as we don't use PAE40, so we only setup lower ones + * (ARC_AUX_SLC_RGN_END and ARC_AUX_SLC_RGN_START) + */ + write_aux_reg(ARC_AUX_SLC_RGN_END, end); + write_aux_reg(ARC_AUX_SLC_RGN_START, paddr); + + /* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */ + read_aux_reg(ARC_AUX_SLC_CTRL); + + while (read_aux_reg(ARC_AUX_SLC_CTRL) & SLC_CTRL_BUSY); } -#else -#define __slc_entire_op(cacheop) -#define __slc_line_op(paddr, sz, cacheop) -#endif +#endif /* CONFIG_ISA_ARCV2 */ #ifdef CONFIG_ISA_ARCV2 static void read_decode_cache_bcr_arcv2(void) @@ -244,7 +276,17 @@ void cache_init(void) write_aux_reg(ARC_AUX_IO_COH_ENABLE, 1); } -#endif + + read_decode_mmu_bcr(); + + /* + * ARC_AUX_SLC_RGN_START1 and ARC_AUX_SLC_RGN_END1 register exist + * only if PAE exists in current HW. So we had to check pae_exist + * before using them. 
+ */ + if (slc_exists && pae_exists) + slc_upper_region_init(); +#endif /* CONFIG_ISA_ARCV2 */ } int icache_status(void) @@ -272,7 +314,6 @@ void icache_disable(void) IC_CTRL_CACHE_DISABLE); } -#ifndef CONFIG_SYS_DCACHE_OFF void invalidate_icache_all(void) { /* Any write to IC_IVIC register triggers invalidation of entire I$ */ @@ -287,12 +328,12 @@ void invalidate_icache_all(void) __builtin_arc_nop(); read_aux_reg(ARC_AUX_IC_CTRL); /* blocks */ } -} -#else -void invalidate_icache_all(void) -{ -} + +#ifdef CONFIG_ISA_ARCV2 + if (slc_exists) + __slc_entire_op(OP_INV); #endif +} int dcache_status(void) { @@ -419,6 +460,9 @@ static inline void __dc_line_op(unsigned long paddr, unsigned long sz, void invalidate_dcache_range(unsigned long start, unsigned long end) { + if (start >= end) + return; + #ifdef CONFIG_ISA_ARCV2 if (!ioc_exists) #endif @@ -426,12 +470,15 @@ void invalidate_dcache_range(unsigned long start, unsigned long end) #ifdef CONFIG_ISA_ARCV2 if (slc_exists && !ioc_exists) - __slc_line_op(start, end - start, OP_INV); + __slc_rgn_op(start, end - start, OP_INV); #endif } void flush_dcache_range(unsigned long start, unsigned long end) { + if (start >= end) + return; + #ifdef CONFIG_ISA_ARCV2 if (!ioc_exists) #endif @@ -439,7 +486,7 @@ void flush_dcache_range(unsigned long start, unsigned long end) #ifdef CONFIG_ISA_ARCV2 if (slc_exists && !ioc_exists) - __slc_line_op(start, end - start, OP_FLUSH); + __slc_rgn_op(start, end - start, OP_FLUSH); #endif } -- cgit From b0146f9e29ca2e82262416aca65395c322a618f9 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Tue, 16 Jan 2018 19:20:28 +0300 Subject: ARC: Cache: Disable IOC by default We'd like to keep the IOC HW in the same state as it is right after reset when we start the Linux kernel, so there will be no re-configuration of IOC on the go. The point is U-Boot doesn't benefit a lot from IOC as it doesn't do a lot of DMA operations, especially on multiple cores simultaneously. 
At the same time re-configuration of IOC in run-time might become quite a tricky experience because we need to make sure there're no DMA transactions in flight, otherwise unexpected consequences might affect us much later and debugging those kinds of issues will be a real nightmare. That said let's make our life easier a little bit. Signed-off-by: Eugeniy Paltsev Signed-off-by: Alexey Brodkin --- arch/arc/lib/cache.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/arc/lib') diff --git a/arch/arc/lib/cache.c b/arch/arc/lib/cache.c index a6bbe3ce5d..d17948dbaf 100644 --- a/arch/arc/lib/cache.c +++ b/arch/arc/lib/cache.c @@ -48,6 +48,9 @@ bool slc_exists __section(".data") = false; bool ioc_exists __section(".data") = false; bool pae_exists __section(".data") = false; +/* To force enable IOC set ioc_enable to 'true' */ +bool ioc_enable __section(".data") = false; + void read_decode_mmu_bcr(void) { /* TODO: should we compare mmu version from BCR and from CONFIG? */ @@ -200,7 +203,7 @@ static void read_decode_cache_bcr_arcv2(void) } cbcr; cbcr.word = read_aux_reg(ARC_BCR_CLUSTER); - if (cbcr.fields.c) + if (cbcr.fields.c && ioc_enable) ioc_exists = true; } #endif -- cgit From 19b10a42f6f89f49ce7f7f9c6e575c878f5fd1b7 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Tue, 16 Jan 2018 19:20:29 +0300 Subject: ARC: Cache: Fix style violations reported by checkpatch Signed-off-by: Eugeniy Paltsev Signed-off-by: Alexey Brodkin --- arch/arc/lib/cache.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'arch/arc/lib') diff --git a/arch/arc/lib/cache.c b/arch/arc/lib/cache.c index d17948dbaf..04f1d9d59b 100644 --- a/arch/arc/lib/cache.c +++ b/arch/arc/lib/cache.c @@ -13,12 +13,12 @@ #include /* Bit values in IC_CTRL */ -#define IC_CTRL_CACHE_DISABLE (1 << 0) +#define IC_CTRL_CACHE_DISABLE BIT(0) /* Bit values in DC_CTRL */ -#define DC_CTRL_CACHE_DISABLE (1 << 0) -#define DC_CTRL_INV_MODE_FLUSH (1 << 6) -#define 
DC_CTRL_FLUSH_STATUS (1 << 8) +#define DC_CTRL_CACHE_DISABLE BIT(0) +#define DC_CTRL_INV_MODE_FLUSH BIT(6) +#define DC_CTRL_FLUSH_STATUS BIT(8) #define CACHE_VER_NUM_MASK 0xF #define OP_INV 0x1 @@ -232,7 +232,7 @@ void read_decode_cache_bcr(void) } dbcr.word = read_aux_reg(ARC_BCR_DC_BUILD); - if (dbcr.fields.ver){ + if (dbcr.fields.ver) { dcache_exists = true; l1_line_sz = dc_line_sz = 16 << dbcr.fields.line_len; if (!dc_line_sz) @@ -267,8 +267,7 @@ void cache_init(void) * so setting 0x11 implies 512M, 0x12 implies 1G... */ write_aux_reg(ARC_AUX_IO_COH_AP0_SIZE, - order_base_2(ap_size/1024) - 2); - + order_base_2(ap_size / 1024) - 2); /* IOC Aperture start must be aligned to the size of the aperture */ if (ap_base % ap_size != 0) @@ -277,7 +276,6 @@ void cache_init(void) write_aux_reg(ARC_AUX_IO_COH_AP0_BASE, ap_base >> 12); write_aux_reg(ARC_AUX_IO_COH_PARTIAL, 1); write_aux_reg(ARC_AUX_IO_COH_ENABLE, 1); - } read_decode_mmu_bcr(); @@ -426,8 +424,7 @@ static unsigned int __before_dc_op(const int op) static void __after_dc_op(const int op, unsigned int reg) { if (op & OP_FLUSH) /* flush / flush-n-inv both wait */ - while (read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_FLUSH_STATUS) - ; + while (read_aux_reg(ARC_AUX_DC_CTRL) & DC_CTRL_FLUSH_STATUS); /* Switch back to default Invalidate mode */ if (op == OP_INV) @@ -453,6 +450,7 @@ static inline void __dc_line_op(unsigned long paddr, unsigned long sz, const int cacheop) { unsigned int ctrl_reg = __before_dc_op(cacheop); + __cache_line_loop(paddr, sz, cacheop); __after_dc_op(cacheop, ctrl_reg); } -- cgit From c0e6769a82f79a0fc20baa9257ebd17b1cecf4fa Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Tue, 16 Jan 2018 21:52:25 +0300 Subject: ARC: Invalidate instruction and data caches early on boot This is useful to make sure no stale data exists in caches after bootloaders. The worst thing could be some lines of cache were locked in a bootloader for example during DDR recalibration and never unlocked. 
This may lead to really unpredictable issues later down the line. Signed-off-by: Eugeniy Paltsev Signed-off-by: Alexey Brodkin --- arch/arc/lib/start.S | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/arc/lib') diff --git a/arch/arc/lib/start.S b/arch/arc/lib/start.S index 95d64f9d43..0d72fe71d4 100644 --- a/arch/arc/lib/start.S +++ b/arch/arc/lib/start.S @@ -44,6 +44,14 @@ ENTRY(_start) #endif sr r5, [ARC_AUX_IC_CTRL] + mov r5, 1 + sr r5, [ARC_AUX_IC_IVIC] + ; As per ARC HS databook (see chapter 5.3.3.2) + ; it is required to add 3 NOPs after each write to IC_IVIC. + nop + nop + nop + 1: ; Disable/enable D-cache according to configuration lr r5, [ARC_BCR_DC_BUILD] @@ -57,6 +65,10 @@ ENTRY(_start) #endif sr r5, [ARC_AUX_DC_CTRL] + mov r5, 1 + sr r5, [ARC_AUX_DC_IVDC] + + 1: #ifdef CONFIG_ISA_ARCV2 ; Disable System-Level Cache (SLC) -- cgit