summaryrefslogtreecommitdiff
path: root/arch/riscv/cpu
diff options
context:
space:
mode:
authorLukas Auer <lukas.auer@aisec.fraunhofer.de>2019-03-17 19:28:37 +0100
committerAndes <uboot@andestech.com>2019-04-08 09:44:26 +0800
commit3dea63c8445b25eb3de471410bbafcf54c9f0e9b (patch)
treec1f7e822e73d88ff13c04d723fbf7e15a27270a2 /arch/riscv/cpu
parent1446b26f7652124f0e3e98c348cdbc4fc55eb0cb (diff)
riscv: add support for multi-hart systems
On RISC-V, all harts boot independently. To be able to run on a multi-hart system, U-Boot must be extended with the functionality to manage all harts in the system. All harts entering U-Boot are registered in the available_harts mask stored in global data. A hart lottery system as used in the Linux kernel selects the hart U-Boot runs on. All other harts are halted. U-Boot can delegate functions to them using smp_call_function(). Every hart has a valid pointer to the global data structure and a 8KiB stack by default. The stack size is set with CONFIG_STACK_SIZE_SHIFT. Signed-off-by: Lukas Auer <lukas.auer@aisec.fraunhofer.de> Reviewed-by: Anup Patel <anup.patel@wdc.com> Reviewed-by: Bin Meng <bmeng.cn@gmail.com> Tested-by: Bin Meng <bmeng.cn@gmail.com>
Diffstat (limited to 'arch/riscv/cpu')
-rw-r--r--arch/riscv/cpu/cpu.c9
-rw-r--r--arch/riscv/cpu/start.S134
2 files changed, 141 insertions, 2 deletions
diff --git a/arch/riscv/cpu/cpu.c b/arch/riscv/cpu/cpu.c
index e662140427..c32de8a4c3 100644
--- a/arch/riscv/cpu/cpu.c
+++ b/arch/riscv/cpu/cpu.c
@@ -12,10 +12,17 @@
#include <dm/uclass-internal.h>
/*
- * prior_stage_fdt_address must be stored in the data section since it is used
+ * The variables here must be stored in the data section since they are used
* before the bss section is available.
*/
phys_addr_t prior_stage_fdt_address __attribute__((section(".data")));
+u32 hart_lottery __attribute__((section(".data"))) = 0;
+
+/*
+ * The main hart running U-Boot has acquired available_harts_lock until it has
+ * finished initialization of global data.
+ */
+u32 available_harts_lock = 1;
static inline bool supports_extension(char ext)
{
diff --git a/arch/riscv/cpu/start.S b/arch/riscv/cpu/start.S
index bcc0ff696d..f55b8cbc37 100644
--- a/arch/riscv/cpu/start.S
+++ b/arch/riscv/cpu/start.S
@@ -13,6 +13,7 @@
#include <config.h>
#include <common.h>
#include <elf.h>
+#include <asm/csr.h>
#include <asm/encoding.h>
#include <generated/asm-offsets.h>
@@ -45,6 +46,23 @@ _start:
/* mask all interrupts */
csrw MODE_PREFIX(ie), zero
+#ifdef CONFIG_SMP
+ /* check if hart is within range */
+ /* tp: hart id */
+ li t0, CONFIG_NR_CPUS
+ bge tp, t0, hart_out_of_bounds_loop
+#endif
+
+#ifdef CONFIG_SMP
+ /* set xSIE bit to receive IPIs */
+#ifdef CONFIG_RISCV_MMODE
+ li t0, MIE_MSIE
+#else
+ li t0, SIE_SSIE
+#endif
+ csrs MODE_PREFIX(ie), t0
+#endif
+
/*
* Set stackpointer in internal/ex RAM to call board_init_f
*/
@@ -56,7 +74,30 @@ call_board_init_f:
call_board_init_f_0:
mv a0, sp
jal board_init_f_alloc_reserve
+
+ /*
+ * Set global data pointer here for all harts, uninitialized at this
+ * point.
+ */
+ mv gp, a0
+
+ /* setup stack */
+#ifdef CONFIG_SMP
+ /* tp: hart id */
+ slli t0, tp, CONFIG_STACK_SIZE_SHIFT
+ sub sp, a0, t0
+#else
mv sp, a0
+#endif
+
+ /*
+ * Pick hart to initialize global data and run U-Boot. The other harts
+ * wait for initialization to complete.
+ */
+ la t0, hart_lottery
+ li s2, 1
+ amoswap.w s2, t1, 0(t0)
+ bnez s2, wait_for_gd_init
la t0, prior_stage_fdt_address
SREG s1, 0(t0)
@@ -66,6 +107,33 @@ call_board_init_f_0:
/* save the boot hart id to global_data */
SREG tp, GD_BOOT_HART(gp)
+ la t0, available_harts_lock
+ fence rw, w
+ amoswap.w zero, zero, 0(t0)
+
+wait_for_gd_init:
+ la t0, available_harts_lock
+ li t1, 1
+1: amoswap.w t1, t1, 0(t0)
+ fence r, rw
+ bnez t1, 1b
+
+ /* register available harts in the available_harts mask */
+ li t1, 1
+ sll t1, t1, tp
+ LREG t2, GD_AVAILABLE_HARTS(gp)
+ or t2, t2, t1
+ SREG t2, GD_AVAILABLE_HARTS(gp)
+
+ fence rw, w
+ amoswap.w zero, zero, 0(t0)
+
+ /*
+ * Continue on hart lottery winner, others branch to
+ * secondary_hart_loop.
+ */
+ bnez s2, secondary_hart_loop
+
/* Enable cache */
jal icache_enable
jal dcache_enable
@@ -95,7 +163,14 @@ relocate_code:
*Set up the stack
*/
stack_setup:
+#ifdef CONFIG_SMP
+ /* tp: hart id */
+ slli t0, tp, CONFIG_STACK_SIZE_SHIFT
+ sub sp, s2, t0
+#else
mv sp, s2
+#endif
+
la t0, _start
sub t6, s4, t0 /* t6 <- relocation offset */
beq t0, s4, clear_bss /* skip relocation */
@@ -175,13 +250,30 @@ clear_bss:
add t0, t0, t6 /* t0 <- rel __bss_start in RAM */
la t1, __bss_end /* t1 <- rel __bss_end in FLASH */
add t1, t1, t6 /* t1 <- rel __bss_end in RAM */
- beq t0, t1, call_board_init_r
+ beq t0, t1, relocate_secondary_harts
clbss_l:
SREG zero, 0(t0) /* clear loop... */
addi t0, t0, REGBYTES
bne t0, t1, clbss_l
+relocate_secondary_harts:
+#ifdef CONFIG_SMP
+ /* send relocation IPI */
+ la t0, secondary_hart_relocate
+ add a0, t0, t6
+
+ /* store relocation offset */
+ mv s5, t6
+
+ mv a1, s2
+ mv a2, s3
+ jal smp_call_function
+
+ /* restore relocation offset */
+ mv t6, s5
+#endif
+
/*
* We are done. Do not return, instead branch to second part of board
* initialization, now running from RAM.
@@ -202,3 +294,43 @@ call_board_init_r:
* jump to it ...
*/
jr t4 /* jump to board_init_r() */
+
+#ifdef CONFIG_SMP
+hart_out_of_bounds_loop:
+ /* Harts in this loop are out of bounds, increase CONFIG_NR_CPUS. */
+ wfi
+ j hart_out_of_bounds_loop
+#endif
+
+#ifdef CONFIG_SMP
+/* SMP relocation entry */
+secondary_hart_relocate:
+ /* a1: new sp */
+ /* a2: new gd */
+ /* tp: hart id */
+
+ /* setup stack */
+ slli t0, tp, CONFIG_STACK_SIZE_SHIFT
+ sub sp, a1, t0
+
+ /* update global data pointer */
+ mv gp, a2
+#endif
+
+secondary_hart_loop:
+ wfi
+
+#ifdef CONFIG_SMP
+ csrr t0, MODE_PREFIX(ip)
+#ifdef CONFIG_RISCV_MMODE
+ andi t0, t0, MIE_MSIE
+#else
+ andi t0, t0, SIE_SSIE
+#endif
+ beqz t0, secondary_hart_loop
+
+ mv a0, tp
+ jal handle_ipi
+#endif
+
+ j secondary_hart_loop