6 files changed, 392 insertions, 32 deletions
diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig
index 51d143687b..fd0082d22a 100644
--- a/arch/arm/mach-tegra/Kconfig
+++ b/arch/arm/mach-tegra/Kconfig
@@ -60,8 +60,14 @@ config TEGRA_ARMV7_COMMON
 config TEGRA_ARMV8_COMMON
 	bool "Tegra 64-bit common options"
 	select ARM64
+	select LINUX_KERNEL_IMAGE_HEADER
 	select TEGRA_COMMON
 
+if TEGRA_ARMV8_COMMON
+config LNX_KRNL_IMG_TEXT_OFFSET_BASE
+	default 0x80000000
+endif
+
 choice
 	prompt "Tegra SoC select"
 	optional
diff --git a/arch/arm/mach-tegra/arm64-mmu.c b/arch/arm/mach-tegra/arm64-mmu.c
index a79a5192e0..3a126bdec4 100644
--- a/arch/arm/mach-tegra/arm64-mmu.c
+++ b/arch/arm/mach-tegra/arm64-mmu.c
@@ -12,7 +12,8 @@
 #include <asm/system.h>
 #include <asm/armv8/mmu.h>
 
-static struct mm_region tegra_mem_map[] = {
+/* size: IO + NR_DRAM_BANKS + terminator */
+struct mm_region tegra_mem_map[1 + CONFIG_NR_DRAM_BANKS + 1] = {
 	{
 		.virt = 0x0UL,
 		.phys = 0x0UL,
diff --git a/arch/arm/mach-tegra/tegra186/Kconfig b/arch/arm/mach-tegra/tegra186/Kconfig
index b2e53b58ca..479c0955ee 100644
--- a/arch/arm/mach-tegra/tegra186/Kconfig
+++ b/arch/arm/mach-tegra/tegra186/Kconfig
@@ -21,6 +21,9 @@ endchoice
 config SYS_SOC
 	default "tegra186"
 
+config SYS_INIT_SP_BSS_OFFSET
+	default 524288
+
 source "board/nvidia/p2771-0000/Kconfig"
 
 endif
diff --git a/arch/arm/mach-tegra/tegra186/nvtboot_board.c b/arch/arm/mach-tegra/tegra186/nvtboot_board.c
index b94eb424aa..8ecb454443 100644
--- a/arch/arm/mach-tegra/tegra186/nvtboot_board.c
+++ b/arch/arm/mach-tegra/tegra186/nvtboot_board.c
@@ -1,16 +1,278 @@
 /*
- * Copyright (c) 2016, NVIDIA CORPORATION.
+ * Copyright (c) 2016-2018, NVIDIA CORPORATION.
  *
  * SPDX-License-Identifier: GPL-2.0+
  */
 
+#include <stdlib.h>
 #include <common.h>
 #include <fdt_support.h>
 #include <fdtdec.h>
 #include <asm/arch/tegra.h>
+#include <asm/armv8/mmu.h>
+
+DECLARE_GLOBAL_DATA_PTR;
 
 extern unsigned long nvtboot_boot_x0;
 
+/*
+ * The following few functions run late during the boot process and dynamically
+ * calculate the load address of various binaries. To keep track of multiple
+ * allocations, some writable list of RAM banks must be used. tegra_mem_map[]
+ * is used for this purpose to avoid making yet another copy of the list of RAM
+ * banks. This is safe because tegra_mem_map[] is only used once during very
+ * early boot to create U-Boot's page tables, long before this code runs. If
+ * this assumption becomes invalid later, we can just fix the code to copy the
+ * list of RAM banks into some private data structure before running.
+ */
+
+extern struct mm_region tegra_mem_map[];
+
+/* Returns malloc()ed "<var><ext>" (caller frees), or NULL if out of memory */
+static char *gen_varname(const char *var, const char *ext)
+{
+	size_t stem_len = strlen(var);
+	char *name = malloc(stem_len + strlen(ext) + 1);
+
+	if (name) {
+		/* Build the name in place: stem first, suffix right after */
+		strcpy(name, var);
+		strcpy(name + stem_len, ext);
+	}
+	return name;
+}
+
+/*
+ * Carve [allocated_start, allocated_end) out of DRAM entry "bank", which must
+ * contain it. DRAM entries sit at tegra_mem_map[1..CONFIG_NR_DRAM_BANKS].
+ */
+static void mark_ram_allocated(int bank, u64 allocated_start, u64 allocated_end)
+{
+	u64 bank_start = tegra_mem_map[bank].virt;
+	u64 bank_end = bank_start + tegra_mem_map[bank].size;
+	bool keep_front = allocated_start != bank_start;
+	bool keep_tail = allocated_end != bank_end;
+
+	if (keep_front && keep_tail && bank >= CONFIG_NR_DRAM_BANKS) {
+		/* Last DRAM slot: no room to split, so keep the larger piece */
+		if (allocated_start - bank_start >= bank_end - allocated_end)
+			keep_tail = false;
+		else
+			keep_front = false;
+	}
+	if (keep_front && keep_tail) {
+		/*
+		 * Split in two: shift entries bank..CONFIG_NR_DRAM_BANKS - 1
+		 * up one slot (duplicating this one, dropping the last DRAM
+		 * entry). memmove() takes bytes, hence the sizeof() scaling.
+		 */
+		memmove(&tegra_mem_map[bank + 1], &tegra_mem_map[bank],
+			(CONFIG_NR_DRAM_BANKS - bank) * sizeof(*tegra_mem_map));
+		tegra_mem_map[bank].size = allocated_start - bank_start;
+		bank++;
+		keep_front = false;
+	}
+	if (keep_front) {
+		tegra_mem_map[bank].size = allocated_start - bank_start;
+	} else if (keep_tail) {
+		tegra_mem_map[bank].virt = allocated_end;
+		tegra_mem_map[bank].phys = allocated_end;
+		tegra_mem_map[bank].size = bank_end - allocated_end;
+	} else {
+		/* Fully consumed; later banks are deliberately not moved down */
+		tegra_mem_map[bank].size = 0;
+	}
+}
+
+/* Remove [start, start + size) from all DRAM banks, clipped to each bank */
+static void reserve_ram(u64 start, u64 size)
+{
+	u64 end = start + size;
+	int bank;
+
+	for (bank = 1; bank <= CONFIG_NR_DRAM_BANKS; bank++) {
+		u64 lo = tegra_mem_map[bank].virt;
+		u64 hi = lo + tegra_mem_map[bank].size;
+
+		if (end <= lo || start >= hi)	/* exclusive ends: no overlap */
+			continue;
+		mark_ram_allocated(bank, start > lo ? start : lo,
+				   end < hi ? end : hi);
+	}
+}
+
+/* Allocate from the first DRAM bank with room; returns 0 if none fits */
+static u64 alloc_ram(u64 size, u64 align, u64 offset)
+{
+	int idx = 1;
+
+	while (idx <= CONFIG_NR_DRAM_BANKS) {
+		const struct mm_region *r = &tegra_mem_map[idx];
+		/* Candidate: "offset" past the bank start rounded to "align" */
+		u64 first = ROUND(r->virt, align) + offset;
+
+		if (first + size <= r->virt + r->size) {
+			mark_ram_allocated(idx, first, first + size);
+			return first;
+		}
+		idx++;
+	}
+	return 0;
+}
+
+/* Set each space-separated variable named in "aliases" to "address" (hex) */
+static void set_calculated_aliases(char *aliases, u64 address)
+{
+	char *copy, *tmp, *alias;
+
+	/* strsep() modifies its input, so tokenize a private copy */
+	copy = strdup(aliases);
+	if (!copy) {
+		pr_err("strdup(aliases) failed\n");
+		return;
+	}
+
+	tmp = copy;
+	while ((alias = strsep(&tmp, " "))) {
+		/* Repeated spaces yield empty tokens; they name nothing */
+		if (!*alias)
+			continue;
+		debug("%s: alias: %s\n", __func__, alias);
+		if (env_set_hex(alias, address))
+			pr_err("Could not set %s\n", alias);
+	}
+
+	free(copy);
+}
+
+/*
+ * Allocate RAM for calculated variable "var" and store the address in it.
+ *
+ * Inputs come from the environment: "<var>_size" (required, non-zero),
+ * "<var>_align" (defaults to 1; 0 is treated as 1), "<var>_offset" (defaults
+ * to 0) and "<var>_aliases" (optional names that receive the same address).
+ * Problems are reported via pr_err(); nothing is returned.
+ */
+static void set_calculated_env_var(const char *var)
+{
+	char *name_size, *name_align, *name_offset, *name_aliases;
+	u64 size, align, offset, address;
+	char *aliases;
+
+	name_size = gen_varname(var, "_size");
+	if (!name_size)
+		return;
+	name_align = gen_varname(var, "_align");
+	if (!name_align)
+		goto free_size;
+	name_offset = gen_varname(var, "_offset");
+	if (!name_offset)
+		goto free_align;
+	name_aliases = gen_varname(var, "_aliases");
+	if (!name_aliases)
+		goto free_offset;
+
+	size = env_get_hex(name_size, 0);
+	if (!size) {
+		pr_err("%s not set or zero\n", name_size);
+		goto free_aliases;
+	}
+	/* A variable that exists but holds 0 is treated like a missing one */
+	align = env_get_hex(name_align, 1);
+	if (!align)
+		align = 1;
+	offset = env_get_hex(name_offset, 0);
+	aliases = env_get(name_aliases);
+
+	debug("%s: Calc var %s; size=%llx, align=%llx, offset=%llx\n",
+	      __func__, var, size, align, offset);
+	if (aliases)
+		debug("%s: Aliases: %s\n", __func__, aliases);
+
+	address = alloc_ram(size, align, offset);
+	if (!address) {
+		pr_err("Could not allocate %s\n", var);
+		goto free_aliases;
+	}
+	debug("%s: Address %llx\n", __func__, address);
+
+	if (env_set_hex(var, address))
+		pr_err("Could not set %s\n", var);
+	if (aliases)
+		set_calculated_aliases(aliases, address);
+
+free_aliases:
+	free(name_aliases);
+free_offset:
+	free(name_offset);
+free_align:
+	free(name_align);
+free_size:
+	free(name_size);
+}
+
+#ifdef DEBUG
+/* Print every non-empty DRAM bank, numbered from 0, as start..end (+size) */
+static void dump_ram_banks(void)
+{
+	int i;
+
+	for (i = 0; i < CONFIG_NR_DRAM_BANKS; i++) {
+		/* DRAM entries follow the MMIO entry at index 0 */
+		u64 base = tegra_mem_map[i + 1].virt;
+		u64 len = tegra_mem_map[i + 1].size;
+
+		if (len)
+			printf("%d: %010llx..%010llx (+%010llx)\n", i,
+			       base, base + len, len);
+	}
+}
+#endif
+
+/* Reserve the cboot DTB, then allocate each name in "calculated_vars" */
+static void set_calculated_env_vars(void)
+{
+	char *vars, *tmp, *var;
+
+#ifdef DEBUG
+	printf("RAM banks before any calculated env. var.s:\n");
+	dump_ram_banks();
+#endif
+
+	reserve_ram(nvtboot_boot_x0, fdt_totalsize(nvtboot_boot_x0));
+
+#ifdef DEBUG
+	printf("RAM after reserving cboot DTB:\n");
+	dump_ram_banks();
+#endif
+
+	vars = env_get("calculated_vars");
+	if (!vars) {
+		debug("%s: No env var calculated_vars\n", __func__);
+		return;
+	}
+
+	vars = strdup(vars);
+	if (!vars) {
+		pr_err("strdup(calculated_vars) failed\n");
+		return;
+	}
+
+	tmp = vars;
+	while ((var = strsep(&tmp, " "))) {
+		if (!*var)	/* empty token from repeated spaces */
+			continue;
+		debug("%s: var: %s\n", __func__, var);
+		set_calculated_env_var(var);
+#ifdef DEBUG
+		printf("RAM banks after allocating %s:\n", var);
+		dump_ram_banks();
+#endif
+	}
+
+	free(vars);
+}
+
 static int set_fdt_addr(void)
 {
 	int ret;
@@ -60,6 +322,7 @@ static int set_ethaddr_from_nvtboot(void)
 
 int tegra_soc_board_init_late(void)
 {
+	set_calculated_env_vars();
 	/*
 	 * Ignore errors here; the value may not be used depending on
 	 * extlinux.conf or boot script content.
diff --git a/arch/arm/mach-tegra/tegra186/nvtboot_mem.c b/arch/arm/mach-tegra/tegra186/nvtboot_mem.c
index 5224ef641c..2ca59747d4 100644
--- a/arch/arm/mach-tegra/tegra186/nvtboot_mem.c
+++ b/arch/arm/mach-tegra/tegra186/nvtboot_mem.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, NVIDIA CORPORATION.
+ * Copyright (c) 2016-2018, NVIDIA CORPORATION.
  *
  * SPDX-License-Identifier: GPL-2.0+
  */
@@ -8,28 +8,48 @@
 #include <fdt_support.h>
 #include <fdtdec.h>
 #include <asm/arch/tegra.h>
+#include <asm/armv8/mmu.h>
+
+#define SZ_4G 0x100000000ULL
+
+/*
+ * Size of a region that's large enough to hold the relocated U-Boot and all
+ * other allocations made around it (stack, heap, page tables, etc.)
+ * In practice, running "bdinfo" at the shell prompt, the stack reaches about
+ * 5MB from the address selected for ram_top as of the time of writing,
+ * so a 16MB region should be plenty.
+ */
+#define MIN_USABLE_RAM_SIZE SZ_16M
+/*
+ * The amount of space we expect to require for stack usage. Used to validate
+ * that all reservations fit into the region selected for the relocation target
+ */
+#define MIN_USABLE_STACK_SIZE SZ_1M
 
 DECLARE_GLOBAL_DATA_PTR;
 
 extern unsigned long nvtboot_boot_x0;
+extern struct mm_region tegra_mem_map[];
 
 /*
- * A parsed version of /memory/reg from the DTB that is passed to U-Boot in x0.
- *
- * We only support up to two banks since that's all the binary  bootloader
- * ever sets. We assume bank 0 is RAM below 4G and bank 1 is RAM  above 4G.
- * This is all a fairly safe assumption, since the L4T kernel makes  the same
- * assumptions, so the bootloader is unlikely to change.
- *
- * This is written to before relocation, and hence cannot be in .bss, since
- * .bss overlaps the DTB that's appended to the U-Boot binary. The initializer
- * forces this into .data and avoids this issue. This also has the nice side-
- * effect of the content being valid after relocation.
+ * These variables are written to before relocation, and hence cannot be
+ * in .bss, since .bss overlaps the DTB that's appended to the U-Boot binary.
+ * The section attribute forces them into .data and avoids this issue. This
+ * also has the nice side-effect of the content being valid after relocation.
+ */
+
+/* The number of DRAM banks recorded in tegra_mem_map[] (from index 1) */
+static int ram_bank_count __attribute__((section(".data")));
+
+/*
+ * The usable top-of-RAM for U-Boot. This is both:
+ * a) Below 4GB to avoid issues with peripherals that use 32-bit addressing.
+ * b) At the end of a region that has enough space to hold the relocated U-Boot
+ *    and all other allocations made around it (stack, heap, page tables, etc.)
  */
-static struct {
-	u64 start;
-	u64 size;
-} ram_banks[2] = {{1}};
+static u64 ram_top __attribute__((section(".data")));
+/* The base address of the region of RAM that ends at ram_top */
+static u64 region_base __attribute__((section(".data")));
 
 int dram_init(void)
 {
@@ -38,8 +58,6 @@ int dram_init(void)
 	int node, len, i;
 	const u32 *prop;
 
-	memset(ram_banks, 0, sizeof(ram_banks));
-
 	na = fdtdec_get_uint(nvtboot_blob, 0, "#address-cells", 2);
 	ns = fdtdec_get_uint(nvtboot_blob, 0, "#size-cells", 2);
 
@@ -54,37 +72,103 @@ int dram_init(void)
 		hang();
 	}
 
-	len /= (na + ns);
-	if (len > ARRAY_SIZE(ram_banks))
-		len = ARRAY_SIZE(ram_banks);
+	/* Calculate the true # of base/size pairs to read */
+	len /= 4;		/* Convert bytes to number of cells */
+	len /= (na + ns);	/* Convert cells to number of banks */
+	if (len > CONFIG_NR_DRAM_BANKS)
+		len = CONFIG_NR_DRAM_BANKS;
 
+	/* Parse the /memory node, and save useful entries */
 	gd->ram_size = 0;
+	ram_bank_count = 0;
 	for (i = 0; i < len; i++) {
-		ram_banks[i].start = fdt_read_number(prop, na);
+		u64 bank_start, bank_end, bank_size, usable_bank_size;
+
+		/* Extract raw memory region data from DTB */
+		bank_start = fdt_read_number(prop, na);
 		prop += na;
-		ram_banks[i].size = fdt_read_number(prop, ns);
+		bank_size = fdt_read_number(prop, ns);
 		prop += ns;
-		gd->ram_size += ram_banks[i].size;
+		gd->ram_size += bank_size;
+		bank_end = bank_start + bank_size;
+		debug("Bank %d: %llx..%llx (+%llx)\n", i,
+		      bank_start, bank_end, bank_size);
+
+		/*
+		 * Align the bank to MMU section size. This is not strictly
+		 * necessary, since the translation table construction code
+		 * handles page granularity without issue. However, aligning
+		 * the MMU entries reduces the size and number of levels in the
+		 * page table, so is worth it.
+		 */
+		bank_start = ROUND(bank_start, SZ_2M);
+		bank_end = bank_end & ~(SZ_2M - 1);
+		bank_size = bank_end - bank_start;
+		debug("  aligned: %llx..%llx (+%llx)\n",
+		      bank_start, bank_end, bank_size);
+		if (bank_end <= bank_start)
+			continue;
+
+		/* Record data used to create MMU translation tables */
+		ram_bank_count++;
+		/* Index below is deliberately 1-based to skip MMIO entry */
+		tegra_mem_map[ram_bank_count].virt = bank_start;
+		tegra_mem_map[ram_bank_count].phys = bank_start;
+		tegra_mem_map[ram_bank_count].size = bank_size;
+		tegra_mem_map[ram_bank_count].attrs =
+			PTE_BLOCK_MEMTYPE(MT_NORMAL) | PTE_BLOCK_INNER_SHARE;
+
+		/* Find the relocation bank; one wholly above 4GB has nothing usable */
+		if (bank_end > SZ_4G)
+			bank_end = SZ_4G;
+		debug("  end  %llx (usable)\n", bank_end);
+		usable_bank_size = bank_end - bank_start;
+		debug("  size %llx (usable)\n", usable_bank_size);
+		if ((bank_start < bank_end) && (bank_end > ram_top) &&
+		    (usable_bank_size >= MIN_USABLE_RAM_SIZE)) {
+			ram_top = bank_end;
+			region_base = bank_start;
+			debug("ram top now %llx\n", ram_top);
+		}
+	}
+
+	/* Ensure memory map contains the desired sentinel entry */
+	tegra_mem_map[ram_bank_count + 1].virt = 0;
+	tegra_mem_map[ram_bank_count + 1].phys = 0;
+	tegra_mem_map[ram_bank_count + 1].size = 0;
+	tegra_mem_map[ram_bank_count + 1].attrs = 0;
+
+	/* Error out if a relocation target couldn't be found */
+	if (!ram_top) {
+		pr_err("Can't find a usable RAM top");
+		hang();
 	}
 
 	return 0;
 }
 
-extern unsigned long nvtboot_boot_x0;
-
 int dram_init_banksize(void)
 {
 	int i;
 
-	for (i = 0; i < 2; i++) {
-		gd->bd->bi_dram[i].start = ram_banks[i].start;
-		gd->bd->bi_dram[i].size = ram_banks[i].size;
+	/* Compare, don't subtract: an SP below region_base would wrap around */
+	if (gd->start_addr_sp < (region_base + MIN_USABLE_STACK_SIZE)) {
+		pr_err("Reservations exceed chosen region size");
+		hang();
+	}
+	for (i = 0; i < ram_bank_count; i++) {
+		gd->bd->bi_dram[i].start = tegra_mem_map[1 + i].virt;
+		gd->bd->bi_dram[i].size = tegra_mem_map[1 + i].size;
 	}
 
+#ifdef CONFIG_PCI
+	gd->pci_ram_top = ram_top;
+#endif
+
 	return 0;
 }
 
 ulong board_get_usable_ram_top(ulong total_size)
 {
-	return ram_banks[0].start + ram_banks[0].size;
+	return ram_top;	/* set in dram_init(); capped there at 4GB */
 }
diff --git a/arch/arm/mach-tegra/tegra210/Kconfig b/arch/arm/mach-tegra/tegra210/Kconfig
index 3637473051..250738aed3 100644
--- a/arch/arm/mach-tegra/tegra210/Kconfig
+++ b/arch/arm/mach-tegra/tegra210/Kconfig
@@ -40,6 +40,9 @@ endchoice
 config SYS_SOC
 	default "tegra210"
 
+config SYS_INIT_SP_BSS_OFFSET
+	default 524288
+
 source "board/nvidia/e2220-1170/Kconfig"
 source "board/nvidia/p2371-0000/Kconfig"
 source "board/nvidia/p2371-2180/Kconfig"