14 files changed, 588 insertions, 42 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index de323bf4b9..001ece3cf1 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -19,6 +19,36 @@ config POSITION_INDEPENDENT
 	  from almost any address. This logic relies on the relocation
 	  information that is embedded into the binary to support U-Boot
 	  relocating itself to the top-of-RAM later during execution.
+
+config SYS_INIT_SP_BSS_OFFSET
+	int
+	help
+	  U-Boot typically uses a hard-coded value for the stack pointer
+	  before relocation. Define this option to instead calculate the
+	  initial SP at run-time. This is useful to avoid hard-coding addresses
+	  into U-Boot, so that can be loaded and executed at arbitrary
+	  addresses and thus avoid using arbitrary addresses at runtime. This
+	  option's value is the offset added to &_bss_start in order to
+	  calculate the stack pointer. This offset should be large enough so
+	  that the early malloc region, global data (gd), and early stack usage
+	  do not overlap any appended DTB.
+
+config LINUX_KERNEL_IMAGE_HEADER
+	bool
+	help
+	  Place a Linux kernel image header at the start of the U-Boot binary.
+	  The format of the header is described in the Linux kernel source at
+	  Documentation/arm64/booting.txt. This feature is useful since the
+	  image header reports the amount of memory (BSS and similar) that
+	  U-Boot needs to use, but which isn't part of the binary.
+
+if LINUX_KERNEL_IMAGE_HEADER
+config LNX_KRNL_IMG_TEXT_OFFSET_BASE
+	hex
+	help
+	  The value subtracted from CONFIG_SYS_TEXT_BASE to calculate the
+	  TEXT_OFFSET value written in to the Linux kernel image header.
+endif
 endif
 
 config STATIC_RELA
diff --git a/arch/arm/cpu/armv8/linux-kernel-image-header-vars.h b/arch/arm/cpu/armv8/linux-kernel-image-header-vars.h
new file mode 100644
index 0000000000..3e720937f0
--- /dev/null
+++ b/arch/arm/cpu/armv8/linux-kernel-image-header-vars.h
@@ -0,0 +1,86 @@
+/*
+ * (C) Copyright 2017 NVIDIA Corporation <www.nvidia.com>
+ *
+ * Derived from Linux kernel v4.14 files:
+ *
+ * arch/arm64/include/asm/assembler.h:
+ * Based on arch/arm/include/asm/assembler.h, arch/arm/mm/proc-macros.S
+ * Copyright (C) 1996-2000 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * arch/arm64/kernel/head.S:
+ * Based on arch/arm/kernel/head.S
+ * Copyright (C) 1994-2002 Russell King
+ * Copyright (C) 2003-2012 ARM Ltd.
+ * Authors:	Catalin Marinas <catalin.marinas@arm.com>
+ *		Will Deacon <will.deacon@arm.com>
+ *
+ * arch/arm64/kernel/image.h:
+ * Copyright (C) 2014 ARM Ltd.
+ *
+ * SPDX-License-Identifier:     GPL-2.0
+ */
+
+/*
+ * There aren't any ELF relocations we can use to endian-swap values known only
+ * at link time (e.g. the subtraction of two symbol addresses), so we must get
+ * the linker to endian-swap certain values before emitting them.
+ *
+ * Note that, in order for this to work when building the ELF64 PIE executable
+ * (for KASLR), these values should not be referenced via R_AARCH64_ABS64
+ * relocations, since these are fixed up at runtime rather than at build time
+ * when PIE is in effect. So we need to split them up in 32-bit high and low
+ * words.
+ */
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define DATA_LE32(data)				\
+	((((data) & 0x000000ff) << 24) |	\
+	 (((data) & 0x0000ff00) << 8)  |	\
+	 (((data) & 0x00ff0000) >> 8)  |	\
+	 (((data) & 0xff000000) >> 24))
+#else
+#define DATA_LE32(data) ((data) & 0xffffffff)
+#endif
+
+#define DEFINE_IMAGE_LE64(sym, data)				\
+	sym##_lo32 = DATA_LE32((data) & 0xffffffff);		\
+	sym##_hi32 = DATA_LE32((data) >> 32)
+
+#define __MAX(a, b)		(((a) > (b)) ? (a) : (b))
+#define __CODE_DATA_SIZE	(__bss_start - _start)
+#define __BSS_SIZE		(__bss_end - __bss_start)
+#ifdef CONFIG_SYS_INIT_SP_BSS_OFFSET
+#define __MAX_EXTRA_RAM_USAGE	__MAX(__BSS_SIZE, CONFIG_SYS_INIT_SP_BSS_OFFSET)
+#else
+#define __MAX_EXTRA_RAM_USAGE	__BSS_SIZE
+#endif
+#define __MEM_USAGE		(__CODE_DATA_SIZE + __MAX_EXTRA_RAM_USAGE)
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define __HEAD_FLAG_BE		1
+#else
+#define __HEAD_FLAG_BE		0
+#endif
+
+#define __HEAD_FLAG_PAGE_SIZE	1 /* 4K hard-coded */
+
+#define __HEAD_FLAG_PHYS_BASE	1
+
+#define __HEAD_FLAGS		((__HEAD_FLAG_BE << 0) |	\
+				 (__HEAD_FLAG_PAGE_SIZE << 1) |	\
+				 (__HEAD_FLAG_PHYS_BASE << 3))
+
+#define TEXT_OFFSET (CONFIG_SYS_TEXT_BASE - \
+			CONFIG_LNX_KRNL_IMG_TEXT_OFFSET_BASE)
+
+/*
+ * These will output as part of the Image header, which should be little-endian
+ * regardless of the endianness of the kernel. While constant values could be
+ * endian swapped in head.S, all are done here for consistency.
+ */
+#define HEAD_SYMBOLS						\
+	DEFINE_IMAGE_LE64(_kernel_size_le, __MEM_USAGE);	\
+	DEFINE_IMAGE_LE64(_kernel_offset_le, TEXT_OFFSET);	\
+	DEFINE_IMAGE_LE64(_kernel_flags_le, __HEAD_FLAGS);
+
+	HEAD_SYMBOLS
diff --git a/arch/arm/cpu/armv8/start.S b/arch/arm/cpu/armv8/start.S
index f385ed40e1..7a98a1c95d 100644
--- a/arch/arm/cpu/armv8/start.S
+++ b/arch/arm/cpu/armv8/start.S
@@ -19,7 +19,9 @@
 
 .globl	_start
 _start:
-#ifdef CONFIG_ENABLE_ARM_SOC_BOOT0_HOOK
+#if defined(LINUX_KERNEL_IMAGE_HEADER)
+#include <asm/boot0-linux-kernel-header.h>
+#elif defined(CONFIG_ENABLE_ARM_SOC_BOOT0_HOOK)
 /*
  * Various SoCs need something special and SoC-specific up front in
  * order to boot, allow them to set that in their boot0.h file and then
diff --git a/arch/arm/cpu/armv8/u-boot.lds b/arch/arm/cpu/armv8/u-boot.lds
index 22195b8834..7b76e0f9f0 100644
--- a/arch/arm/cpu/armv8/u-boot.lds
+++ b/arch/arm/cpu/armv8/u-boot.lds
@@ -159,4 +159,8 @@ SECTIONS
 	/DISCARD/ : { *(.plt*) }
 	/DISCARD/ : { *(.interp*) }
 	/DISCARD/ : { *(.gnu*) }
+
+#ifdef CONFIG_LINUX_KERNEL_IMAGE_HEADER
+#include "linux-kernel-image-header-vars.h"
+#endif
 }
diff --git a/arch/arm/dts/tegra-u-boot.dtsi b/arch/arm/dts/tegra-u-boot.dtsi
index cde591c5fc..4f692ee975 100644
--- a/arch/arm/dts/tegra-u-boot.dtsi
+++ b/arch/arm/dts/tegra-u-boot.dtsi
@@ -1,5 +1,11 @@
 #include <config.h>
 
+#ifdef CONFIG_SPL_TEXT_BASE
+#define U_BOOT_OFFSET (CONFIG_SYS_TEXT_BASE - CONFIG_SPL_TEXT_BASE)
+#else
+#define U_BOOT_OFFSET 0
+#endif
+
 / {
 	binman {
 		multiple-images;
@@ -9,8 +15,7 @@
 			u-boot-spl {
 			};
 			u-boot {
-				pos = <(CONFIG_SYS_TEXT_BASE -
-					CONFIG_SPL_TEXT_BASE)>;
+				pos = <(U_BOOT_OFFSET)>;
 			};
 		};
 
@@ -21,8 +26,7 @@
 			u-boot-spl {
 			};
 			u-boot {
-				pos = <(CONFIG_SYS_TEXT_BASE -
-					CONFIG_SPL_TEXT_BASE)>;
+				pos = <(U_BOOT_OFFSET)>;
 			};
 		};
 
@@ -32,8 +36,7 @@
 			u-boot-spl {
 			};
 			u-boot-nodtb {
-				pos = <(CONFIG_SYS_TEXT_BASE -
-					CONFIG_SPL_TEXT_BASE)>;
+				pos = <(U_BOOT_OFFSET)>;
 			};
 		};
 	};
diff --git a/arch/arm/include/asm/boot0-linux-kernel-header.h b/arch/arm/include/asm/boot0-linux-kernel-header.h
new file mode 100644
index 0000000000..ca28780daa
--- /dev/null
+++ b/arch/arm/include/asm/boot0-linux-kernel-header.h
@@ -0,0 +1,49 @@
+/*
+ * (C) Copyright 2017 NVIDIA Corporation <www.nvidia.com>
+ *
+ * Derived from Linux kernel v4.14 files:
+ *
+ * arch/arm64/include/asm/assembler.h:
+ * Based on arch/arm/include/asm/assembler.h, arch/arm/mm/proc-macros.S
+ * Copyright (C) 1996-2000 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * arch/arm64/kernel/head.S:
+ * Based on arch/arm/kernel/head.S
+ * Copyright (C) 1994-2002 Russell King
+ * Copyright (C) 2003-2012 ARM Ltd.
+ * Authors:	Catalin Marinas <catalin.marinas@arm.com>
+ *		Will Deacon <will.deacon@arm.com>
+ *
+ * arch/arm64/kernel/image.h:
+ * Copyright (C) 2014 ARM Ltd.
+ *
+ * SPDX-License-Identifier:     GPL-2.0
+ */
+
+	/*
+	 * Emit a 64-bit absolute little endian symbol reference in a way that
+	 * ensures that it will be resolved at build time, even when building a
+	 * PIE binary. This requires cooperation from the linker script, which
+	 * must emit the lo32/hi32 halves individually.
+	 */
+	.macro	le64sym, sym
+	.long	\sym\()_lo32
+	.long	\sym\()_hi32
+	.endm
+
+.globl _start
+_start:
+	/*
+	 * DO NOT MODIFY. Image header expected by Linux boot-loaders.
+	 */
+	b	reset				/* branch to kernel start, magic */
+	.long	0				/* reserved */
+	le64sym	_kernel_offset_le		/* Image load offset from start of RAM, little-endian */
+	le64sym	_kernel_size_le			/* Effective size of kernel image, little-endian */
+	le64sym	_kernel_flags_le		/* Informative flags, little-endian */
+	.quad	0				/* reserved */
+	.quad	0				/* reserved */
+	.quad	0				/* reserved */
+	.ascii	"ARM\x64"			/* Magic number */
+	.long	0				/* reserved */
diff --git a/arch/arm/lib/bootm.c b/arch/arm/lib/bootm.c
index 5c62d9c144..89740657e0 100644
--- a/arch/arm/lib/bootm.c
+++ b/arch/arm/lib/bootm.c
@@ -47,7 +47,8 @@ static ulong get_sp(void)
 
 void arch_lmb_reserve(struct lmb *lmb)
 {
-	ulong sp;
+	ulong sp, bank_end;
+	int bank;
 
 	/*
 	 * Booting a (Linux) kernel image
@@ -63,8 +64,16 @@ void arch_lmb_reserve(struct lmb *lmb)
 
 	/* adjust sp by 4K to be safe */
 	sp -= 4096;
-	lmb_reserve(lmb, sp,
-		    gd->bd->bi_dram[0].start + gd->bd->bi_dram[0].size - sp);
+	for (bank = 0; bank < CONFIG_NR_DRAM_BANKS; bank++) {
+		if (sp < gd->bd->bi_dram[bank].start)
+			continue;
+		bank_end = gd->bd->bi_dram[bank].start +
+			gd->bd->bi_dram[bank].size;
+		if (sp >= bank_end)
+			continue;
+		lmb_reserve(lmb, sp, bank_end - sp);
+		break;
+	}
 }
 
 __weak void board_quiesce_devices(void)
diff --git a/arch/arm/lib/crt0_64.S b/arch/arm/lib/crt0_64.S
index 9cb70552fe..a181283e0f 100644
--- a/arch/arm/lib/crt0_64.S
+++ b/arch/arm/lib/crt0_64.S
@@ -73,6 +73,9 @@ ENTRY(_main)
 	ldr	x0, =(CONFIG_TPL_STACK)
 #elif defined(CONFIG_SPL_BUILD) && defined(CONFIG_SPL_STACK)
 	ldr	x0, =(CONFIG_SPL_STACK)
+#elif defined(CONFIG_SYS_INIT_SP_BSS_OFFSET)
+	adr	x0, __bss_start
+	add	x0, x0, #CONFIG_SYS_INIT_SP_BSS_OFFSET
 #else
 	ldr	x0, =(CONFIG_SYS_INIT_SP_ADDR)
 #endif
diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig
index 51d143687b..fd0082d22a 100644
--- a/arch/arm/mach-tegra/Kconfig
+++ b/arch/arm/mach-tegra/Kconfig
@@ -60,8 +60,14 @@ config TEGRA_ARMV7_COMMON
 config TEGRA_ARMV8_COMMON
 	bool "Tegra 64-bit common options"
 	select ARM64
+	select LINUX_KERNEL_IMAGE_HEADER
 	select TEGRA_COMMON
 
+if TEGRA_ARMV8_COMMON
+config LNX_KRNL_IMG_TEXT_OFFSET_BASE
+	default 0x80000000
+endif
+
 choice
 	prompt "Tegra SoC select"
 	optional
diff --git a/arch/arm/mach-tegra/arm64-mmu.c b/arch/arm/mach-tegra/arm64-mmu.c
index a79a5192e0..3a126bdec4 100644
--- a/arch/arm/mach-tegra/arm64-mmu.c
+++ b/arch/arm/mach-tegra/arm64-mmu.c
@@ -12,7 +12,8 @@
 #include <asm/system.h>
 #include <asm/armv8/mmu.h>
 
-static struct mm_region tegra_mem_map[] = {
+/* size: IO + NR_DRAM_BANKS + terminator */
+struct mm_region tegra_mem_map[1 + CONFIG_NR_DRAM_BANKS + 1] = {
 	{
 		.virt = 0x0UL,
 		.phys = 0x0UL,
diff --git a/arch/arm/mach-tegra/tegra186/Kconfig b/arch/arm/mach-tegra/tegra186/Kconfig
index b2e53b58ca..479c0955ee 100644
--- a/arch/arm/mach-tegra/tegra186/Kconfig
+++ b/arch/arm/mach-tegra/tegra186/Kconfig
@@ -21,6 +21,9 @@ endchoice
 config SYS_SOC
 	default "tegra186"
 
+config SYS_INIT_SP_BSS_OFFSET
+	default 524288
+
 source "board/nvidia/p2771-0000/Kconfig"
 
 endif
diff --git a/arch/arm/mach-tegra/tegra186/nvtboot_board.c b/arch/arm/mach-tegra/tegra186/nvtboot_board.c
index b94eb424aa..8ecb454443 100644
--- a/arch/arm/mach-tegra/tegra186/nvtboot_board.c
+++ b/arch/arm/mach-tegra/tegra186/nvtboot_board.c
@@ -1,16 +1,278 @@
 /*
- * Copyright (c) 2016, NVIDIA CORPORATION.
+ * Copyright (c) 2016-2018, NVIDIA CORPORATION.
  *
  * SPDX-License-Identifier: GPL-2.0+
  */
 
+#include <stdlib.h>
 #include <common.h>
 #include <fdt_support.h>
 #include <fdtdec.h>
 #include <asm/arch/tegra.h>
+#include <asm/armv8/mmu.h>
+
+DECLARE_GLOBAL_DATA_PTR;
 
 extern unsigned long nvtboot_boot_x0;
 
+/*
+ * The following few functions run late during the boot process and dynamically
+ * calculate the load address of various binaries. To keep track of multiple
+ * allocations, some writable list of RAM banks must be used. tegra_mem_map[]
+ * is used for this purpose to avoid making yet another copy of the list of RAM
+ * banks. This is safe because tegra_mem_map[] is only used once during very
+ * early boot to create U-Boot's page tables, long before this code runs. If
+ * this assumption becomes invalid later, we can just fix the code to copy the
+ * list of RAM banks into some private data structure before running.
+ */
+
+extern struct mm_region tegra_mem_map[];
+
+static char *gen_varname(const char *var, const char *ext)
+{
+	size_t len_var = strlen(var);
+	size_t len_ext = strlen(ext);
+	size_t len = len_var + len_ext + 1;
+	char *varext = malloc(len);
+
+	if (!varext)
+		return 0;
+	strcpy(varext, var);
+	strcpy(varext + len_var, ext);
+	return varext;
+}
+
+static void mark_ram_allocated(int bank, u64 allocated_start, u64 allocated_end)
+{
+	u64 bank_start = tegra_mem_map[bank].virt;
+	u64 bank_size = tegra_mem_map[bank].size;
+	u64 bank_end = bank_start + bank_size;
+	bool keep_front = allocated_start != bank_start;
+	bool keep_tail = allocated_end != bank_end;
+
+	if (keep_front && keep_tail) {
+		/*
+		 * There are CONFIG_NR_DRAM_BANKS DRAM entries in the array,
+		 * starting at index 1 (index 0 is MMIO). So, we are at DRAM
+		 * entry "bank" not "bank - 1" as for a typical 0-base array.
+		 * The number of remaining DRAM entries is therefore
+		 * "CONFIG_NR_DRAM_BANKS - bank". We want to duplicate the
+		 * current entry and shift up the remaining entries, dropping
+		 * the last one. Thus, we must copy one fewer entry than the
+		 * number remaining.
+		 */
+		memmove(&tegra_mem_map[bank + 1], &tegra_mem_map[bank],
+			CONFIG_NR_DRAM_BANKS - bank - 1);
+		tegra_mem_map[bank].size = allocated_start - bank_start;
+		bank++;
+		tegra_mem_map[bank].virt = allocated_end;
+		tegra_mem_map[bank].phys = allocated_end;
+		tegra_mem_map[bank].size = bank_end - allocated_end;
+	} else if (keep_front) {
+		tegra_mem_map[bank].size = allocated_start - bank_start;
+	} else if (keep_tail) {
+		tegra_mem_map[bank].virt = allocated_end;
+		tegra_mem_map[bank].phys = allocated_end;
+		tegra_mem_map[bank].size = bank_end - allocated_end;
+	} else {
+		/*
+		 * We could move all subsequent banks down in the array but
+		 * that's not necessary for subsequent allocations to work, so
+		 * we skip doing so.
+		 */
+		tegra_mem_map[bank].size = 0;
+	}
+}
+
+static void reserve_ram(u64 start, u64 size)
+{
+	int bank;
+	u64 end = start + size;
+
+	for (bank = 1; bank <= CONFIG_NR_DRAM_BANKS; bank++) {
+		u64 bank_start = tegra_mem_map[bank].virt;
+		u64 bank_size = tegra_mem_map[bank].size;
+		u64 bank_end = bank_start + bank_size;
+
+		if (end <= bank_start || start > bank_end)
+			continue;
+		mark_ram_allocated(bank, start, end);
+		break;
+	}
+}
+
+static u64 alloc_ram(u64 size, u64 align, u64 offset)
+{
+	int bank;
+
+	for (bank = 1; bank <= CONFIG_NR_DRAM_BANKS; bank++) {
+		u64 bank_start = tegra_mem_map[bank].virt;
+		u64 bank_size = tegra_mem_map[bank].size;
+		u64 bank_end = bank_start + bank_size;
+		u64 allocated = ROUND(bank_start, align) + offset;
+		u64 allocated_end = allocated + size;
+
+		if (allocated_end > bank_end)
+			continue;
+		mark_ram_allocated(bank, allocated, allocated_end);
+		return allocated;
+	}
+	return 0;
+}
+
+static void set_calculated_aliases(char *aliases, u64 address)
+{
+	char *tmp, *alias;
+	int err;
+
+	aliases = strdup(aliases);
+	if (!aliases) {
+		pr_err("strdup(aliases) failed");
+		return;
+	}
+
+	tmp = aliases;
+	while (true) {
+		alias = strsep(&tmp, " ");
+		if (!alias)
+			break;
+		debug("%s: alias: %s\n", __func__, alias);
+		err = env_set_hex(alias, address);
+		if (err)
+			pr_err("Could not set %s\n", alias);
+	}
+
+	free(aliases);
+}
+
+static void set_calculated_env_var(const char *var)
+{
+	char *var_size;
+	char *var_align;
+	char *var_offset;
+	char *var_aliases;
+	u64 size;
+	u64 align;
+	u64 offset;
+	char *aliases;
+	u64 address;
+	int err;
+
+	var_size = gen_varname(var, "_size");
+	if (!var_size)
+		return;
+	var_align = gen_varname(var, "_align");
+	if (!var_align)
+		goto out_free_var_size;
+	var_offset = gen_varname(var, "_offset");
+	if (!var_offset)
+		goto out_free_var_align;
+	var_aliases = gen_varname(var, "_aliases");
+	if (!var_aliases)
+		goto out_free_var_offset;
+
+	size = env_get_hex(var_size, 0);
+	if (!size) {
+		pr_err("%s not set or zero\n", var_size);
+		goto out_free_var_aliases;
+	}
+	align = env_get_hex(var_align, 1);
+	/* Handle extant variables, but with a value of 0 */
+	if (!align)
+		align = 1;
+	offset = env_get_hex(var_offset, 0);
+	aliases = env_get(var_aliases);
+
+	debug("%s: Calc var %s; size=%llx, align=%llx, offset=%llx\n",
+	      __func__, var, size, align, offset);
+	if (aliases)
+		debug("%s: Aliases: %s\n", __func__, aliases);
+
+	address = alloc_ram(size, align, offset);
+	if (!address) {
+		pr_err("Could not allocate %s\n", var);
+		goto out_free_var_aliases;
+	}
+	debug("%s: Address %llx\n", __func__, address);
+
+	err = env_set_hex(var, address);
+	if (err)
+		pr_err("Could not set %s\n", var);
+	if (aliases)
+		set_calculated_aliases(aliases, address);
+
+out_free_var_aliases:
+	free(var_aliases);
+out_free_var_offset:
+	free(var_offset);
+out_free_var_align:
+	free(var_align);
+out_free_var_size:
+	free(var_size);
+}
+
+#ifdef DEBUG
+static void dump_ram_banks(void)
+{
+	int bank;
+
+	for (bank = 1; bank <= CONFIG_NR_DRAM_BANKS; bank++) {
+		u64 bank_start = tegra_mem_map[bank].virt;
+		u64 bank_size = tegra_mem_map[bank].size;
+		u64 bank_end = bank_start + bank_size;
+
+		if (!bank_size)
+			continue;
+		printf("%d: %010llx..%010llx (+%010llx)\n", bank - 1,
+		       bank_start, bank_end, bank_size);
+	}
+}
+#endif
+
+static void set_calculated_env_vars(void)
+{
+	char *vars, *tmp, *var;
+
+#ifdef DEBUG
+	printf("RAM banks before any calculated env. var.s:\n");
+	dump_ram_banks();
+#endif
+
+	reserve_ram(nvtboot_boot_x0, fdt_totalsize(nvtboot_boot_x0));
+
+#ifdef DEBUG
+	printf("RAM after reserving cboot DTB:\n");
+	dump_ram_banks();
+#endif
+
+	vars = env_get("calculated_vars");
+	if (!vars) {
+		debug("%s: No env var calculated_vars\n", __func__);
+		return;
+	}
+
+	vars = strdup(vars);
+	if (!vars) {
+		pr_err("strdup(calculated_vars) failed");
+		return;
+	}
+
+	tmp = vars;
+	while (true) {
+		var = strsep(&tmp, " ");
+		if (!var)
+			break;
+		debug("%s: var: %s\n", __func__, var);
+		set_calculated_env_var(var);
+#ifdef DEBUG
+		printf("RAM banks affter allocating %s:\n", var);
+		dump_ram_banks();
+#endif
+	}
+
+	free(vars);
+}
+
 static int set_fdt_addr(void)
 {
 	int ret;
@@ -60,6 +322,7 @@ static int set_ethaddr_from_nvtboot(void)
 
 int tegra_soc_board_init_late(void)
 {
+	set_calculated_env_vars();
 	/*
 	 * Ignore errors here; the value may not be used depending on
 	 * extlinux.conf or boot script content.
diff --git a/arch/arm/mach-tegra/tegra186/nvtboot_mem.c b/arch/arm/mach-tegra/tegra186/nvtboot_mem.c
index 5224ef641c..2ca59747d4 100644
--- a/arch/arm/mach-tegra/tegra186/nvtboot_mem.c
+++ b/arch/arm/mach-tegra/tegra186/nvtboot_mem.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, NVIDIA CORPORATION.
+ * Copyright (c) 2016-2018, NVIDIA CORPORATION.
  *
  * SPDX-License-Identifier: GPL-2.0+
  */
@@ -8,28 +8,48 @@
 #include <fdt_support.h>
 #include <fdtdec.h>
 #include <asm/arch/tegra.h>
+#include <asm/armv8/mmu.h>
+
+#define SZ_4G 0x100000000ULL
+
+/*
+ * Size of a region that's large enough to hold the relocated U-Boot and all
+ * other allocations made around it (stack, heap, page tables, etc.)
+ * In practice, running "bdinfo" at the shell prompt, the stack reaches about
+ * 5MB from the address selected for ram_top as of the time of writing,
+ * so a 16MB region should be plenty.
+ */
+#define MIN_USABLE_RAM_SIZE SZ_16M
+/*
+ * The amount of space we expect to require for stack usage. Used to validate
+ * that all reservations fit into the region selected for the relocation target
+ */
+#define MIN_USABLE_STACK_SIZE SZ_1M
 
 DECLARE_GLOBAL_DATA_PTR;
 
 extern unsigned long nvtboot_boot_x0;
+extern struct mm_region tegra_mem_map[];
 
 /*
- * A parsed version of /memory/reg from the DTB that is passed to U-Boot in x0.
- *
- * We only support up to two banks since that's all the binary  bootloader
- * ever sets. We assume bank 0 is RAM below 4G and bank 1 is RAM  above 4G.
- * This is all a fairly safe assumption, since the L4T kernel makes  the same
- * assumptions, so the bootloader is unlikely to change.
- *
- * This is written to before relocation, and hence cannot be in .bss, since
- * .bss overlaps the DTB that's appended to the U-Boot binary. The initializer
- * forces this into .data and avoids this issue. This also has the nice side-
- * effect of the content being valid after relocation.
+ * These variables are written to before relocation, and hence cannot be
+ * in.bss, since .bss overlaps the DTB that's appended to the U-Boot binary.
+ * The section attribute forces this into .data and avoids this issue. This
+ * also has the nice side-effect of the content being valid after relocation.
+ */
+
+/* The number of valid entries in ram_banks[] */
+static int ram_bank_count __attribute__((section(".data")));
+
+/*
+ * The usable top-of-RAM for U-Boot. This is both:
+ * a) Below 4GB to avoid issues with peripherals that use 32-bit addressing.
+ * b) At the end of a region that has enough space to hold the relocated U-Boot
+ *    and all other allocations made around it (stack, heap, page tables, etc.)
  */
-static struct {
-	u64 start;
-	u64 size;
-} ram_banks[2] = {{1}};
+static u64 ram_top __attribute__((section(".data")));
+/* The base address of the region of RAM that ends at ram_top */
+static u64 region_base __attribute__((section(".data")));
 
 int dram_init(void)
 {
@@ -38,8 +58,6 @@ int dram_init(void)
 	int node, len, i;
 	const u32 *prop;
 
-	memset(ram_banks, 0, sizeof(ram_banks));
-
 	na = fdtdec_get_uint(nvtboot_blob, 0, "#address-cells", 2);
 	ns = fdtdec_get_uint(nvtboot_blob, 0, "#size-cells", 2);
 
@@ -54,37 +72,103 @@ int dram_init(void)
 		hang();
 	}
 
-	len /= (na + ns);
-	if (len > ARRAY_SIZE(ram_banks))
-		len = ARRAY_SIZE(ram_banks);
+	/* Calculate the true # of base/size pairs to read */
+	len /= 4;		/* Convert bytes to number of cells */
+	len /= (na + ns);	/* Convert cells to number of banks */
+	if (len > CONFIG_NR_DRAM_BANKS)
+		len = CONFIG_NR_DRAM_BANKS;
 
+	/* Parse the /memory node, and save useful entries */
 	gd->ram_size = 0;
+	ram_bank_count = 0;
 	for (i = 0; i < len; i++) {
-		ram_banks[i].start = fdt_read_number(prop, na);
+		u64 bank_start, bank_end, bank_size, usable_bank_size;
+
+		/* Extract raw memory region data from DTB */
+		bank_start = fdt_read_number(prop, na);
 		prop += na;
-		ram_banks[i].size = fdt_read_number(prop, ns);
+		bank_size = fdt_read_number(prop, ns);
 		prop += ns;
-		gd->ram_size += ram_banks[i].size;
+		gd->ram_size += bank_size;
+		bank_end = bank_start + bank_size;
+		debug("Bank %d: %llx..%llx (+%llx)\n", i,
+		      bank_start, bank_end, bank_size);
+
+		/*
+		 * Align the bank to MMU section size. This is not strictly
+		 * necessary, since the translation table construction code
+		 * handles page granularity without issue. However, aligning
+		 * the MMU entries reduces the size and number of levels in the
+		 * page table, so is worth it.
+		 */
+		bank_start = ROUND(bank_start, SZ_2M);
+		bank_end = bank_end & ~(SZ_2M - 1);
+		bank_size = bank_end - bank_start;
+		debug("  aligned: %llx..%llx (+%llx)\n",
+		      bank_start, bank_end, bank_size);
+		if (bank_end <= bank_start)
+			continue;
+
+		/* Record data used to create MMU translation tables */
+		ram_bank_count++;
+		/* Index below is deliberately 1-based to skip MMIO entry */
+		tegra_mem_map[ram_bank_count].virt = bank_start;
+		tegra_mem_map[ram_bank_count].phys = bank_start;
+		tegra_mem_map[ram_bank_count].size = bank_size;
+		tegra_mem_map[ram_bank_count].attrs =
+			PTE_BLOCK_MEMTYPE(MT_NORMAL) | PTE_BLOCK_INNER_SHARE;
+
+		/* Determine best bank to relocate U-Boot into */
+		if (bank_end > SZ_4G)
+			bank_end = SZ_4G;
+		debug("  end  %llx (usable)\n", bank_end);
+		usable_bank_size = bank_end - bank_start;
+		debug("  size %llx (usable)\n", usable_bank_size);
+		if ((usable_bank_size >= MIN_USABLE_RAM_SIZE) &&
+		    (bank_end > ram_top)) {
+			ram_top = bank_end;
+			region_base = bank_start;
+			debug("ram top now %llx\n", ram_top);
+		}
+	}
+
+	/* Ensure memory map contains the desired sentinel entry */
+	tegra_mem_map[ram_bank_count + 1].virt = 0;
+	tegra_mem_map[ram_bank_count + 1].phys = 0;
+	tegra_mem_map[ram_bank_count + 1].size = 0;
+	tegra_mem_map[ram_bank_count + 1].attrs = 0;
+
+	/* Error out if a relocation target couldn't be found */
+	if (!ram_top) {
+		pr_err("Can't find a usable RAM top");
+		hang();
 	}
 
 	return 0;
 }
 
-extern unsigned long nvtboot_boot_x0;
-
 int dram_init_banksize(void)
 {
 	int i;
 
-	for (i = 0; i < 2; i++) {
-		gd->bd->bi_dram[i].start = ram_banks[i].start;
-		gd->bd->bi_dram[i].size = ram_banks[i].size;
+	if ((gd->start_addr_sp - region_base) < MIN_USABLE_STACK_SIZE) {
+		pr_err("Reservations exceed chosen region size");
+		hang();
+	}
+
+	for (i = 0; i < ram_bank_count; i++) {
+		gd->bd->bi_dram[i].start = tegra_mem_map[1 + i].virt;
+		gd->bd->bi_dram[i].size = tegra_mem_map[1 + i].size;
 	}
 
+#ifdef CONFIG_PCI
+	gd->pci_ram_top = ram_top;
+#endif
+
 	return 0;
 }
 
 ulong board_get_usable_ram_top(ulong total_size)
 {
-	return ram_banks[0].start + ram_banks[0].size;
+	return ram_top;
 }
diff --git a/arch/arm/mach-tegra/tegra210/Kconfig b/arch/arm/mach-tegra/tegra210/Kconfig
index 3637473051..250738aed3 100644
--- a/arch/arm/mach-tegra/tegra210/Kconfig
+++ b/arch/arm/mach-tegra/tegra210/Kconfig
@@ -40,6 +40,9 @@ endchoice
 config SYS_SOC
 	default "tegra210"
 
+config SYS_INIT_SP_BSS_OFFSET
+	default 524288
+
 source "board/nvidia/e2220-1170/Kconfig"
 source "board/nvidia/p2371-0000/Kconfig"
 source "board/nvidia/p2371-2180/Kconfig"