Merge patch series "riscv: Relocatable NOMMU kernels"

Samuel Holland <samuel.holland@sifive.com> says:

Currently, RISC-V NOMMU kernels are linked at CONFIG_PAGE_OFFSET, and
since they are not relocatable, must be loaded at this address as well.
CONFIG_PAGE_OFFSET is not a user-visible Kconfig option, so its value is
not obvious, and users must patch the kernel source if they want to load
it at a different address.

Make NOMMU kernels more portable by making them relocatable by default.
This allows a single kernel binary to work when loaded at any address.

* b4-shazam-merge:
  riscv: Remove CONFIG_PAGE_OFFSET
  riscv: Support CONFIG_RELOCATABLE on riscv32
  asm-generic: Always define Elf_Rel and Elf_Rela
  riscv: Support CONFIG_RELOCATABLE on NOMMU
  riscv: Allow NOMMU kernels to access all of RAM
  riscv: Remove duplicate CONFIG_PAGE_OFFSET definition

Link: https://lore.kernel.org/r/20241026171441.3047904-1-samuel.holland@sifive.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
commit f633de4aa4
Palmer Dabbelt, 2025-03-26 15:55:45 -07:00
7 changed files with 73 additions and 82 deletions

arch/riscv/Kconfig

@@ -202,6 +202,7 @@ config RISCV
        select PCI_DOMAINS_GENERIC if PCI
        select PCI_ECAM if (ACPI && PCI)
        select PCI_MSI if PCI
        select RELOCATABLE if !MMU && !PHYS_RAM_BASE_FIXED
        select RISCV_ALTERNATIVE if !XIP_KERNEL
        select RISCV_APLIC
        select RISCV_IMSIC
@@ -289,13 +290,6 @@ config MMU
          Select if you want MMU-based virtualised addressing space
          support by paged memory management. If unsure, say 'Y'.

config PAGE_OFFSET
        hex
        default 0x80000000 if !MMU && RISCV_M_MODE
        default 0x80200000 if !MMU
        default 0xc0000000 if 32BIT
        default 0xff60000000000000 if 64BIT

config KASAN_SHADOW_OFFSET
        hex
        depends on KASAN_GENERIC
@@ -1100,7 +1094,7 @@ config PARAVIRT_TIME_ACCOUNTING
config RELOCATABLE
        bool "Build a relocatable kernel"
        depends on MMU && 64BIT && !XIP_KERNEL
        depends on !XIP_KERNEL
        select MODULE_SECTIONS if MODULES
        help
          This builds a kernel as a Position Independent Executable (PIE),
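
A PIE-linked kernel carries its fix-up recipe as R_RISCV_RELATIVE entries in .rela.dyn, which is exactly what relocate_kernel() walks further down in this merge. As a quick way to inspect the result, a small host-side tool can count those entries; this is an illustrative sketch, not part of the series, and it assumes a 64-bit little-endian vmlinux and a host <elf.h>:

/* count_rela.c - count R_RISCV_RELATIVE entries in a 64-bit vmlinux.
 * Illustrative sketch only; assumes a little-endian ELF64 input.
 * Build: cc -o count_rela count_rela.c; run: ./count_rela vmlinux
 */
#include <elf.h>
#include <stdio.h>

#ifndef R_RISCV_RELATIVE
#define R_RISCV_RELATIVE 3      /* from the RISC-V psABI */
#endif

int main(int argc, char **argv)
{
        Elf64_Ehdr eh;
        Elf64_Shdr sh;
        unsigned long count = 0;
        FILE *f;

        if (argc < 2 || !(f = fopen(argv[1], "rb")))
                return 1;
        if (fread(&eh, sizeof(eh), 1, f) != 1)
                return 1;
        for (int i = 0; i < eh.e_shnum; i++) {
                /* Read section header i; scan it only if it holds RELA entries. */
                if (fseek(f, eh.e_shoff + i * sizeof(sh), SEEK_SET) ||
                    fread(&sh, sizeof(sh), 1, f) != 1)
                        return 1;
                if (sh.sh_type != SHT_RELA)
                        continue;
                fseek(f, sh.sh_offset, SEEK_SET);
                for (Elf64_Xword n = 0; n < sh.sh_size / sizeof(Elf64_Rela); n++) {
                        Elf64_Rela rela;

                        if (fread(&rela, sizeof(rela), 1, f) != 1)
                                return 1;
                        if (ELF64_R_TYPE(rela.r_info) == R_RISCV_RELATIVE)
                                count++;
                }
        }
        printf("%lu R_RISCV_RELATIVE relocations\n", count);
        return 0;
}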

arch/riscv/Makefile

@@ -98,7 +98,6 @@ KBUILD_AFLAGS += -march=$(riscv-march-y)
CC_FLAGS_FPU := -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)([^v_]*)v?/\1\2/')
KBUILD_CFLAGS += -mno-save-restore
KBUILD_CFLAGS += -DCONFIG_PAGE_OFFSET=$(CONFIG_PAGE_OFFSET)
ifeq ($(CONFIG_CMODEL_MEDLOW),y)
KBUILD_CFLAGS += -mcmodel=medlow

arch/riscv/errata/Makefile

@@ -1,5 +1,9 @@
ifdef CONFIG_RELOCATABLE
KBUILD_CFLAGS += -fno-pie
# We can't use PIC/PIE when handling early-boot errata parsing, as the kernel
# doesn't have a GOT setup at that point. So instead just use medany: it's
# usually position-independent, so it should be good enough for the errata
# handling.
KBUILD_CFLAGS += -fno-pie -mcmodel=medany
endif
ifdef CONFIG_RISCV_ALTERNATIVE_EARLY
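
The comment added above is about code generation: with -mcmodel=medany the compiler reaches symbols through PC-relative auipc/addi pairs, which work wherever the image happens to sit, whereas PIE code generation typically reaches extern symbols through the GOT, and this early in boot relocate_kernel() has not fixed the GOT up yet. One way to see the difference with a cross compiler (an illustration, not from the series; the toolchain name is an assumption):

/* medany_demo.c - why medany code can run before relocations are applied.
 * Illustration only. Compare the generated assembly:
 *   riscv64-linux-gnu-gcc -S -O2 -fno-pie -mcmodel=medany medany_demo.c
 *   riscv64-linux-gnu-gcc -S -O2 -fPIE medany_demo.c
 * The first addresses errata_count with a PC-relative auipc/lw pair; the
 * second typically loads its address from the GOT first, which would yield
 * an unrelocated link-time address at this point in boot.
 */
extern int errata_count;        /* stand-in for state defined elsewhere */

int read_errata_count(void)
{
        return errata_count;
}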

arch/riscv/include/asm/page.h

@@ -24,21 +24,22 @@
* When not using MMU this corresponds to the first free page in
* physical memory (aligned on a page boundary).
*/
#ifdef CONFIG_64BIT
#ifdef CONFIG_MMU
#define PAGE_OFFSET kernel_map.page_offset
#else
#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
#endif
/*
* By default, CONFIG_PAGE_OFFSET value corresponds to SV57 address space so
* define the PAGE_OFFSET value for SV48 and SV39.
*/
#ifdef CONFIG_64BIT
#define PAGE_OFFSET_L5 _AC(0xff60000000000000, UL)
#define PAGE_OFFSET_L4 _AC(0xffffaf8000000000, UL)
#define PAGE_OFFSET_L3 _AC(0xffffffd600000000, UL)
#ifdef CONFIG_XIP_KERNEL
#define PAGE_OFFSET PAGE_OFFSET_L3
#else
#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
#define PAGE_OFFSET kernel_map.page_offset
#endif /* CONFIG_XIP_KERNEL */
#else
#define PAGE_OFFSET _AC(0xc0000000, UL)
#endif /* CONFIG_64BIT */
#else
#define PAGE_OFFSET ((unsigned long)phys_ram_base)
#endif /* CONFIG_MMU */
#ifndef __ASSEMBLY__
@@ -95,14 +96,9 @@ typedef struct page *pgtable_t;
#define MIN_MEMBLOCK_ADDR 0
#endif
#ifdef CONFIG_MMU
#define ARCH_PFN_OFFSET (PFN_DOWN((unsigned long)phys_ram_base))
#else
#define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT)
#endif /* CONFIG_MMU */
struct kernel_mapping {
        unsigned long page_offset;
        unsigned long virt_addr;
        unsigned long virt_offset;
        uintptr_t phys_addr;
@@ -116,6 +112,7 @@ struct kernel_mapping {
        uintptr_t xiprom;
        uintptr_t xiprom_sz;
#else
        unsigned long page_offset;
        unsigned long va_kernel_pa_offset;
#endif
};
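
The NOMMU hunks here carry the point of the series: PAGE_OFFSET becomes the runtime value phys_ram_base instead of a build-time constant, and ARCH_PFN_OFFSET follows it, so the linear map begins at the start of RAM rather than at the kernel's own load address (this is what the "access all of RAM" patch above relies on). A small sketch of the resulting arithmetic, using hypothetical names rather than the kernel's real __pa()/__va() macros:

/* Sketch of the NOMMU linear map after this change: PAGE_OFFSET tracks
 * phys_ram_base at runtime, so virtual == physical and every page from
 * the start of RAM is addressable, including pages below the kernel.
 * All names here are illustrative stand-ins.
 */
extern unsigned long phys_ram_base;             /* detected start of RAM */

#define NOMMU_PAGE_OFFSET       phys_ram_base   /* PAGE_OFFSET on NOMMU */

static inline unsigned long nommu_virt_to_phys(unsigned long va)
{
        return va - NOMMU_PAGE_OFFSET + phys_ram_base;  /* identity map */
}

static inline unsigned long nommu_arch_pfn_offset(void)
{
        /* pfn of the first RAM page, not of the kernel's load address */
        return NOMMU_PAGE_OFFSET >> 12;                 /* PAGE_SHIFT */
}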

arch/riscv/include/asm/pgtable.h

@@ -12,7 +12,11 @@
#include <asm/pgtable-bits.h>
#ifndef CONFIG_MMU
#define KERNEL_LINK_ADDR PAGE_OFFSET
#ifdef CONFIG_RELOCATABLE
#define KERNEL_LINK_ADDR UL(0)
#else
#define KERNEL_LINK_ADDR _AC(CONFIG_PHYS_RAM_BASE, UL)
#endif
#define KERN_VIRT_SIZE (UL(-1))
#else

arch/riscv/mm/init.c

@@ -20,15 +20,13 @@
#include <linux/dma-map-ops.h>
#include <linux/crash_dump.h>
#include <linux/hugetlb.h>
#ifdef CONFIG_RELOCATABLE
#include <linux/elf.h>
#endif
#include <linux/kfence.h>
#include <linux/execmem.h>
#include <asm/fixmap.h>
#include <asm/io.h>
#include <asm/kasan.h>
#include <asm/module.h>
#include <asm/numa.h>
#include <asm/pgtable.h>
#include <asm/sections.h>
@@ -323,6 +321,44 @@ static void __init setup_bootmem(void)
        hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
}
#ifdef CONFIG_RELOCATABLE
extern unsigned long __rela_dyn_start, __rela_dyn_end;

static void __init relocate_kernel(void)
{
        Elf_Rela *rela = (Elf_Rela *)&__rela_dyn_start;
        /*
         * This holds the offset between the linked virtual address and the
         * relocated virtual address.
         */
        uintptr_t reloc_offset = kernel_map.virt_addr - KERNEL_LINK_ADDR;
        /*
         * This holds the offset between kernel linked virtual address and
         * physical address.
         */
        uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR - kernel_map.phys_addr;

        for ( ; rela < (Elf_Rela *)&__rela_dyn_end; rela++) {
                Elf_Addr addr = (rela->r_offset - va_kernel_link_pa_offset);
                Elf_Addr relocated_addr = rela->r_addend;

                if (rela->r_info != R_RISCV_RELATIVE)
                        continue;

                /*
                 * Make sure to not relocate vdso symbols like rt_sigreturn
                 * which are linked from the address 0 in vmlinux since
                 * vdso symbol addresses are actually used as an offset from
                 * mm->context.vdso in VDSO_OFFSET macro.
                 */
                if (relocated_addr >= KERNEL_LINK_ADDR)
                        relocated_addr += reloc_offset;

                *(Elf_Addr *)addr = relocated_addr;
        }
}
#endif /* CONFIG_RELOCATABLE */
#ifdef CONFIG_MMU
struct pt_alloc_ops pt_ops __meminitdata;
@@ -823,6 +859,8 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
        uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
        u64 satp_mode_cmdline = __pi_set_satp_mode_from_cmdline(dtb_pa);

        kernel_map.page_offset = PAGE_OFFSET_L5;

        if (satp_mode_cmdline == SATP_MODE_57) {
                disable_pgtable_l5();
        } else if (satp_mode_cmdline == SATP_MODE_48) {
@@ -893,44 +931,6 @@ retry:
#error "setup_vm() is called from head.S before relocate so it should not use absolute addressing."
#endif
#ifdef CONFIG_RELOCATABLE
extern unsigned long __rela_dyn_start, __rela_dyn_end;

static void __init relocate_kernel(void)
{
        Elf64_Rela *rela = (Elf64_Rela *)&__rela_dyn_start;
        /*
         * This holds the offset between the linked virtual address and the
         * relocated virtual address.
         */
        uintptr_t reloc_offset = kernel_map.virt_addr - KERNEL_LINK_ADDR;
        /*
         * This holds the offset between kernel linked virtual address and
         * physical address.
         */
        uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR - kernel_map.phys_addr;

        for ( ; rela < (Elf64_Rela *)&__rela_dyn_end; rela++) {
                Elf64_Addr addr = (rela->r_offset - va_kernel_link_pa_offset);
                Elf64_Addr relocated_addr = rela->r_addend;

                if (rela->r_info != R_RISCV_RELATIVE)
                        continue;

                /*
                 * Make sure to not relocate vdso symbols like rt_sigreturn
                 * which are linked from the address 0 in vmlinux since
                 * vdso symbol addresses are actually used as an offset from
                 * mm->context.vdso in VDSO_OFFSET macro.
                 */
                if (relocated_addr >= KERNEL_LINK_ADDR)
                        relocated_addr += reloc_offset;

                *(Elf64_Addr *)addr = relocated_addr;
        }
}
#endif /* CONFIG_RELOCATABLE */
#ifdef CONFIG_XIP_KERNEL
static void __init create_kernel_page_table(pgd_t *pgdir,
                                            __always_unused bool early)
@@ -1108,11 +1108,6 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
        kernel_map.virt_addr = KERNEL_LINK_ADDR + kernel_map.virt_offset;

#ifdef CONFIG_XIP_KERNEL
#ifdef CONFIG_64BIT
        kernel_map.page_offset = PAGE_OFFSET_L3;
#else
        kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL);
#endif
        kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR;
        kernel_map.xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom);
@@ -1127,7 +1122,6 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
        kernel_map.va_kernel_xip_data_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr
                                                  + (uintptr_t)&_sdata - (uintptr_t)&_start;
#else
        kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL);
        kernel_map.phys_addr = (uintptr_t)(&_start);
        kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr;
        kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr;
@@ -1174,7 +1168,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
         * makes the kernel cross over a PUD_SIZE boundary, raise a bug
         * since a part of the kernel would not get mapped.
         */
        BUG_ON(PUD_SIZE - (kernel_map.virt_addr & (PUD_SIZE - 1)) < kernel_map.size);
        if (IS_ENABLED(CONFIG_64BIT))
                BUG_ON(PUD_SIZE - (kernel_map.virt_addr & (PUD_SIZE - 1)) < kernel_map.size);
        relocate_kernel();
#endif
@@ -1378,6 +1373,12 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
{
        dtb_early_va = (void *)dtb_pa;
        dtb_early_pa = dtb_pa;

#ifdef CONFIG_RELOCATABLE
        kernel_map.virt_addr = (uintptr_t)_start;
        kernel_map.phys_addr = (uintptr_t)_start;
        relocate_kernel();
#endif
}
static inline void setup_vm_final(void)
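
relocate_kernel() above boils down to one rule: for every R_RISCV_RELATIVE entry, store the addend plus the load offset at the location the entry points to. The same fix-up can be modeled in user space against a scratch buffer; everything below (the names, the fake entry) is illustrative, and only the arithmetic mirrors the kernel function:

/* rela_demo.c - user-space model of applying an R_RISCV_RELATIVE entry.
 * Illustration only: "image" stands in for the loaded kernel, and the
 * link address is 0, as with a relocatable NOMMU kernel (KERNEL_LINK_ADDR).
 * Build: cc -o rela_demo rela_demo.c
 */
#include <elf.h>
#include <stdint.h>
#include <stdio.h>

#ifndef R_RISCV_RELATIVE
#define R_RISCV_RELATIVE 3
#endif

#define LINK_ADDR 0x0UL         /* KERNEL_LINK_ADDR for relocatable NOMMU */

int main(void)
{
        uint64_t image[4] = { 0 };      /* pretend this is the loaded kernel */
        uintptr_t load_addr = (uintptr_t)image;
        uintptr_t reloc_offset = load_addr - LINK_ADDR;

        /* One .rela.dyn entry: "the pointer at link-time offset 16 should
         * point at link-time address 24". */
        Elf64_Rela rela = {
                .r_offset = 16,
                .r_info   = R_RISCV_RELATIVE,
                .r_addend = 24,
        };

        if (ELF64_R_TYPE(rela.r_info) == R_RISCV_RELATIVE)
                *(uint64_t *)(load_addr + rela.r_offset) = rela.r_addend + reloc_offset;

        printf("slot now holds %#lx (load base %#lx + 24)\n",
               (unsigned long)image[2], (unsigned long)load_addr);
        return 0;
}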

include/asm-generic/module.h

@@ -19,12 +19,8 @@ struct mod_arch_specific
#define Elf_Dyn Elf64_Dyn
#define Elf_Ehdr Elf64_Ehdr
#define Elf_Addr Elf64_Addr
#ifdef CONFIG_MODULES_USE_ELF_REL
#define Elf_Rel Elf64_Rel
#endif
#ifdef CONFIG_MODULES_USE_ELF_RELA
#define Elf_Rela Elf64_Rela
#endif
#define ELF_R_TYPE(X) ELF64_R_TYPE(X)
#define ELF_R_SYM(X) ELF64_R_SYM(X)
@@ -36,12 +32,8 @@ struct mod_arch_specific
#define Elf_Dyn Elf32_Dyn
#define Elf_Ehdr Elf32_Ehdr
#define Elf_Addr Elf32_Addr
#ifdef CONFIG_MODULES_USE_ELF_REL
#define Elf_Rel Elf32_Rel
#endif
#ifdef CONFIG_MODULES_USE_ELF_RELA
#define Elf_Rela Elf32_Rela
#endif
#define ELF_R_TYPE(X) ELF32_R_TYPE(X)
#define ELF_R_SYM(X) ELF32_R_SYM(X)
#endif
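
With Elf_Rel and Elf_Rela now defined unconditionally, relocation-walking code such as relocate_kernel() can be written once against the generic names and build for both riscv32 and riscv64, which is what the riscv32 patch in this series relies on. A self-contained sketch of the pattern, where the UINTPTR_MAX dispatch is a stand-in for the word-size test in this header:

/* Sketch: pick the native ELF types once, then write relocation code
 * generically. Mirrors the asm-generic/module.h defines above;
 * illustration only, not kernel code.
 */
#include <elf.h>
#include <stdint.h>

#if UINTPTR_MAX > 0xffffffffUL
#define Elf_Rela        Elf64_Rela
#define Elf_Addr        Elf64_Addr
#else
#define Elf_Rela        Elf32_Rela
#define Elf_Addr        Elf32_Addr
#endif

#ifndef R_RISCV_RELATIVE
#define R_RISCV_RELATIVE 3
#endif

/* Same shape as relocate_kernel(): patch every RELATIVE entry. Comparing
 * the whole r_info works because RELATIVE entries carry symbol index 0.
 * The store treats r_offset as a live address, as it is during boot.
 */
static void apply_relative(Elf_Rela *rela, Elf_Rela *end, Elf_Addr reloc_offset)
{
        for ( ; rela < end; rela++)
                if (rela->r_info == R_RISCV_RELATIVE)
                        *(Elf_Addr *)(uintptr_t)rela->r_offset =
                                rela->r_addend + reloc_offset;
}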