mirror of
https://github.com/torvalds/linux.git
synced 2025-04-05 23:14:13 +00:00
Miscellaneous x86 fixes and updates:
- Fix a large number of x86 Kconfig dependency and help text accuracy bugs/problems, by Mateusz Jończyk and David Heideberg. - Fix a VM_PAT interaction with fork() crash. This also touches core kernel code. - Fix an ORC unwinder bug for interrupt entries - Fixes and cleanups. - Fix an AMD microcode loader bug that can promote verification failures into success. - Add early-printk support for MMIO based UARTs on an x86 board that had no other serial debugging facility and also experienced early boot crashes. Signed-off-by: Ingo Molnar <mingo@kernel.org> -----BEGIN PGP SIGNATURE----- iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmfnFBERHG1pbmdvQGtl cm5lbC5vcmcACgkQEnMQ0APhK1iVDxAAmiB4soT3/WbaWJJdeVyxEL7sOmUNOm04 5kAVHJVK8QGdje0eWa6h7xmuQD3UOxafE2coCrOxHhZi2qpAAY6CPIIy6oIBRwZK gLgT5xn1CHojfm4UFC3YUOyecBRPUF2C5jfkajWdZHumyPP/sOObqvGanpQRAYd5 bfPHEvrBpeEeS7WkATCdyF2j+I5xYflD4g/MDAsMmqasQHOnjBuFX5VBeVxxkysC dMsFkFpxqcA95MnnyOnxXzgOtRTY0UystX07D3Bk1pqhG9zor+mp8OynsTRCU87T ZPPbUr2qACNmCqEEXl+F1mAkgj5H66xE2gaJdYx0/jBAIbX8Nwih7mMxhJShVU07 Lhc0tukmVrDoDaVIr2HsxqI8iokuYLszUjDAqEQmQDrgelL6usPYghN1b2bDSJ9r 0hCO/s79024H/U9oMrC+CF52D5UH/fE98ipigrbKRIO/hOsoxiiniF3DG2NVWZM2 n5nPnOdbperqjCEteN1nxQfr7XZkvP95Bwmuqqc90XH+tzKJdHruUkbm4ua7NEEz WKgsUIYFjeN5ZrHbJaNtHlQueTyvsyGmL1nlaLi/MaJbSXPsM/WfwvHsaKTh3NrE BFwEAhMZVLDHEfnFT0Ev7Mm1MGpW8MbHoRBR1+E5FWWNS4X0yGLKXWRp8diw25Tm W3ZVsn65E6U= =/qKX -----END PGP SIGNATURE----- Merge tag 'x86-urgent-2025-03-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull misc x86 fixes and updates from Ingo Molnar: - Fix a large number of x86 Kconfig dependency and help text accuracy bugs/problems, by Mateusz Jończyk and David Heideberg - Fix a VM_PAT interaction with fork() crash. 
This also touches core kernel code - Fix an ORC unwinder bug for interrupt entries - Fixes and cleanups - Fix an AMD microcode loader bug that can promote verification failures into success - Add early-printk support for MMIO based UARTs on an x86 board that had no other serial debugging facility and also experienced early boot crashes * tag 'x86-urgent-2025-03-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/microcode/AMD: Fix __apply_microcode_amd()'s return value x86/mm/pat: Fix VM_PAT handling when fork() fails in copy_page_range() x86/fpu: Update the outdated comment above fpstate_init_user() x86/early_printk: Add support for MMIO-based UARTs x86/dumpstack: Fix inaccurate unwinding from exception stacks due to misplaced assignment x86/entry: Fix ORC unwinder for PUSH_REGS with save_ret=1 x86/Kconfig: Fix lists in X86_EXTENDED_PLATFORM help text x86/Kconfig: Correct X86_X2APIC help text x86/speculation: Remove the extra #ifdef around CALL_NOSPEC x86/Kconfig: Document release year of glibc 2.3.3 x86/Kconfig: Make CONFIG_PCI_CNB20LE_QUIRK depend on X86_32 x86/Kconfig: Document CONFIG_PCI_MMCONFIG x86/Kconfig: Update lists in X86_EXTENDED_PLATFORM x86/Kconfig: Move all X86_EXTENDED_PLATFORM options together x86/Kconfig: Always enable ARCH_SPARSEMEM_ENABLE x86/Kconfig: Enable X86_X2APIC by default and improve help text
This commit is contained in:
commit
7405c0f01a
@ -1407,14 +1407,21 @@
|
||||
earlyprintk=serial[,0x...[,baudrate]]
|
||||
earlyprintk=ttySn[,baudrate]
|
||||
earlyprintk=dbgp[debugController#]
|
||||
earlyprintk=pciserial[,force],bus:device.function[,baudrate]
|
||||
earlyprintk=pciserial[,force],bus:device.function[,{nocfg|baudrate}]
|
||||
earlyprintk=xdbc[xhciController#]
|
||||
earlyprintk=bios
|
||||
earlyprintk=mmio,membase[,{nocfg|baudrate}]
|
||||
|
||||
earlyprintk is useful when the kernel crashes before
|
||||
the normal console is initialized. It is not enabled by
|
||||
default because it has some cosmetic problems.
|
||||
|
||||
Only 32-bit memory addresses are supported for "mmio"
|
||||
and "pciserial" devices.
|
||||
|
||||
Use "nocfg" to skip UART configuration, assuming
|
||||
BIOS/firmware has configured UART correctly.
|
||||
|
||||
Append ",keep" to not disable it when the real console
|
||||
takes over.
|
||||
|
||||
|
@ -460,20 +460,28 @@ config SMP
|
||||
If you don't know what to do here, say N.
|
||||
|
||||
config X86_X2APIC
|
||||
bool "Support x2apic"
|
||||
bool "x2APIC interrupt controller architecture support"
|
||||
depends on X86_LOCAL_APIC && X86_64 && (IRQ_REMAP || HYPERVISOR_GUEST)
|
||||
default y
|
||||
help
|
||||
This enables x2apic support on CPUs that have this feature.
|
||||
x2APIC is an interrupt controller architecture, a component of which
|
||||
(the local APIC) is present in the CPU. It allows faster access to
|
||||
the local APIC and supports a larger number of CPUs in the system
|
||||
than the predecessors.
|
||||
|
||||
This allows 32-bit apic IDs (so it can support very large systems),
|
||||
and accesses the local apic via MSRs not via mmio.
|
||||
x2APIC was introduced in Intel CPUs around 2008 and in AMD EPYC CPUs
|
||||
in 2019, but it can be disabled by the BIOS. It is also frequently
|
||||
emulated in virtual machines, even when the host CPU does not support
|
||||
it. Support in the CPU can be checked by executing
|
||||
grep x2apic /proc/cpuinfo
|
||||
|
||||
Some Intel systems circa 2022 and later are locked into x2APIC mode
|
||||
and can not fall back to the legacy APIC modes if SGX or TDX are
|
||||
enabled in the BIOS. They will boot with very reduced functionality
|
||||
without enabling this option.
|
||||
If this configuration option is disabled, the kernel will boot with
|
||||
very reduced functionality and performance on some platforms that
|
||||
have x2APIC enabled. On the other hand, on hardware that does not
|
||||
support x2APIC, a kernel with this option enabled will just fall back
|
||||
to older APIC implementations.
|
||||
|
||||
If you don't know what to do here, say N.
|
||||
If in doubt, say Y.
|
||||
|
||||
config X86_POSTED_MSI
|
||||
bool "Enable MSI and MSI-x delivery by posted interrupts"
|
||||
@ -544,16 +552,17 @@ config X86_EXTENDED_PLATFORM
|
||||
CONFIG_64BIT.
|
||||
|
||||
32-bit platforms (CONFIG_64BIT=n):
|
||||
Goldfish (Android emulator)
|
||||
AMD Elan
|
||||
Goldfish (mostly Android emulator)
|
||||
Intel CE media processor (CE4100) SoC
|
||||
Intel Quark
|
||||
RDC R-321x SoC
|
||||
SGI 320/540 (Visual Workstation)
|
||||
|
||||
64-bit platforms (CONFIG_64BIT=y):
|
||||
Numascale NumaChip
|
||||
ScaleMP vSMP
|
||||
SGI Ultraviolet
|
||||
Merrifield/Moorefield MID devices
|
||||
Goldfish (mostly Android emulator)
|
||||
|
||||
If you have one of these systems, or if you want to build a
|
||||
generic distribution kernel, say Y here - otherwise say N.
|
||||
@ -667,6 +676,17 @@ config X86_INTEL_QUARK
|
||||
Say Y here if you have a Quark based system such as the Arduino
|
||||
compatible Intel Galileo.
|
||||
|
||||
config X86_RDC321X
|
||||
bool "RDC R-321x SoC"
|
||||
depends on X86_32
|
||||
depends on X86_EXTENDED_PLATFORM
|
||||
select M486
|
||||
select X86_REBOOTFIXUPS
|
||||
help
|
||||
This option is needed for RDC R-321x system-on-chip, also known
|
||||
as R-8610-(G).
|
||||
If you don't have one of these chips, you should say N here.
|
||||
|
||||
config X86_INTEL_LPSS
|
||||
bool "Intel Low Power Subsystem Support"
|
||||
depends on X86 && ACPI && PCI
|
||||
@ -720,17 +740,6 @@ config IOSF_MBI_DEBUG
|
||||
|
||||
If you don't require the option or are in doubt, say N.
|
||||
|
||||
config X86_RDC321X
|
||||
bool "RDC R-321x SoC"
|
||||
depends on X86_32
|
||||
depends on X86_EXTENDED_PLATFORM
|
||||
select M486
|
||||
select X86_REBOOTFIXUPS
|
||||
help
|
||||
This option is needed for RDC R-321x system-on-chip, also known
|
||||
as R-8610-(G).
|
||||
If you don't have one of these chips, you should say N here.
|
||||
|
||||
config X86_SUPPORTS_MEMORY_FAILURE
|
||||
def_bool y
|
||||
# MCE code calls memory_failure():
|
||||
@ -1565,7 +1574,6 @@ config ARCH_FLATMEM_ENABLE
|
||||
|
||||
config ARCH_SPARSEMEM_ENABLE
|
||||
def_bool y
|
||||
depends on X86_64 || NUMA || X86_32
|
||||
select SPARSEMEM_STATIC if X86_32
|
||||
select SPARSEMEM_VMEMMAP_ENABLE if X86_64
|
||||
|
||||
@ -2212,7 +2220,7 @@ config HOTPLUG_CPU
|
||||
|
||||
config COMPAT_VDSO
|
||||
def_bool n
|
||||
prompt "Disable the 32-bit vDSO (needed for glibc 2.3.3)"
|
||||
prompt "Workaround for glibc 2.3.2 / 2.3.3 (released in year 2003/2004)"
|
||||
depends on COMPAT_32
|
||||
help
|
||||
Certain buggy versions of glibc will crash if they are
|
||||
@ -2901,6 +2909,19 @@ config PCI_MMCONFIG
|
||||
default y
|
||||
depends on PCI && (ACPI || JAILHOUSE_GUEST)
|
||||
depends on X86_64 || (PCI_GOANY || PCI_GOMMCONFIG)
|
||||
help
|
||||
Add support for accessing the PCI configuration space as a memory
|
||||
mapped area. It is the recommended method if the system supports
|
||||
this (it must have PCI Express and ACPI for it to be available).
|
||||
|
||||
In the unlikely case that enabling this configuration option causes
|
||||
problems, the mechanism can be switched off with the 'pci=nommconf'
|
||||
command line parameter.
|
||||
|
||||
Say N only if you are sure that your platform does not support this
|
||||
access method or you have problems caused by it.
|
||||
|
||||
Say Y otherwise.
|
||||
|
||||
config PCI_OLPC
|
||||
def_bool y
|
||||
@ -2915,13 +2936,21 @@ config MMCONF_FAM10H
|
||||
depends on X86_64 && PCI_MMCONFIG && ACPI
|
||||
|
||||
config PCI_CNB20LE_QUIRK
|
||||
bool "Read CNB20LE Host Bridge Windows" if EXPERT
|
||||
depends on PCI
|
||||
bool "Read PCI host bridge windows from the CNB20LE chipset" if EXPERT
|
||||
depends on X86_32 && PCI
|
||||
help
|
||||
Read the PCI windows out of the CNB20LE host bridge. This allows
|
||||
PCI hotplug to work on systems with the CNB20LE chipset which do
|
||||
not have ACPI.
|
||||
|
||||
The ServerWorks (later Broadcom) CNB20LE was a chipset designed
|
||||
most probably only for Pentium III.
|
||||
|
||||
To find out if you have such a chipset, search for a PCI device with
|
||||
1166:0009 PCI IDs, for example by executing
|
||||
lspci -nn | grep '1166:0009'
|
||||
The code is inactive if there is none.
|
||||
|
||||
There's no public spec for this chipset, and this functionality
|
||||
is known to be incomplete.
|
||||
|
||||
|
@ -70,6 +70,8 @@ For 32-bit we have the following conventions - kernel is built with
|
||||
pushq %rsi /* pt_regs->si */
|
||||
movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */
|
||||
movq %rdi, 8(%rsp) /* pt_regs->di (overwriting original return address) */
|
||||
/* We just clobbered the return address - use the IRET frame for unwinding: */
|
||||
UNWIND_HINT_IRET_REGS offset=3*8
|
||||
.else
|
||||
pushq %rdi /* pt_regs->di */
|
||||
pushq %rsi /* pt_regs->si */
|
||||
|
@ -435,12 +435,8 @@ static inline void call_depth_return_thunk(void) {}
|
||||
* Inline asm uses the %V modifier which is only in newer GCC
|
||||
* which is ensured when CONFIG_MITIGATION_RETPOLINE is defined.
|
||||
*/
|
||||
#ifdef CONFIG_MITIGATION_RETPOLINE
|
||||
#define CALL_NOSPEC __CS_PREFIX("%V[thunk_target]") \
|
||||
"call __x86_indirect_thunk_%V[thunk_target]\n"
|
||||
#else
|
||||
#define CALL_NOSPEC "call *%[thunk_target]\n"
|
||||
#endif
|
||||
|
||||
# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
|
||||
|
||||
|
@ -600,7 +600,7 @@ static bool __apply_microcode_amd(struct microcode_amd *mc, u32 *cur_rev,
|
||||
unsigned long p_addr = (unsigned long)&mc->hdr.data_code;
|
||||
|
||||
if (!verify_sha256_digest(mc->hdr.patch_id, *cur_rev, (const u8 *)p_addr, psize))
|
||||
return -1;
|
||||
return false;
|
||||
|
||||
native_wrmsrl(MSR_AMD64_PATCH_LOADER, p_addr);
|
||||
|
||||
|
@ -195,6 +195,7 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
||||
printk("%sCall Trace:\n", log_lvl);
|
||||
|
||||
unwind_start(&state, task, regs, stack);
|
||||
stack = stack ?: get_stack_pointer(task, regs);
|
||||
regs = unwind_get_entry_regs(&state, &partial);
|
||||
|
||||
/*
|
||||
@ -213,9 +214,7 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
||||
* - hardirq stack
|
||||
* - entry stack
|
||||
*/
|
||||
for (stack = stack ?: get_stack_pointer(task, regs);
|
||||
stack;
|
||||
stack = stack_info.next_sp) {
|
||||
for (; stack; stack = stack_info.next_sp) {
|
||||
const char *stack_name;
|
||||
|
||||
stack = PTR_ALIGN(stack, sizeof(long));
|
||||
|
@ -190,7 +190,6 @@ static __init void early_serial_init(char *s)
|
||||
early_serial_hw_init(divisor);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PCI
|
||||
static __noendbr void mem32_serial_out(unsigned long addr, int offset, int value)
|
||||
{
|
||||
u32 __iomem *vaddr = (u32 __iomem *)addr;
|
||||
@ -207,6 +206,45 @@ static __noendbr unsigned int mem32_serial_in(unsigned long addr, int offset)
|
||||
}
|
||||
ANNOTATE_NOENDBR_SYM(mem32_serial_in);
|
||||
|
||||
/*
|
||||
* early_mmio_serial_init() - Initialize MMIO-based early serial console.
|
||||
* @s: MMIO-based serial specification.
|
||||
*/
|
||||
static __init void early_mmio_serial_init(char *s)
|
||||
{
|
||||
unsigned long baudrate;
|
||||
unsigned long membase;
|
||||
char *e;
|
||||
|
||||
if (*s == ',')
|
||||
s++;
|
||||
|
||||
if (!strncmp(s, "0x", 2)) {
|
||||
/* NB: only 32-bit addresses are supported. */
|
||||
membase = simple_strtoul(s, &e, 16);
|
||||
early_serial_base = (unsigned long)early_ioremap(membase, PAGE_SIZE);
|
||||
|
||||
static_call_update(serial_in, mem32_serial_in);
|
||||
static_call_update(serial_out, mem32_serial_out);
|
||||
|
||||
s += strcspn(s, ",");
|
||||
if (*s == ',')
|
||||
s++;
|
||||
}
|
||||
|
||||
if (!strncmp(s, "nocfg", 5)) {
|
||||
baudrate = 0;
|
||||
} else {
|
||||
baudrate = simple_strtoul(s, &e, 0);
|
||||
if (baudrate == 0 || s == e)
|
||||
baudrate = DEFAULT_BAUD;
|
||||
}
|
||||
|
||||
if (baudrate)
|
||||
early_serial_hw_init(115200 / baudrate);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PCI
|
||||
/*
|
||||
* early_pci_serial_init()
|
||||
*
|
||||
@ -351,6 +389,11 @@ static int __init setup_early_printk(char *buf)
|
||||
keep = (strstr(buf, "keep") != NULL);
|
||||
|
||||
while (*buf != '\0') {
|
||||
if (!strncmp(buf, "mmio", 4)) {
|
||||
early_mmio_serial_init(buf + 4);
|
||||
early_console_register(&early_serial_console, keep);
|
||||
buf += 4;
|
||||
}
|
||||
if (!strncmp(buf, "serial", 6)) {
|
||||
buf += 6;
|
||||
early_serial_init(buf);
|
||||
|
@ -508,7 +508,7 @@ static inline void fpstate_init_fstate(struct fpstate *fpstate)
|
||||
/*
|
||||
* Used in two places:
|
||||
* 1) Early boot to setup init_fpstate for non XSAVE systems
|
||||
* 2) fpu_init_fpstate_user() which is invoked from KVM
|
||||
* 2) fpu_alloc_guest_fpstate() which is invoked from KVM
|
||||
*/
|
||||
void fpstate_init_user(struct fpstate *fpstate)
|
||||
{
|
||||
|
@ -984,29 +984,42 @@ static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* track_pfn_copy is called when vma that is covering the pfnmap gets
|
||||
* copied through copy_page_range().
|
||||
*
|
||||
* If the vma has a linear pfn mapping for the entire range, we get the prot
|
||||
* from pte and reserve the entire vma range with single reserve_pfn_range call.
|
||||
*/
|
||||
int track_pfn_copy(struct vm_area_struct *vma)
|
||||
int track_pfn_copy(struct vm_area_struct *dst_vma,
|
||||
struct vm_area_struct *src_vma, unsigned long *pfn)
|
||||
{
|
||||
const unsigned long vma_size = src_vma->vm_end - src_vma->vm_start;
|
||||
resource_size_t paddr;
|
||||
unsigned long vma_size = vma->vm_end - vma->vm_start;
|
||||
pgprot_t pgprot;
|
||||
int rc;
|
||||
|
||||
if (vma->vm_flags & VM_PAT) {
|
||||
if (get_pat_info(vma, &paddr, &pgprot))
|
||||
return -EINVAL;
|
||||
/* reserve the whole chunk covered by vma. */
|
||||
return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
|
||||
}
|
||||
if (!(src_vma->vm_flags & VM_PAT))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Duplicate the PAT information for the dst VMA based on the src
|
||||
* VMA.
|
||||
*/
|
||||
if (get_pat_info(src_vma, &paddr, &pgprot))
|
||||
return -EINVAL;
|
||||
rc = reserve_pfn_range(paddr, vma_size, &pgprot, 1);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
/* Reservation for the destination VMA succeeded. */
|
||||
vm_flags_set(dst_vma, VM_PAT);
|
||||
*pfn = PHYS_PFN(paddr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void untrack_pfn_copy(struct vm_area_struct *dst_vma, unsigned long pfn)
|
||||
{
|
||||
untrack_pfn(dst_vma, pfn, dst_vma->vm_end - dst_vma->vm_start, true);
|
||||
/*
|
||||
* Reservation was freed, any copied page tables will get cleaned
|
||||
* up later, but without getting PAT involved again.
|
||||
*/
|
||||
}
|
||||
|
||||
/*
|
||||
* prot is passed in as a parameter for the new mapping. If the vma has
|
||||
* a linear pfn mapping for the entire range, or no vma is provided,
|
||||
@ -1095,15 +1108,6 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* untrack_pfn_clear is called if the following situation fits:
|
||||
*
|
||||
* 1) while mremapping a pfnmap for a new region, with the old vma after
|
||||
* its pfnmap page table has been removed. The new vma has a new pfnmap
|
||||
* to the same pfn & cache type with VM_PAT set.
|
||||
* 2) while duplicating vm area, the new vma fails to copy the pgtable from
|
||||
* old vma.
|
||||
*/
|
||||
void untrack_pfn_clear(struct vm_area_struct *vma)
|
||||
{
|
||||
vm_flags_clear(vma, VM_PAT);
|
||||
|
@ -1508,14 +1508,25 @@ static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
|
||||
}
|
||||
|
||||
/*
|
||||
* track_pfn_copy is called when vma that is covering the pfnmap gets
|
||||
* copied through copy_page_range().
|
||||
* track_pfn_copy is called when a VM_PFNMAP VMA is about to get the page
|
||||
* tables copied during copy_page_range(). On success, stores the pfn to be
|
||||
* passed to untrack_pfn_copy().
|
||||
*/
|
||||
static inline int track_pfn_copy(struct vm_area_struct *vma)
|
||||
static inline int track_pfn_copy(struct vm_area_struct *dst_vma,
|
||||
struct vm_area_struct *src_vma, unsigned long *pfn)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* untrack_pfn_copy is called when a VM_PFNMAP VMA failed to copy during
|
||||
* copy_page_range(), but after track_pfn_copy() was already called.
|
||||
*/
|
||||
static inline void untrack_pfn_copy(struct vm_area_struct *dst_vma,
|
||||
unsigned long pfn)
|
||||
{
|
||||
}
|
||||
|
||||
/*
|
||||
* untrack_pfn is called while unmapping a pfnmap for a region.
|
||||
* untrack can be called for a specific region indicated by pfn and size or
|
||||
@ -1528,8 +1539,10 @@ static inline void untrack_pfn(struct vm_area_struct *vma,
|
||||
}
|
||||
|
||||
/*
|
||||
* untrack_pfn_clear is called while mremapping a pfnmap for a new region
|
||||
* or fails to copy pgtable during duplicate vm area.
|
||||
* untrack_pfn_clear is called in the following cases on a VM_PFNMAP VMA:
|
||||
*
|
||||
* 1) During mremap() on the src VMA after the page tables were moved.
|
||||
* 2) During fork() on the dst VMA, immediately after duplicating the src VMA.
|
||||
*/
|
||||
static inline void untrack_pfn_clear(struct vm_area_struct *vma)
|
||||
{
|
||||
@ -1540,7 +1553,10 @@ extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
|
||||
unsigned long size);
|
||||
extern void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
|
||||
pfn_t pfn);
|
||||
extern int track_pfn_copy(struct vm_area_struct *vma);
|
||||
extern int track_pfn_copy(struct vm_area_struct *dst_vma,
|
||||
struct vm_area_struct *src_vma, unsigned long *pfn);
|
||||
extern void untrack_pfn_copy(struct vm_area_struct *dst_vma,
|
||||
unsigned long pfn);
|
||||
extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
|
||||
unsigned long size, bool mm_wr_locked);
|
||||
extern void untrack_pfn_clear(struct vm_area_struct *vma);
|
||||
|
@ -504,6 +504,10 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
|
||||
vma_numab_state_init(new);
|
||||
dup_anon_vma_name(orig, new);
|
||||
|
||||
/* track_pfn_copy() will later take care of copying internal state. */
|
||||
if (unlikely(new->vm_flags & VM_PFNMAP))
|
||||
untrack_pfn_clear(new);
|
||||
|
||||
return new;
|
||||
}
|
||||
|
||||
|
11
mm/memory.c
11
mm/memory.c
@ -1362,12 +1362,12 @@ int
|
||||
copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
|
||||
{
|
||||
pgd_t *src_pgd, *dst_pgd;
|
||||
unsigned long next;
|
||||
unsigned long addr = src_vma->vm_start;
|
||||
unsigned long end = src_vma->vm_end;
|
||||
struct mm_struct *dst_mm = dst_vma->vm_mm;
|
||||
struct mm_struct *src_mm = src_vma->vm_mm;
|
||||
struct mmu_notifier_range range;
|
||||
unsigned long next, pfn;
|
||||
bool is_cow;
|
||||
int ret;
|
||||
|
||||
@ -1378,11 +1378,7 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
|
||||
return copy_hugetlb_page_range(dst_mm, src_mm, dst_vma, src_vma);
|
||||
|
||||
if (unlikely(src_vma->vm_flags & VM_PFNMAP)) {
|
||||
/*
|
||||
* We do not free on error cases below as remove_vma
|
||||
* gets called on error from higher level routine
|
||||
*/
|
||||
ret = track_pfn_copy(src_vma);
|
||||
ret = track_pfn_copy(dst_vma, src_vma, &pfn);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
@ -1419,7 +1415,6 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
|
||||
continue;
|
||||
if (unlikely(copy_p4d_range(dst_vma, src_vma, dst_pgd, src_pgd,
|
||||
addr, next))) {
|
||||
untrack_pfn_clear(dst_vma);
|
||||
ret = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
@ -1429,6 +1424,8 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
|
||||
raw_write_seqcount_end(&src_mm->write_protect_seq);
|
||||
mmu_notifier_invalidate_range_end(&range);
|
||||
}
|
||||
if (ret && unlikely(src_vma->vm_flags & VM_PFNMAP))
|
||||
untrack_pfn_copy(dst_vma, pfn);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user