For large systems, the overhead of vmemmap pages for hugetlb is substantial: about 1.5% of memory, which is about 45G for a 3T system. If you want to configure most of that system for hugetlb (e.g. to use as backing memory for VMs), there is a chance of running out of memory on boot, even though you know that the 45G will become available later.

To avoid this scenario, and since it is a waste to first allocate and then free that 45G during boot, do pre-HVO for hugetlb bootmem-allocated pages ('gigantic' pages).

Pre-HVO is done by adding functions that are called from sparse_init_nid_early and sparse_init_nid_late. The first is called before memmap allocation, so it takes care of allocating the memmap HVO-style. The second verifies that all bootmem pages look good; specifically, it checks that they do not intersect with multiple zones. This can only be done from the sparse_init_nid_late path, when zones have been initialized.

The hugetlb page size must be aligned to the section size, and aligned to the size of memory described by the number of page structures contained in one PMD (since pre-HVO is not prepared to split PMDs). This should be true for most 'gigantic' pages; it is for 1G pages on x86, where both of these alignment requirements are 128M.

This only has an effect if hugetlb_bootmem_alloc was called early in boot. If not, it does nothing, and HVO for bootmem hugetlb pages works as before.

Link: https://lkml.kernel.org/r/20250228182928.2645936-20-fvdl@google.com
Signed-off-by: Frank van der Linden <fvdl@google.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Dan Carpenter <dan.carpenter@linaro.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Joao Martins <joao.m.martins@oracle.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Roman Gushchin (Cruise) <roman.gushchin@linux.dev>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
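A minimal sketch of the wiring described above, assuming the hooks are the hugetlb_vmemmap_init_early()/hugetlb_vmemmap_init_late() declarations in the header below; the sparse_init_nid_early()/sparse_init_nid_late() bodies here are simplified stand-ins for the real mm/sparse.c code, not the actual implementation:

/*
 * Illustrative only: how the pre-HVO hooks slot into per-node sparse
 * initialization. The surrounding functions are simplified stand-ins.
 */
#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT
static void __init sparse_init_nid_early(int nid)
{
	/*
	 * Called before the memmap for this node is allocated: allocate
	 * the vmemmap for hugetlb bootmem pages HVO-style, so the full
	 * memmap is never allocated only to be freed again later.
	 */
	hugetlb_vmemmap_init_early(nid);
}

static void __init sparse_init_nid_late(int nid)
{
	/*
	 * Called after zones have been initialized: verify that every
	 * hugetlb bootmem page lies within a single zone before the
	 * pre-optimized vmemmap is accepted.
	 */
	hugetlb_vmemmap_init_late(nid);
}
#endif /* CONFIG_SPARSEMEM_VMEMMAP_PREINIT */

On the alignment requirement: with a 4 KiB PAGE_SIZE and a 64-byte struct page (as on x86-64), one PMD maps 2 MiB of vmemmap, i.e. 32768 page structures describing 32768 * 4 KiB = 128 MiB of memory, and the sparsemem section size is likewise 128 MiB, which is why both constraints come out to 128M for 1G pages.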
// SPDX-License-Identifier: GPL-2.0
/*
 * HugeTLB Vmemmap Optimization (HVO)
 *
 * Copyright (c) 2020, ByteDance. All rights reserved.
 *
 * Author: Muchun Song <songmuchun@bytedance.com>
 */
#ifndef _LINUX_HUGETLB_VMEMMAP_H
#define _LINUX_HUGETLB_VMEMMAP_H
#include <linux/hugetlb.h>
#include <linux/io.h>
#include <linux/memblock.h>

/*
 * Reserve one vmemmap page, all vmemmap addresses are mapped to it. See
 * Documentation/mm/vmemmap_dedup.rst.
 */
#define HUGETLB_VMEMMAP_RESERVE_SIZE	PAGE_SIZE
#define HUGETLB_VMEMMAP_RESERVE_PAGES	(HUGETLB_VMEMMAP_RESERVE_SIZE / sizeof(struct page))
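/*
 * Example: with a 4 KiB PAGE_SIZE and a 64-byte struct page (as on
 * x86-64), HUGETLB_VMEMMAP_RESERVE_PAGES works out to 64, i.e. the 64
 * page structures that the single remaining vmemmap page can hold.
 */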

#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
int hugetlb_vmemmap_restore_folio(const struct hstate *h, struct folio *folio);
long hugetlb_vmemmap_restore_folios(const struct hstate *h,
			struct list_head *folio_list,
			struct list_head *non_hvo_folios);
void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio);
void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list);
void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, struct list_head *folio_list);

#ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT
void hugetlb_vmemmap_init_early(int nid);
void hugetlb_vmemmap_init_late(int nid);
#endif

static inline unsigned int hugetlb_vmemmap_size(const struct hstate *h)
{
	return pages_per_huge_page(h) * sizeof(struct page);
}

/*
 * Return the amount of vmemmap, in bytes, associated with a HugeTLB page
 * that can be optimized away and freed to the buddy allocator.
 */
static inline unsigned int hugetlb_vmemmap_optimizable_size(const struct hstate *h)
{
	int size = hugetlb_vmemmap_size(h) - HUGETLB_VMEMMAP_RESERVE_SIZE;

	if (!is_power_of_2(sizeof(struct page)))
		return 0;
	return size > 0 ? size : 0;
}
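/*
 * Worked example (assuming a 4 KiB PAGE_SIZE and a 64-byte struct page):
 * a 2 MiB HugeTLB page has 512 page structures, i.e. 32 KiB of vmemmap,
 * of which 32 KiB - 4 KiB = 28 KiB (7 of its 8 vmemmap pages) is
 * optimizable; a 1 GiB page has 16 MiB of vmemmap, almost all of which
 * can be freed.
 */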
#else
static inline int hugetlb_vmemmap_restore_folio(const struct hstate *h, struct folio *folio)
{
	return 0;
}

static inline long hugetlb_vmemmap_restore_folios(const struct hstate *h,
					struct list_head *folio_list,
					struct list_head *non_hvo_folios)
{
	list_splice_init(folio_list, non_hvo_folios);
	return 0;
}

static inline void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio)
{
}

static inline void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list)
{
}

static inline void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h,
						struct list_head *folio_list)
{
}

static inline void hugetlb_vmemmap_init_early(int nid)
{
}

static inline void hugetlb_vmemmap_init_late(int nid)
{
}

static inline unsigned int hugetlb_vmemmap_optimizable_size(const struct hstate *h)
{
	return 0;
}
#endif /* CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP */

static inline bool hugetlb_vmemmap_optimizable(const struct hstate *h)
{
	return hugetlb_vmemmap_optimizable_size(h) != 0;
}
#endif /* _LINUX_HUGETLB_VMEMMAP_H */
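
For context, a heavily simplified sketch of how a caller such as mm/hugetlb.c pairs the runtime half of this API; the two functions below are illustrative stand-ins, not the actual hugetlb call sites:

/* Illustrative only: the optimize/restore pairing, not real mm/hugetlb.c code. */
static void example_prep_hugetlb_folio(struct hstate *h, struct folio *folio)
{
	/* Once the folio is a hugetlb page, free most of its vmemmap. */
	hugetlb_vmemmap_optimize_folio(h, folio);
}

static int example_dissolve_hugetlb_folio(struct hstate *h, struct folio *folio)
{
	int ret;

	/*
	 * The full vmemmap must be re-established before the folio can be
	 * split up and returned to the buddy allocator. Restoring can fail
	 * under memory pressure, in which case the folio has to remain a
	 * hugetlb page.
	 */
	ret = hugetlb_vmemmap_restore_folio(h, folio);
	if (ret)
		return ret;

	/* ... now safe to hand the folio back to the page allocator ... */
	return 0;
}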