mirror of
https://github.com/torvalds/linux.git
synced 2025-04-12 16:47:42 +00:00

For different CMAs, concurrent allocation of CMA memory ideally should not require synchronization using locks. Currently, a global cma_mutex lock is employed to synchronize all CMA allocations, which can impact the performance of concurrent allocations across different CMAs. To test the performance impact, follow these steps: 1. Boot the kernel with the command line argument hugetlb_cma=30G to allocate a 30GB CMA area specifically for huge page allocations. (note: on my machine, which has 3 nodes, each node is initialized with 10G of CMA) 2. Use the dd command with parameters if=/dev/zero of=/dev/shm/file bs=1G count=30 to fully utilize the CMA area by writing zeroes to a file in /dev/shm. 3. Open three terminals and execute the following commands simultaneously: (Note: Each of these commands attempts to allocate 10GB [2621440 * 4KB pages] of CMA memory.) On Terminal 1: time echo 2621440 > /sys/kernel/debug/cma/hugetlb1/alloc On Terminal 2: time echo 2621440 > /sys/kernel/debug/cma/hugetlb2/alloc On Terminal 3: time echo 2621440 > /sys/kernel/debug/cma/hugetlb3/alloc We attempt to allocate pages through the CMA debug interface and use the time command to measure the duration of each allocation. Performance comparison: Without this patch With this patch Terminal1 ~7s ~7s Terminal2 ~14s ~8s Terminal3 ~21s ~7s To solve problem above, we could use per-CMA locks to improve concurrent allocation performance. This would allow each CMA to be managed independently, reducing the need for a global lock and thus improving scalability and performance. Link: https://lkml.kernel.org/r/1739152566-744-1-git-send-email-yangge1116@126.com Signed-off-by: Ge Yang <yangge1116@126.com> Reviewed-by: Barry Song <baohua@kernel.org> Acked-by: David Hildenbrand <david@redhat.com> Reviewed-by: Oscar Salvador <osalvador@suse.de> Cc: Aisheng Dong <aisheng.dong@nxp.com> Cc: Baolin Wang <baolin.wang@linux.alibaba.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
94 lines · 2.6 KiB · C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef __MM_CMA_H__
|
|
#define __MM_CMA_H__
|
|
|
|
#include <linux/debugfs.h>
|
|
#include <linux/kobject.h>
|
|
|
|
struct cma_kobject {
|
|
struct kobject kobj;
|
|
struct cma *cma;
|
|
};
|
|
|
|
/*
 * Multi-range support. This can be useful if the size of the allocation
 * is not expected to be larger than the alignment (like with hugetlb_cma),
 * and the total amount of memory requested, while smaller than the total
 * amount of memory available, is large enough that it doesn't fit in a
 * single physical memory range because of memory holes.
 *
 * Fields:
 * @base_pfn: physical address of range
 * @early_pfn: first PFN not reserved through cma_reserve_early
 * @count: size of range
 * @bitmap: bitmap of allocated (1 << order_per_bit)-sized chunks.
 */
struct cma_memrange {
	unsigned long base_pfn;
	unsigned long early_pfn;
	unsigned long count;
	unsigned long *bitmap;
#ifdef CONFIG_CMA_DEBUGFS
	/* debugfs-exposed view of @bitmap */
	struct debugfs_u32_array dfs_bitmap;
#endif
};

/* Maximum number of discontiguous physical ranges one CMA area may span. */
#define CMA_MAX_RANGES 8
|
|
|
|
struct cma {
|
|
unsigned long count;
|
|
unsigned long available_count;
|
|
unsigned int order_per_bit; /* Order of pages represented by one bit */
|
|
spinlock_t lock;
|
|
struct mutex alloc_mutex;
|
|
#ifdef CONFIG_CMA_DEBUGFS
|
|
struct hlist_head mem_head;
|
|
spinlock_t mem_head_lock;
|
|
#endif
|
|
char name[CMA_MAX_NAME];
|
|
int nranges;
|
|
struct cma_memrange ranges[CMA_MAX_RANGES];
|
|
#ifdef CONFIG_CMA_SYSFS
|
|
/* the number of CMA page successful allocations */
|
|
atomic64_t nr_pages_succeeded;
|
|
/* the number of CMA page allocation failures */
|
|
atomic64_t nr_pages_failed;
|
|
/* the number of CMA page released */
|
|
atomic64_t nr_pages_released;
|
|
/* kobject requires dynamic object */
|
|
struct cma_kobject *cma_kobj;
|
|
#endif
|
|
unsigned long flags;
|
|
/* NUMA node (NUMA_NO_NODE if unspecified) */
|
|
int nid;
|
|
};
|
|
|
|
/*
 * Per-area state flags; presumably used as bit indices into
 * struct cma::flags — confirm against mm/cma.c users.
 */
enum cma_flags {
	CMA_RESERVE_PAGES_ON_ERROR,
	CMA_ZONES_VALID,
	CMA_ZONES_INVALID,
	CMA_ACTIVATED,
};
|
|
|
|
/* All registered CMA areas; the first cma_area_count entries are valid. */
extern struct cma cma_areas[MAX_CMA_AREAS];
extern unsigned int cma_area_count;
|
static inline unsigned long cma_bitmap_maxno(struct cma *cma,
|
|
struct cma_memrange *cmr)
|
|
{
|
|
return cmr->count >> cma->order_per_bit;
|
|
}
|
|
|
|
#ifdef CONFIG_CMA_SYSFS
void cma_sysfs_account_success_pages(struct cma *cma, unsigned long nr_pages);
void cma_sysfs_account_fail_pages(struct cma *cma, unsigned long nr_pages);
void cma_sysfs_account_release_pages(struct cma *cma, unsigned long nr_pages);
#else
/*
 * CONFIG_CMA_SYSFS=n: statistics accounting compiles away to nothing.
 * Note: the stray ';' after each stub body was dropped — an empty
 * file-scope declaration is not strictly conforming ISO C.
 */
static inline void cma_sysfs_account_success_pages(struct cma *cma,
						   unsigned long nr_pages) {}
static inline void cma_sysfs_account_fail_pages(struct cma *cma,
						unsigned long nr_pages) {}
static inline void cma_sysfs_account_release_pages(struct cma *cma,
						   unsigned long nr_pages) {}
#endif
|
|
#endif
|