linux/mm/cma.h
Ge Yang 24ac6fb6e3 mm/cma: using per-CMA locks to improve concurrent allocation performance
Allocations from different CMA areas are independent, so ideally they
should not need to synchronize with one another.  Currently, however, a
single global cma_mutex serializes all CMA allocations, which hurts the
performance of concurrent allocations from different CMA areas.

To test the performance impact, follow these steps:
1. Boot the kernel with the command line argument hugetlb_cma=30G to
   allocate a 30GB CMA area specifically for huge page allocations.
   (Note: on my machine, which has 3 NUMA nodes, each node is initialized
   with 10G of CMA.)
2. Use the dd command with parameters if=/dev/zero of=/dev/shm/file bs=1G
   count=30 to fully utilize the CMA area by writing zeroes to a file in
   /dev/shm.
3. Open three terminals and execute the following commands simultaneously:
   (Note: Each of these commands attempts to allocate 10GB [2621440 * 4KB
   pages] of CMA memory.)
   On Terminal 1: time echo 2621440 > /sys/kernel/debug/cma/hugetlb1/alloc
   On Terminal 2: time echo 2621440 > /sys/kernel/debug/cma/hugetlb2/alloc
   On Terminal 3: time echo 2621440 > /sys/kernel/debug/cma/hugetlb3/alloc

We attempt to allocate pages through the CMA debug interface and use the
time command to measure the duration of each allocation.
Performance comparison:
              Without this patch      With this patch
Terminal 1        ~7s                     ~7s
Terminal 2       ~14s                     ~8s
Terminal 3       ~21s                     ~7s

To solve the problem above, switch to per-CMA locks.  Each CMA area is
then managed independently, which removes the need for a global lock and
thus improves scalability and concurrent allocation performance.
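
As a sketch of the change (illustrative, not the literal diff; pfn and
count stand for the range already reserved in the area's bitmap), the
allocation path now serializes only within one CMA area:

	/* before: every CMA allocation took the single global lock */
	mutex_lock(&cma_mutex);
	ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA, gfp_mask);
	mutex_unlock(&cma_mutex);

	/* after: each area serializes on its own cma->alloc_mutex */
	mutex_lock(&cma->alloc_mutex);
	ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA, gfp_mask);
	mutex_unlock(&cma->alloc_mutex);

Allocations from different areas can thus proceed in parallel, while each
area's bitmap remains protected by its cma->lock spinlock.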

Link: https://lkml.kernel.org/r/1739152566-744-1-git-send-email-yangge1116@126.com
Signed-off-by: Ge Yang <yangge1116@126.com>
Reviewed-by: Barry Song <baohua@kernel.org>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Aisheng Dong <aisheng.dong@nxp.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-03-21 22:03:10 -07:00

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __MM_CMA_H__
#define __MM_CMA_H__

#include <linux/debugfs.h>
#include <linux/kobject.h>

struct cma_kobject {
	struct kobject kobj;
	struct cma *cma;
};

/*
 * Multi-range support. This can be useful if the size of the allocation
 * is not expected to be larger than the alignment (like with hugetlb_cma),
 * and the total amount of memory requested, while smaller than the total
 * amount of memory available, is large enough that it doesn't fit in a
 * single physical memory range because of memory holes.
 *
 * Fields:
 * @base_pfn: physical address of range
 * @early_pfn: first PFN not reserved through cma_reserve_early
 * @count: size of range
 * @bitmap: bitmap of allocated (1 << order_per_bit)-sized chunks.
 */
struct cma_memrange {
	unsigned long base_pfn;
	unsigned long early_pfn;
	unsigned long count;
	unsigned long *bitmap;
#ifdef CONFIG_CMA_DEBUGFS
	struct debugfs_u32_array dfs_bitmap;
#endif
};
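
/*
 * Illustrative note (editorial assumption, matching the helpers in
 * mm/cma.c): bit n of @bitmap covers the (1 << order_per_bit) pages
 * starting at
 *
 *	pfn = cmr->base_pfn + (n << cma->order_per_bit);
 *
 * so finding a free slot in the bitmap directly yields the PFN to hand
 * to the page allocator.
 */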

#define CMA_MAX_RANGES 8

struct cma {
	unsigned long count;		/* total size of the area, in pages */
	unsigned long available_count;	/* pages not currently allocated */
	unsigned int order_per_bit;	/* Order of pages represented by one bit */
	spinlock_t lock;		/* protects the range bitmaps and counters */
	struct mutex alloc_mutex;	/* serializes allocations within this area */
#ifdef CONFIG_CMA_DEBUGFS
	struct hlist_head mem_head;
	spinlock_t mem_head_lock;
#endif
	char name[CMA_MAX_NAME];
	int nranges;
	struct cma_memrange ranges[CMA_MAX_RANGES];
#ifdef CONFIG_CMA_SYSFS
	/* the number of CMA pages successfully allocated */
	atomic64_t nr_pages_succeeded;
	/* the number of CMA page allocation failures */
	atomic64_t nr_pages_failed;
	/* the number of CMA pages released */
	atomic64_t nr_pages_released;
	/* kobject requires dynamic object */
	struct cma_kobject *cma_kobj;
#endif
	unsigned long flags;
	/* NUMA node (NUMA_NO_NODE if unspecified) */
	int nid;
};
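
/*
 * Locking summary (inferred from the commit above): each area carries its
 * own alloc_mutex, taken around the sleeping alloc_contig_range() call in
 * place of the old global cma_mutex, so allocations from different areas
 * no longer serialize against each other.
 */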

enum cma_flags {
	CMA_RESERVE_PAGES_ON_ERROR,
	CMA_ZONES_VALID,
	CMA_ZONES_INVALID,
	CMA_ACTIVATED,
};
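
/*
 * Illustrative usage (editorial assumption, matching mm/cma.c): these
 * enumerators are bit numbers within cma->flags, manipulated with the
 * atomic bitops, e.g.:
 *
 *	set_bit(CMA_ACTIVATED, &cma->flags);
 *	if (test_bit(CMA_ZONES_INVALID, &cma->flags))
 *		return false;
 */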

extern struct cma cma_areas[MAX_CMA_AREAS];
extern unsigned int cma_area_count;

static inline unsigned long cma_bitmap_maxno(struct cma *cma,
					     struct cma_memrange *cmr)
{
	return cmr->count >> cma->order_per_bit;
}
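
/*
 * Worked example (illustrative, using the 10G ranges from the commit
 * message): a range of 2621440 4K pages with order_per_bit = 0 needs a
 * bitmap of 2621440 bits (320 KiB); with order_per_bit = 9 (2M chunks)
 * it needs only 2621440 >> 9 = 5120 bits.
 */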

#ifdef CONFIG_CMA_SYSFS
void cma_sysfs_account_success_pages(struct cma *cma, unsigned long nr_pages);
void cma_sysfs_account_fail_pages(struct cma *cma, unsigned long nr_pages);
void cma_sysfs_account_release_pages(struct cma *cma, unsigned long nr_pages);
#else
static inline void cma_sysfs_account_success_pages(struct cma *cma,
						   unsigned long nr_pages) {}
static inline void cma_sysfs_account_fail_pages(struct cma *cma,
						unsigned long nr_pages) {}
static inline void cma_sysfs_account_release_pages(struct cma *cma,
						   unsigned long nr_pages) {}
#endif

#endif /* __MM_CMA_H__ */