Merge branch 'strict-mm-typechecks-support' into features

Heiko writes:

"The recent large kernel Rust thread where Linus commented about that
structures may be returned in registers [1] made me again aware that this
is not true for s390 where the ABI defines that structures are returned in
a return value buffer allocated by the caller. This was also mentioned by
Alexander Gordeev a couple of weeks ago.

In theory the -freg-struct-return compiler flag would allow to return small
structures in registers, however that has not been implemented for
s390. Juergen Christ did an experimental gcc implementation which shows the
benefit of such a change (bloat-o-meter):

add/remove: 3/2 grow/shrink: 12/441 up/down: 740/-7182 (-6442)

This result is not very impressive, and doesn't seem to justify a new ABI
for the kernel.

However there is still the existing STRICT_MM_TYPECHECKS which can be used
to change some mm types from structures to simple scalar types. Changing
the mm types results in:

add/remove: 2/8 grow/shrink: 25/116 up/down: 3902/-6204 (-2302)

Which is already a third of the possible savings which would be the result
of the described ABI change.

Therefore add support for a configurable STRICT_MM_TYPECHECKS which allows
to generate better code, but also allows to have type checking for debug
builds."

[1] https://lore.kernel.org/all/CAHk-=wgb1g9VVHRaAnJjrfRFWAOVT2ouNOMqt0js8h3D6zvHDw@mail.gmail.com/

* strict-mm-typechecks-support:
  s390/mm: Add configurable STRICT_MM_TYPECHECKS
  s390/mm: Convert pgste_val() into function
  s390/mm: Convert pgprot_val() into function
  s390/mm: Use pgprot_val() instead of open coding

Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
This commit is contained in:
Vasily Gorbik 2025-03-11 15:29:44 +01:00
commit 5983ab1684
7 changed files with 88 additions and 58 deletions

View File

@ -13,6 +13,16 @@ config DEBUG_ENTRY
If unsure, say N.
config STRICT_MM_TYPECHECKS
bool "Strict Memory Management Type Checks"
depends on DEBUG_KERNEL
help
Enable strict type checking for memory management types like pte_t
and pmd_t. This generates slightly worse code and should be used
for debug builds.
If unsure, say N.
config CIO_INJECT
bool "CIO Inject interfaces"
depends on DEBUG_KERNEL && DEBUG_FS

View File

@ -894,6 +894,7 @@ CONFIG_SAMPLE_FTRACE_DIRECT=m
CONFIG_SAMPLE_FTRACE_DIRECT_MULTI=m
CONFIG_SAMPLE_FTRACE_OPS=m
CONFIG_DEBUG_ENTRY=y
CONFIG_STRICT_MM_TYPECHECKS=y
CONFIG_CIO_INJECT=y
CONFIG_KUNIT=m
CONFIG_KUNIT_DEBUGFS=y

View File

@ -0,0 +1,2 @@
# Help: Enable strict memory management typechecks
CONFIG_STRICT_MM_TYPECHECKS=y

View File

@ -71,9 +71,11 @@ static inline void copy_page(void *to, void *from)
#define vma_alloc_zeroed_movable_folio(vma, vaddr) \
vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr)
/*
* These are used to make use of C type-checking..
*/
#ifdef CONFIG_STRICT_MM_TYPECHECKS
#define STRICT_MM_TYPECHECKS
#endif
#ifdef STRICT_MM_TYPECHECKS
typedef struct { unsigned long pgprot; } pgprot_t;
typedef struct { unsigned long pgste; } pgste_t;
@ -82,43 +84,48 @@ typedef struct { unsigned long pmd; } pmd_t;
typedef struct { unsigned long pud; } pud_t;
typedef struct { unsigned long p4d; } p4d_t;
typedef struct { unsigned long pgd; } pgd_t;
#define DEFINE_PGVAL_FUNC(name) \
static __always_inline unsigned long name ## _val(name ## _t name) \
{ \
return name.name; \
}
#else /* STRICT_MM_TYPECHECKS */
typedef unsigned long pgprot_t;
typedef unsigned long pgste_t;
typedef unsigned long pte_t;
typedef unsigned long pmd_t;
typedef unsigned long pud_t;
typedef unsigned long p4d_t;
typedef unsigned long pgd_t;
#define DEFINE_PGVAL_FUNC(name) \
static __always_inline unsigned long name ## _val(name ## _t name) \
{ \
return name; \
}
#endif /* STRICT_MM_TYPECHECKS */
DEFINE_PGVAL_FUNC(pgprot)
DEFINE_PGVAL_FUNC(pgste)
DEFINE_PGVAL_FUNC(pte)
DEFINE_PGVAL_FUNC(pmd)
DEFINE_PGVAL_FUNC(pud)
DEFINE_PGVAL_FUNC(p4d)
DEFINE_PGVAL_FUNC(pgd)
typedef pte_t *pgtable_t;
#define pgprot_val(x) ((x).pgprot)
#define pgste_val(x) ((x).pgste)
static inline unsigned long pte_val(pte_t pte)
{
return pte.pte;
}
static inline unsigned long pmd_val(pmd_t pmd)
{
return pmd.pmd;
}
static inline unsigned long pud_val(pud_t pud)
{
return pud.pud;
}
static inline unsigned long p4d_val(p4d_t p4d)
{
return p4d.p4d;
}
static inline unsigned long pgd_val(pgd_t pgd)
{
return pgd.pgd;
}
#define __pgprot(x) ((pgprot_t) { (x) } )
#define __pgste(x) ((pgste_t) { (x) } )
#define __pte(x) ((pte_t) { (x) } )
#define __pmd(x) ((pmd_t) { (x) } )
#define __pud(x) ((pud_t) { (x) } )
#define __p4d(x) ((p4d_t) { (x) } )
#define __pgd(x) ((pgd_t) { (x) } )
#define __pgprot(x) ((pgprot_t) { (x) } )
static inline void page_set_storage_key(unsigned long addr,
unsigned char skey, int mapped)

View File

@ -593,6 +593,16 @@ static inline int mm_alloc_pgste(struct mm_struct *mm)
return 0;
}
static inline pgste_t clear_pgste_bit(pgste_t pgste, unsigned long mask)
{
return __pgste(pgste_val(pgste) & ~mask);
}
static inline pgste_t set_pgste_bit(pgste_t pgste, unsigned long mask)
{
return __pgste(pgste_val(pgste) | mask);
}
static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot)
{
return __pte(pte_val(pte) & ~pgprot_val(prot));

View File

@ -286,7 +286,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
unsigned long size_pages = PFN_DOWN(size);
int rc;
if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot))
if (WARN_ON_ONCE(pgprot_val(params->pgprot) != pgprot_val(PAGE_KERNEL)))
return -EINVAL;
VM_BUG_ON(!mhp_range_allowed(start, size, true));

View File

@ -165,10 +165,10 @@ static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
skey = (unsigned long) page_get_storage_key(address);
bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
/* Transfer page changed & referenced bit to guest bits in pgste */
pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */
pgste = set_pgste_bit(pgste, bits << 48); /* GR bit & GC bit */
/* Copy page access key and fetch protection bit to pgste */
pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT);
pgste = set_pgste_bit(pgste, (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56);
#endif
return pgste;
@ -212,7 +212,7 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
}
if (!(pte_val(entry) & _PAGE_PROTECT))
/* This pte allows write access, set user-dirty */
pgste_val(pgste) |= PGSTE_UC_BIT;
pgste = set_pgste_bit(pgste, PGSTE_UC_BIT);
}
#endif
set_pte(ptep, entry);
@ -228,7 +228,7 @@ static inline pgste_t pgste_pte_notify(struct mm_struct *mm,
bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT);
if (bits) {
pgste_val(pgste) ^= bits;
pgste = __pgste(pgste_val(pgste) ^ bits);
ptep_notify(mm, addr, ptep, bits);
}
#endif
@ -601,7 +601,7 @@ void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
/* the mm_has_pgste() check is done in set_pte_at() */
preempt_disable();
pgste = pgste_get_lock(ptep);
pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
pgste = clear_pgste_bit(pgste, _PGSTE_GPS_ZERO);
pgste_set_key(ptep, pgste, entry, mm);
pgste = pgste_set_pte(ptep, pgste, entry);
pgste_set_unlock(ptep, pgste);
@ -614,7 +614,7 @@ void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
preempt_disable();
pgste = pgste_get_lock(ptep);
pgste_val(pgste) |= PGSTE_IN_BIT;
pgste = set_pgste_bit(pgste, PGSTE_IN_BIT);
pgste_set_unlock(ptep, pgste);
preempt_enable();
}
@ -659,7 +659,7 @@ int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
entry = clear_pte_bit(entry, __pgprot(_PAGE_INVALID));
entry = set_pte_bit(entry, __pgprot(_PAGE_PROTECT));
}
pgste_val(pgste) |= bit;
pgste = set_pgste_bit(pgste, bit);
pgste = pgste_set_pte(ptep, pgste, entry);
pgste_set_unlock(ptep, pgste);
return 0;
@ -679,7 +679,7 @@ int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
if (!(pte_val(spte) & _PAGE_INVALID) &&
!((pte_val(spte) & _PAGE_PROTECT) &&
!(pte_val(pte) & _PAGE_PROTECT))) {
pgste_val(spgste) |= PGSTE_VSIE_BIT;
spgste = set_pgste_bit(spgste, PGSTE_VSIE_BIT);
tpgste = pgste_get_lock(tptep);
tpte = __pte((pte_val(spte) & PAGE_MASK) |
(pte_val(pte) & _PAGE_PROTECT));
@ -737,7 +737,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
pte_clear(mm, addr, ptep);
}
if (reset)
pgste_val(pgste) &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
pgste = clear_pgste_bit(pgste, _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
pgste_set_unlock(ptep, pgste);
preempt_enable();
}
@ -750,8 +750,8 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
/* Clear storage key ACC and F, but set R/C */
preempt_disable();
pgste = pgste_get_lock(ptep);
pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT;
pgste = clear_pgste_bit(pgste, PGSTE_ACC_BITS | PGSTE_FP_BIT);
pgste = set_pgste_bit(pgste, PGSTE_GR_BIT | PGSTE_GC_BIT);
ptev = pte_val(*ptep);
if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 0);
@ -772,7 +772,7 @@ bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr,
pgste = pgste_get_lock(ptep);
dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
pgste_val(pgste) &= ~PGSTE_UC_BIT;
pgste = clear_pgste_bit(pgste, PGSTE_UC_BIT);
pte = *ptep;
if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
pgste = pgste_pte_notify(mm, addr, ptep, pgste);
@ -834,11 +834,11 @@ again:
if (!ptep)
goto again;
new = old = pgste_get_lock(ptep);
pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
PGSTE_ACC_BITS | PGSTE_FP_BIT);
new = clear_pgste_bit(new, PGSTE_GR_BIT | PGSTE_GC_BIT |
PGSTE_ACC_BITS | PGSTE_FP_BIT);
keyul = (unsigned long) key;
pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
new = set_pgste_bit(new, (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48);
new = set_pgste_bit(new, (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56);
if (!(pte_val(*ptep) & _PAGE_INVALID)) {
unsigned long bits, skey;
@ -849,12 +849,12 @@ again:
/* Set storage key ACC and FP */
page_set_storage_key(paddr, skey, !nq);
/* Merge host changed & referenced into pgste */
pgste_val(new) |= bits << 52;
new = set_pgste_bit(new, bits << 52);
}
/* changing the guest storage key is considered a change of the page */
if ((pgste_val(new) ^ pgste_val(old)) &
(PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
pgste_val(new) |= PGSTE_UC_BIT;
new = set_pgste_bit(new, PGSTE_UC_BIT);
pgste_set_unlock(ptep, new);
pte_unmap_unlock(ptep, ptl);
@ -942,19 +942,19 @@ again:
goto again;
new = old = pgste_get_lock(ptep);
/* Reset guest reference bit only */
pgste_val(new) &= ~PGSTE_GR_BIT;
new = clear_pgste_bit(new, PGSTE_GR_BIT);
if (!(pte_val(*ptep) & _PAGE_INVALID)) {
paddr = pte_val(*ptep) & PAGE_MASK;
cc = page_reset_referenced(paddr);
/* Merge real referenced bit into host-set */
pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT;
new = set_pgste_bit(new, ((unsigned long)cc << 53) & PGSTE_HR_BIT);
}
/* Reflect guest's logical view, not physical */
cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49;
/* Changing the guest storage key is considered a change of the page */
if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT)
pgste_val(new) |= PGSTE_UC_BIT;
new = set_pgste_bit(new, PGSTE_UC_BIT);
pgste_set_unlock(ptep, new);
pte_unmap_unlock(ptep, ptl);
@ -1118,7 +1118,7 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
if (res)
pgstev |= _PGSTE_GPS_ZERO;
pgste_val(pgste) = pgstev;
pgste = __pgste(pgstev);
pgste_set_unlock(ptep, pgste);
pte_unmap_unlock(ptep, ptl);
return res;
@ -1151,8 +1151,8 @@ int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
return -EFAULT;
new = pgste_get_lock(ptep);
pgste_val(new) &= ~bits;
pgste_val(new) |= value & bits;
new = clear_pgste_bit(new, bits);
new = set_pgste_bit(new, value & bits);
pgste_set_unlock(ptep, new);
pte_unmap_unlock(ptep, ptl);