Don't force SPTE modifications to be done atomically if the only volatile bit in the SPTE is the Accessed bit. KVM and the primary MMU tolerate stale aging state, and the probability of an Accessed bit A/D assist being clobbered *and* affecting aging is likely far lower than the probability of consuming stale information due to not flushing TLBs when aging.

Rename spte_has_volatile_bits() to spte_needs_atomic_update() to better capture the nature of the helper.

Opportunistically do s/write/update on the TDP MMU wrapper, as it's not simply the "write" that needs to be done atomically, it's the entire update, i.e. the entire read-modify-write operation needs to be done atomically so that KVM has an accurate view of the old SPTE.

Leave kvm_tdp_mmu_write_spte_atomic() as is. While the name is imperfect, it pairs with kvm_tdp_mmu_write_spte(), which in turn pairs with kvm_tdp_mmu_read_spte(), and renaming all of those isn't obviously a net positive and would require significant churn.

Signed-off-by: James Houghton <jthoughton@google.com>
Link: https://lore.kernel.org/r/20250204004038.1680123-6-jthoughton@google.com
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
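
For context, a minimal sketch of what the renamed helper needs to check, assuming the pre-existing volatile-bit checks (Writable set by fast page faults, access-tracking state when A/D bits are disabled, hardware-set Dirty) carry over and only the Accessed bit is dropped, per the changelog. This is illustrative only; the real implementation (in KVM's spte.c) may differ in detail, and the "_sketch" suffix is added here to make that explicit. is_writable_pte(), is_mmu_writable_spte(), spte_ad_enabled(), and shadow_dirty_mask are existing KVM helpers/masks.

/* Sketch only, not the actual spte.c implementation. */
static inline bool spte_needs_atomic_update_sketch(u64 spte)
{
        /* The Writable bit can be set by KVM's fast page fault handler. */
        if (!is_writable_pte(spte) && is_mmu_writable_spte(spte))
                return true;

        /*
         * Without A/D bits, access-tracking state can change outside of
         * mmu_lock, e.g. when an access-tracked SPTE is restored by a
         * fast page fault.
         */
        if (!spte_ad_enabled(spte))
                return true;

        /*
         * The CPU can set the Dirty bit on writable SPTEs.  The Accessed
         * bit is deliberately ignored, as stale aging state is tolerated.
         */
        return is_writable_pte(spte) && !(spte & shadow_dirty_mask);
}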
// SPDX-License-Identifier: GPL-2.0
#ifndef __KVM_X86_MMU_TDP_ITER_H
#define __KVM_X86_MMU_TDP_ITER_H

#include <linux/kvm_host.h>

#include "mmu.h"
#include "spte.h"

/*
 * TDP MMU SPTEs are RCU protected to allow paging structures (non-leaf SPTEs)
 * to be zapped while holding mmu_lock for read, and to allow TLB flushes to be
 * batched without having to collect the list of zapped SPs.  Flows that can
 * remove SPs must service pending TLB flushes prior to dropping RCU protection.
 */
static inline u64 kvm_tdp_mmu_read_spte(tdp_ptep_t sptep)
{
        return READ_ONCE(*rcu_dereference(sptep));
}
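
/*
 * Illustrative note: callers are expected to be inside an RCU read-side
 * critical section (see the comment above), e.g.
 *
 *	rcu_read_lock();
 *	spte = kvm_tdp_mmu_read_spte(sptep);
 *	...
 *	rcu_read_unlock();
 */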

static inline u64 kvm_tdp_mmu_write_spte_atomic(tdp_ptep_t sptep, u64 new_spte)
{
        KVM_MMU_WARN_ON(is_ept_ve_possible(new_spte));
        return xchg(rcu_dereference(sptep), new_spte);
}

static inline u64 tdp_mmu_clear_spte_bits_atomic(tdp_ptep_t sptep, u64 mask)
{
        atomic64_t *sptep_atomic = (atomic64_t *)rcu_dereference(sptep);

        return (u64)atomic64_fetch_and(~mask, sptep_atomic);
}

static inline void __kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 new_spte)
{
        KVM_MMU_WARN_ON(is_ept_ve_possible(new_spte));
        WRITE_ONCE(*rcu_dereference(sptep), new_spte);
}

/*
 * SPTEs must be modified atomically if they are shadow-present, leaf SPTEs,
 * and have volatile bits (bits that can be set outside of mmu_lock) that
 * must not be clobbered.
 */
static inline bool kvm_tdp_mmu_spte_need_atomic_update(u64 old_spte, int level)
{
        return is_shadow_present_pte(old_spte) &&
               is_last_spte(old_spte, level) &&
               spte_needs_atomic_update(old_spte);
}

static inline u64 kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 old_spte,
                                         u64 new_spte, int level)
{
        if (kvm_tdp_mmu_spte_need_atomic_update(old_spte, level))
                return kvm_tdp_mmu_write_spte_atomic(sptep, new_spte);

        __kvm_tdp_mmu_write_spte(sptep, new_spte);
        return old_spte;
}

static inline u64 tdp_mmu_clear_spte_bits(tdp_ptep_t sptep, u64 old_spte,
                                          u64 mask, int level)
{
        if (kvm_tdp_mmu_spte_need_atomic_update(old_spte, level))
                return tdp_mmu_clear_spte_bits_atomic(sptep, mask);

        __kvm_tdp_mmu_write_spte(sptep, old_spte & ~mask);
        return old_spte;
}
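
/*
 * Illustrative example only: with the relaxed rule described in the changelog,
 * the Accessed bit being volatile no longer forces an atomic update, so a
 * caller aging a live leaf SPTE (e.g. the aging path in tdp_mmu.c) can clear
 * it with something like:
 *
 *	old_spte = tdp_mmu_clear_spte_bits(iter->sptep, iter->old_spte,
 *					   shadow_accessed_mask, iter->level);
 *
 * where "iter" is a struct tdp_iter (declared below).  Whether the clear is
 * done via atomic64_fetch_and() or a plain WRITE_ONCE() is decided internally
 * by kvm_tdp_mmu_spte_need_atomic_update().
 */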

/*
 * A TDP iterator performs a pre-order walk over a TDP paging structure.
 */
struct tdp_iter {
        /*
         * The iterator will traverse the paging structure towards the mapping
         * for this GFN.
         */
        gfn_t next_last_level_gfn;
        /*
         * The next_last_level_gfn at the time when the thread last
         * yielded.  Only yielding when the next_last_level_gfn !=
         * yielded_gfn helps ensure forward progress.
         */
        gfn_t yielded_gfn;
        /* Pointers to the page tables traversed to reach the current SPTE */
        tdp_ptep_t pt_path[PT64_ROOT_MAX_LEVEL];
        /* A pointer to the current SPTE */
        tdp_ptep_t sptep;
        /* The lowest GFN (mask bits excluded) mapped by the current SPTE */
        gfn_t gfn;
        /* Mask applied to convert the GFN to the mapping GPA */
        gfn_t gfn_bits;
        /* The level of the root page given to the iterator */
        int root_level;
        /* The lowest level the iterator should traverse to */
        int min_level;
        /* The iterator's current level within the paging structure */
        int level;
        /* The address space ID, i.e. SMM vs. regular. */
        int as_id;
        /* A snapshot of the value at sptep */
        u64 old_spte;
        /*
         * Whether the iterator has a valid state.  This will be false if the
         * iterator walks off the end of the paging structure.
         */
        bool valid;
        /*
         * True if KVM dropped mmu_lock and yielded in the middle of a walk, in
         * which case tdp_iter_next() needs to restart the walk at the root
         * level instead of advancing to the next entry.
         */
        bool yielded;
};

/*
 * Iterates over every SPTE mapping the GFN range [start, end) in a
 * preorder traversal.
 */
#define for_each_tdp_pte_min_level(iter, kvm, root, min_level, start, end)	\
        for (tdp_iter_start(&iter, root, min_level, start, kvm_gfn_root_bits(kvm, root)); \
             iter.valid && iter.gfn < end;					\
             tdp_iter_next(&iter))

#define for_each_tdp_pte_min_level_all(iter, root, min_level)			\
        for (tdp_iter_start(&iter, root, min_level, 0, 0);			\
             iter.valid && iter.gfn < tdp_mmu_max_gfn_exclusive();		\
             tdp_iter_next(&iter))

#define for_each_tdp_pte(iter, kvm, root, start, end)				\
        for_each_tdp_pte_min_level(iter, kvm, root, PG_LEVEL_4K, start, end)
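
/*
 * Illustrative usage only (condensed from callers in tdp_mmu.c, which also
 * handle RCU, yielding, etc.): walking the SPTEs mapping a GFN range down to
 * 4K granularity typically looks like:
 *
 *	struct tdp_iter iter;
 *
 *	for_each_tdp_pte(iter, kvm, root, start, end) {
 *		if (!is_shadow_present_pte(iter.old_spte))
 *			continue;
 *		...
 *	}
 */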

tdp_ptep_t spte_to_child_pt(u64 pte, int level);

void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
                    int min_level, gfn_t next_last_level_gfn, gfn_t gfn_bits);
void tdp_iter_next(struct tdp_iter *iter);
void tdp_iter_restart(struct tdp_iter *iter);

#endif /* __KVM_X86_MMU_TDP_ITER_H */