mirror of
https://github.com/torvalds/linux.git
synced 2025-04-09 14:45:27 +00:00

commit 37b2a6510a48("KVM: use __vcalloc for very large allocations") replaced kvzalloc()/kvcalloc() with vcalloc(), but didn't replace kvfree() with vfree(). Signed-off-by: Li RongQing <lirongqing@baidu.com> Link: https://lore.kernel.org/r/20240131012357.53563-1-lirongqing@baidu.com Signed-off-by: Sean Christopherson <seanjc@google.com>
372 lines
9.2 KiB
C
372 lines
9.2 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* Support KVM guest page tracking
|
|
*
|
|
* This feature allows us to track page access in guest. Currently, only
|
|
* write access is tracked.
|
|
*
|
|
* Copyright(C) 2015 Intel Corporation.
|
|
*
|
|
* Author:
|
|
* Xiao Guangrong <guangrong.xiao@linux.intel.com>
|
|
*/
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
#include <linux/lockdep.h>
|
|
#include <linux/kvm_host.h>
|
|
#include <linux/rculist.h>
|
|
|
|
#include "mmu.h"
|
|
#include "mmu_internal.h"
|
|
#include "page_track.h"
|
|
|
|
static bool kvm_external_write_tracking_enabled(struct kvm *kvm)
|
|
{
|
|
#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
|
|
/*
|
|
* Read external_write_tracking_enabled before related pointers. Pairs
|
|
* with the smp_store_release in kvm_page_track_write_tracking_enable().
|
|
*/
|
|
return smp_load_acquire(&kvm->arch.external_write_tracking_enabled);
|
|
#else
|
|
return false;
|
|
#endif
|
|
}
|
|
|
|
bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
|
|
{
|
|
return kvm_external_write_tracking_enabled(kvm) ||
|
|
kvm_shadow_root_allocated(kvm) || !tdp_enabled;
|
|
}
|
|
|
|
void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
|
|
{
|
|
vfree(slot->arch.gfn_write_track);
|
|
slot->arch.gfn_write_track = NULL;
|
|
}
|
|
|
|
static int __kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot,
|
|
unsigned long npages)
|
|
{
|
|
const size_t size = sizeof(*slot->arch.gfn_write_track);
|
|
|
|
if (!slot->arch.gfn_write_track)
|
|
slot->arch.gfn_write_track = __vcalloc(npages, size,
|
|
GFP_KERNEL_ACCOUNT);
|
|
|
|
return slot->arch.gfn_write_track ? 0 : -ENOMEM;
|
|
}
|
|
|
|
/*
 * Called when a memslot is created: allocate the write-tracking array for
 * @slot, but only if write tracking is currently enabled for @kvm.
 *
 * Returns 0 when tracking is disabled or the allocation succeeded, otherwise
 * the error from __kvm_page_track_write_tracking_alloc().
 */
int kvm_page_track_create_memslot(struct kvm *kvm,
				  struct kvm_memory_slot *slot,
				  unsigned long npages)
{
	if (kvm_page_track_write_tracking_enabled(kvm))
		return __kvm_page_track_write_tracking_alloc(slot, npages);

	return 0;
}
|
|
|
|
int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot)
|
|
{
|
|
return __kvm_page_track_write_tracking_alloc(slot, slot->npages);
|
|
}
|
|
|
|
/*
 * Adjust the write-tracking counter of @gfn inside @slot by @count.
 *
 * The counter is left unchanged, and a one-time warning is raised, if the
 * result would be negative or exceed USHRT_MAX.
 */
static void update_gfn_write_track(struct kvm_memory_slot *slot, gfn_t gfn,
				   short count)
{
	int idx = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
	int cur = slot->arch.gfn_write_track[idx];
	int next = cur + count;

	if (WARN_ON_ONCE(next < 0 || next > USHRT_MAX))
		return;

	slot->arch.gfn_write_track[idx] += count;
}
|
|
|
|
void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
|
|
gfn_t gfn)
|
|
{
|
|
lockdep_assert_held_write(&kvm->mmu_lock);
|
|
|
|
lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
|
|
srcu_read_lock_held(&kvm->srcu));
|
|
|
|
if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
|
|
return;
|
|
|
|
update_gfn_write_track(slot, gfn, 1);
|
|
|
|
/*
|
|
* new track stops large page mapping for the
|
|
* tracked page.
|
|
*/
|
|
kvm_mmu_gfn_disallow_lpage(slot, gfn);
|
|
|
|
if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K))
|
|
kvm_flush_remote_tlbs(kvm);
|
|
}
|
|
|
|
/*
 * Stop write-tracking @gfn in @slot (one reference is dropped).
 *
 * Locking requirements mirror __kvm_write_track_add_gfn(): mmu_lock held
 * for write, plus either slots_lock or the kvm->srcu read lock.  If write
 * tracking is not enabled for @kvm this is flagged via KVM_BUG_ON() and
 * nothing else is done.
 */
void __kvm_write_track_remove_gfn(struct kvm *kvm,
				  struct kvm_memory_slot *slot, gfn_t gfn)
{
	lockdep_assert_held_write(&kvm->mmu_lock);

	lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
			    srcu_read_lock_held(&kvm->srcu));

	if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
		return;

	update_gfn_write_track(slot, gfn, -1);

	/* With this tracker gone, large page mapping is allowed again. */
	kvm_mmu_gfn_allow_lpage(slot, gfn);
}
|
|
|
|
/*
|
|
* check if the corresponding access on the specified guest page is tracked.
|
|
*/
|
|
bool kvm_gfn_is_write_tracked(struct kvm *kvm,
|
|
const struct kvm_memory_slot *slot, gfn_t gfn)
|
|
{
|
|
int index;
|
|
|
|
if (!slot)
|
|
return false;
|
|
|
|
if (!kvm_page_track_write_tracking_enabled(kvm))
|
|
return false;
|
|
|
|
index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
|
|
return !!READ_ONCE(slot->arch.gfn_write_track[index]);
|
|
}
|
|
|
|
#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
|
|
void kvm_page_track_cleanup(struct kvm *kvm)
|
|
{
|
|
struct kvm_page_track_notifier_head *head;
|
|
|
|
head = &kvm->arch.track_notifier_head;
|
|
cleanup_srcu_struct(&head->track_srcu);
|
|
}
|
|
|
|
int kvm_page_track_init(struct kvm *kvm)
|
|
{
|
|
struct kvm_page_track_notifier_head *head;
|
|
|
|
head = &kvm->arch.track_notifier_head;
|
|
INIT_HLIST_HEAD(&head->track_notifier_list);
|
|
return init_srcu_struct(&head->track_srcu);
|
|
}
|
|
|
|
static int kvm_enable_external_write_tracking(struct kvm *kvm)
|
|
{
|
|
struct kvm_memslots *slots;
|
|
struct kvm_memory_slot *slot;
|
|
int r = 0, i, bkt;
|
|
|
|
mutex_lock(&kvm->slots_arch_lock);
|
|
|
|
/*
|
|
* Check for *any* write tracking user (not just external users) under
|
|
* lock. This avoids unnecessary work, e.g. if KVM itself is using
|
|
* write tracking, or if two external users raced when registering.
|
|
*/
|
|
if (kvm_page_track_write_tracking_enabled(kvm))
|
|
goto out_success;
|
|
|
|
for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
|
|
slots = __kvm_memslots(kvm, i);
|
|
kvm_for_each_memslot(slot, bkt, slots) {
|
|
/*
|
|
* Intentionally do NOT free allocations on failure to
|
|
* avoid having to track which allocations were made
|
|
* now versus when the memslot was created. The
|
|
* metadata is guaranteed to be freed when the slot is
|
|
* freed, and will be kept/used if userspace retries
|
|
* the failed ioctl() instead of killing the VM.
|
|
*/
|
|
r = kvm_page_track_write_tracking_alloc(slot);
|
|
if (r)
|
|
goto out_unlock;
|
|
}
|
|
}
|
|
|
|
out_success:
|
|
/*
|
|
* Ensure that external_write_tracking_enabled becomes true strictly
|
|
* after all the related pointers are set.
|
|
*/
|
|
smp_store_release(&kvm->arch.external_write_tracking_enabled, true);
|
|
out_unlock:
|
|
mutex_unlock(&kvm->slots_arch_lock);
|
|
return r;
|
|
}
|
|
|
|
/*
|
|
* register the notifier so that event interception for the tracked guest
|
|
* pages can be received.
|
|
*/
|
|
int kvm_page_track_register_notifier(struct kvm *kvm,
|
|
struct kvm_page_track_notifier_node *n)
|
|
{
|
|
struct kvm_page_track_notifier_head *head;
|
|
int r;
|
|
|
|
if (!kvm || kvm->mm != current->mm)
|
|
return -ESRCH;
|
|
|
|
if (!kvm_external_write_tracking_enabled(kvm)) {
|
|
r = kvm_enable_external_write_tracking(kvm);
|
|
if (r)
|
|
return r;
|
|
}
|
|
|
|
kvm_get_kvm(kvm);
|
|
|
|
head = &kvm->arch.track_notifier_head;
|
|
|
|
write_lock(&kvm->mmu_lock);
|
|
hlist_add_head_rcu(&n->node, &head->track_notifier_list);
|
|
write_unlock(&kvm->mmu_lock);
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);
|
|
|
|
/*
|
|
* stop receiving the event interception. It is the opposed operation of
|
|
* kvm_page_track_register_notifier().
|
|
*/
|
|
void kvm_page_track_unregister_notifier(struct kvm *kvm,
|
|
struct kvm_page_track_notifier_node *n)
|
|
{
|
|
struct kvm_page_track_notifier_head *head;
|
|
|
|
head = &kvm->arch.track_notifier_head;
|
|
|
|
write_lock(&kvm->mmu_lock);
|
|
hlist_del_rcu(&n->node);
|
|
write_unlock(&kvm->mmu_lock);
|
|
synchronize_srcu(&head->track_srcu);
|
|
|
|
kvm_put_kvm(kvm);
|
|
}
|
|
EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
|
|
|
|
/*
|
|
* Notify the node that write access is intercepted and write emulation is
|
|
* finished at this time.
|
|
*
|
|
* The node should figure out if the written page is the one that node is
|
|
* interested in by itself.
|
|
*/
|
|
void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes)
|
|
{
|
|
struct kvm_page_track_notifier_head *head;
|
|
struct kvm_page_track_notifier_node *n;
|
|
int idx;
|
|
|
|
head = &kvm->arch.track_notifier_head;
|
|
|
|
if (hlist_empty(&head->track_notifier_list))
|
|
return;
|
|
|
|
idx = srcu_read_lock(&head->track_srcu);
|
|
hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
|
|
srcu_read_lock_held(&head->track_srcu))
|
|
if (n->track_write)
|
|
n->track_write(gpa, new, bytes, n);
|
|
srcu_read_unlock(&head->track_srcu, idx);
|
|
}
|
|
|
|
/*
|
|
* Notify external page track nodes that a memory region is being removed from
|
|
* the VM, e.g. so that users can free any associated metadata.
|
|
*/
|
|
void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
|
|
{
|
|
struct kvm_page_track_notifier_head *head;
|
|
struct kvm_page_track_notifier_node *n;
|
|
int idx;
|
|
|
|
head = &kvm->arch.track_notifier_head;
|
|
|
|
if (hlist_empty(&head->track_notifier_list))
|
|
return;
|
|
|
|
idx = srcu_read_lock(&head->track_srcu);
|
|
hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
|
|
srcu_read_lock_held(&head->track_srcu))
|
|
if (n->track_remove_region)
|
|
n->track_remove_region(slot->base_gfn, slot->npages, n);
|
|
srcu_read_unlock(&head->track_srcu, idx);
|
|
}
|
|
|
|
/*
|
|
* add guest page to the tracking pool so that corresponding access on that
|
|
* page will be intercepted.
|
|
*
|
|
* @kvm: the guest instance we are interested in.
|
|
* @gfn: the guest page.
|
|
*/
|
|
int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn)
|
|
{
|
|
struct kvm_memory_slot *slot;
|
|
int idx;
|
|
|
|
idx = srcu_read_lock(&kvm->srcu);
|
|
|
|
slot = gfn_to_memslot(kvm, gfn);
|
|
if (!slot) {
|
|
srcu_read_unlock(&kvm->srcu, idx);
|
|
return -EINVAL;
|
|
}
|
|
|
|
write_lock(&kvm->mmu_lock);
|
|
__kvm_write_track_add_gfn(kvm, slot, gfn);
|
|
write_unlock(&kvm->mmu_lock);
|
|
|
|
srcu_read_unlock(&kvm->srcu, idx);
|
|
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(kvm_write_track_add_gfn);
|
|
|
|
/*
|
|
* remove the guest page from the tracking pool which stops the interception
|
|
* of corresponding access on that page.
|
|
*
|
|
* @kvm: the guest instance we are interested in.
|
|
* @gfn: the guest page.
|
|
*/
|
|
int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn)
|
|
{
|
|
struct kvm_memory_slot *slot;
|
|
int idx;
|
|
|
|
idx = srcu_read_lock(&kvm->srcu);
|
|
|
|
slot = gfn_to_memslot(kvm, gfn);
|
|
if (!slot) {
|
|
srcu_read_unlock(&kvm->srcu, idx);
|
|
return -EINVAL;
|
|
}
|
|
|
|
write_lock(&kvm->mmu_lock);
|
|
__kvm_write_track_remove_gfn(kvm, slot, gfn);
|
|
write_unlock(&kvm->mmu_lock);
|
|
|
|
srcu_read_unlock(&kvm->srcu, idx);
|
|
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL_GPL(kvm_write_track_remove_gfn);
|
|
#endif
|