[RFC PATCH 09/24] KVM: SEV: Generalize tracking ASID->vCPU with xarrays

From: Yosry Ahmed
Date: Wed Mar 26 2025 - 15:41:26 EST


Upcoming changes will track ASID->vCPU mappings for all ASIDs, not
just SEV ASIDs. Per-CPU arrays sized for the maximum possible number of
ASIDs would be too expensive, so use xarrays to generalize tracking the
mappings instead. Most of the logic is also moved outside the SEV code
so that future changes can reuse it for normal SVM VMs.

Storing into an xarray is more expensive than writing to a plain
array, but it is only done on vCPU load and should be mostly
uncontended. Also, the size of each xarray is O(# of VMs), so it is not
expected to be huge. In fact, the xarray will probably use less memory
than the plain array even for SEV on machines that only run a few VMs.
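
For a rough sense of scale (illustrative numbers, not measurements):
with on the order of 1000 ASIDs and 8-byte pointers, the flat per-CPU
array costs ~8KB per CPU up front whether or not any SEV VM ever runs,
while an xarray tracking a handful of low-numbered ASIDs typically
needs only one or two ~576-byte nodes (64 slots each) per CPU.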

When a new ASID is allocated, reserve an entry for it in every CPU's
xarray. This allows the memory allocations to happen in a more relaxed
context (one that permits reclaim and cgroup accounting), and failures
to be handled at VM creation time. The downside is that entries are
allocated even on CPUs that never run the VM.
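
As an illustration of the xarray contract this relies on (sketch only,
not part of the patch; xa_reserve(), xa_store(), and xa_load() are the
stock <linux/xarray.h> API, shown here against the patch's per-CPU
sd->asid_vcpu):

  /* May sleep and allocate; the node backing @asid is charged here. */
  if (xa_reserve(&sd->asid_vcpu, asid, GFP_KERNEL_ACCOUNT))
          return false;

  /* A reserved-but-unwritten slot reads back as NULL... */
  WARN_ON(xa_load(&sd->asid_vcpu, asid) != NULL);

  /* ...and a later store needs no allocation, so it cannot fail
   * with -ENOMEM even under GFP_ATOMIC. */
  WARN_ON(xa_err(xa_store(&sd->asid_vcpu, asid, vcpu, GFP_ATOMIC)));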

The alternative is to rely on on-demand GFP_ATOMIC allocations with
xa_store() at vCPU load time. Such allocations are more likely to fail
and are harder to handle, since vCPU load itself cannot fail. Flushing
the TLB whenever xa_store() fails would probably be sufficient
handling, but preallocation is easier to reason about.
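
With the slots preallocated, the vCPU-load side condenses to roughly
(see the svm_vcpu_load() hunk below):

  prev = xa_store(&sd->asid_vcpu, asid, vcpu, GFP_ATOMIC);
  if (WARN_ON_ONCE(xa_err(prev)) || prev != vcpu)
          kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);

i.e. flush when a different vCPU last used this ASID on this CPU, and
warn (but still flush, to stay safe) in the should-be-impossible case
where xa_store() fails.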

Signed-off-by: Yosry Ahmed <yosry.ahmed@xxxxxxxxx>
---
arch/x86/kvm/svm/sev.c | 25 ++++-----------------
arch/x86/kvm/svm/svm.c | 50 +++++++++++++++++++++++++++++++-----------
arch/x86/kvm/svm/svm.h | 7 +++---
3 files changed, 44 insertions(+), 38 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 1742f51d4c194..c11da3259c089 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -211,6 +211,9 @@ static int sev_asid_new(struct kvm_sev_info *sev)
 		goto e_uncharge;
 	}
 
+	if (!svm_register_asid(asid))
+		goto e_uncharge;
+
 	__set_bit(asid, sev_asid_bitmap);
 
 	mutex_unlock(&sev_bitmap_lock);
@@ -231,18 +234,10 @@ unsigned int sev_get_asid(struct kvm *kvm)
 
 static void sev_asid_free(struct kvm_sev_info *sev)
 {
-	struct svm_cpu_data *sd;
-	int cpu;
+	svm_unregister_asid(sev->asid);
 
 	mutex_lock(&sev_bitmap_lock);
-
 	__set_bit(sev->asid, sev_reclaim_asid_bitmap);
-
-	for_each_possible_cpu(cpu) {
-		sd = per_cpu_ptr(&svm_data, cpu);
-		sd->sev_vcpus[sev->asid] = NULL;
-	}
-
 	mutex_unlock(&sev_bitmap_lock);
 
 	sev_misc_cg_uncharge(sev);
@@ -3076,18 +3071,6 @@ void sev_hardware_unsetup(void)
 	misc_cg_set_capacity(MISC_CG_RES_SEV_ES, 0);
 }
 
-int sev_cpu_init(struct svm_cpu_data *sd)
-{
-	if (!sev_enabled)
-		return 0;
-
-	sd->sev_vcpus = kcalloc(nr_asids, sizeof(void *), GFP_KERNEL);
-	if (!sd->sev_vcpus)
-		return -ENOMEM;
-
-	return 0;
-}
-
 /*
  * Pages used by hardware to hold guest encrypted state must be flushed before
  * returning them to the system.
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index ce67112732e8c..b740114a9d9bc 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -694,7 +694,7 @@ static void svm_cpu_uninit(int cpu)
 	if (!sd->save_area)
 		return;
 
-	kfree(sd->sev_vcpus);
+	xa_destroy(&sd->asid_vcpu);
 	__free_page(__sme_pa_to_page(sd->save_area_pa));
 	sd->save_area_pa = 0;
 	sd->save_area = NULL;
@@ -711,18 +711,11 @@ static int svm_cpu_init(int cpu)
 	if (!save_area_page)
 		return ret;
 
-	ret = sev_cpu_init(sd);
-	if (ret)
-		goto free_save_area;
+	xa_init(&sd->asid_vcpu);
 
 	sd->save_area = page_address(save_area_page);
 	sd->save_area_pa = __sme_page_pa(save_area_page);
 	return 0;
-
-free_save_area:
-	__free_page(save_area_page);
-	return ret;
-
 }
 
 static void set_dr_intercepts(struct vcpu_svm *svm)
@@ -1557,6 +1550,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	unsigned int asid;
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
+	struct kvm_vcpu *prev;
 
 	if (vcpu->scheduled_out && !kvm_pause_in_guest(vcpu->kvm))
 		shrink_ple_window(vcpu);
@@ -1573,13 +1567,13 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	if (sev_guest(vcpu->kvm)) {
 		/*
 		 * Flush the TLB when a different vCPU using the same ASID is
-		 * run on the same CPU.
+		 * run on the same CPU. xa_store() should always succeed because
+		 * the entry is reserved when the ASID is allocated.
		 */
 		asid = sev_get_asid(vcpu->kvm);
-		if (sd->sev_vcpus[asid] != vcpu) {
-			sd->sev_vcpus[asid] = vcpu;
+		prev = xa_store(&sd->asid_vcpu, asid, vcpu, GFP_ATOMIC);
+		if (WARN_ON_ONCE(xa_err(prev)) || prev != vcpu)
 			kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
-		}
 	}
 }

@@ -5047,6 +5041,36 @@ static void svm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
 	sev_vcpu_deliver_sipi_vector(vcpu, vector);
 }
 
+void svm_unregister_asid(unsigned int asid)
+{
+	struct svm_cpu_data *sd;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		sd = per_cpu_ptr(&svm_data, cpu);
+		xa_erase(&sd->asid_vcpu, asid);
+	}
+}
+
+bool svm_register_asid(unsigned int asid)
+{
+	struct svm_cpu_data *sd;
+	int cpu;
+
+	/*
+	 * Preallocate entries on all CPUs for the ASID to avoid memory
+	 * allocations in the vCPU load path.
+	 */
+	for_each_possible_cpu(cpu) {
+		sd = per_cpu_ptr(&svm_data, cpu);
+		if (xa_reserve(&sd->asid_vcpu, asid, GFP_KERNEL_ACCOUNT)) {
+			svm_unregister_asid(asid);
+			return false;
+		}
+	}
+	return true;
+}
+
 static void svm_vm_destroy(struct kvm *kvm)
 {
 	avic_vm_destroy(kvm);
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 3ab2a424992c1..4929b96d3d700 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -340,8 +340,7 @@ struct svm_cpu_data {
 
 	struct vmcb *current_vmcb;
 
-	/* index = sev_asid, value = vcpu pointer */
-	struct kvm_vcpu **sev_vcpus;
+	struct xarray asid_vcpu;
 };
 
 DECLARE_PER_CPU(struct svm_cpu_data, svm_data);
@@ -655,6 +654,8 @@ void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
 void svm_set_x2apic_msr_interception(struct vcpu_svm *svm, bool disable);
 void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
				     int trig_mode, int vec);
+bool svm_register_asid(unsigned int asid);
+void svm_unregister_asid(unsigned int asid);
 
 /* nested.c */
 
@@ -793,7 +794,6 @@ void sev_vm_destroy(struct kvm *kvm);
 void __init sev_set_cpu_caps(void);
 void __init sev_hardware_setup(void);
 void sev_hardware_unsetup(void);
-int sev_cpu_init(struct svm_cpu_data *sd);
 int sev_dev_get_attr(u32 group, u64 attr, u64 *val);
 extern unsigned int max_sev_asid;
 void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code);
@@ -817,7 +817,6 @@ static inline void sev_vm_destroy(struct kvm *kvm) {}
 static inline void __init sev_set_cpu_caps(void) {}
 static inline void __init sev_hardware_setup(void) {}
 static inline void sev_hardware_unsetup(void) {}
-static inline int sev_cpu_init(struct svm_cpu_data *sd) { return 0; }
 static inline int sev_dev_get_attr(u32 group, u64 attr, u64 *val) { return -ENXIO; }
 #define max_sev_asid 0
 static inline void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code) {}
--
2.49.0.395.g12beb8f557-goog