[RFC PATCH v2 11/19] RISC-V: KVM: Implement VMID allocator

From: Anup Patel
Date: Fri Aug 02 2019 - 03:48:10 EST

Next message: Anup Patel: "[RFC PATCH v2 12/19] RISC-V: KVM: Implement stage2 page table programming"
Previous message: Laurent Pinchart: "Re: [PATCH/RFC 01/12] dt-bindings: display: renesas: lvds: RZ/G2E needs renesas,companion too"
In reply to: Anup Patel: "Re: [RFC PATCH v2 10/19] RISC-V: KVM: Handle WFI exits for VCPU"
Next in thread: Paolo Bonzini: "Re: [RFC PATCH v2 11/19] RISC-V: KVM: Implement VMID allocator"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

We implement a simple VMID allocator for Guests/VMs which:
1. Detects number of VMID bits at boot-time
2. Uses atomic number to track VMID version and increments
VMID version whenever we run-out of VMIDs
3. Flushes Guest TLBs on all host CPUs whenever we run-out
of VMIDs
4. Force updates HW Stage2 VMID for each Guest VCPU whenever
VMID changes using VCPU request KVM_REQ_UPDATE_HGATP

Signed-off-by: Anup Patel <anup.patel@xxxxxxx>
---
arch/riscv/include/asm/kvm_host.h | 21 +++++
arch/riscv/kvm/Makefile | 3 +-
arch/riscv/kvm/main.c | 4 +
arch/riscv/kvm/tlb.S | 43 +++++++++++
arch/riscv/kvm/vcpu.c | 6 ++
arch/riscv/kvm/vm.c | 6 ++
arch/riscv/kvm/vmid.c | 124 ++++++++++++++++++++++++++++++
7 files changed, 206 insertions(+), 1 deletion(-)
create mode 100644 arch/riscv/kvm/tlb.S
create mode 100644 arch/riscv/kvm/vmid.c

diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index 45225baa7d0c..ca2b4f2b558c 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -27,6 +27,7 @@
#define KVM_REQ_SLEEP \
KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(1)
+#define KVM_REQ_UPDATE_HGATP KVM_ARCH_REQ(2)

struct kvm_vm_stat {
ulong remote_tlb_flush;
@@ -47,7 +48,15 @@ struct kvm_vcpu_stat {
struct kvm_arch_memory_slot {
};

+struct kvm_vmid {
+ unsigned long vmid_version;
+ unsigned long vmid;
+};
+
struct kvm_arch {
+ /* stage2 vmid */
+ struct kvm_vmid vmid;
+
/* stage2 page table */
pgd_t *pgd;
phys_addr_t pgd_phys;
@@ -155,6 +164,12 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}

+extern void __kvm_riscv_hfence_gvma_vmid_gpa(unsigned long vmid,
+ unsigned long gpa);
+extern void __kvm_riscv_hfence_gvma_vmid(unsigned long vmid);
+extern void __kvm_riscv_hfence_gvma_gpa(unsigned long gpa);
+extern void __kvm_riscv_hfence_gvma_all(void);
+
int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned long hva,
bool is_write);
void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu);
@@ -162,6 +177,12 @@ int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm);
void kvm_riscv_stage2_free_pgd(struct kvm *kvm);
void kvm_riscv_stage2_update_hgatp(struct kvm_vcpu *vcpu);

+void kvm_riscv_stage2_vmid_detect(void);
+unsigned long kvm_riscv_stage2_vmid_bits(void);
+int kvm_riscv_stage2_vmid_init(struct kvm *kvm);
+bool kvm_riscv_stage2_vmid_ver_changed(struct kvm_vmid *vmid);
+void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu);
+
int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
unsigned long scause, unsigned long stval);
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index 845579273727..c0f57f26c13d 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -8,6 +8,7 @@ ccflags-y := -Ivirt/kvm -Iarch/riscv/kvm

kvm-objs := $(common-objs-y)

-kvm-objs += main.o vm.o mmu.o vcpu.o vcpu_exit.o vcpu_switch.o
+kvm-objs += main.o vm.o vmid.o tlb.o mmu.o
+kvm-objs += vcpu.o vcpu_exit.o vcpu_switch.o

obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
index a26a68df7cfc..379fc960d412 100644
--- a/arch/riscv/kvm/main.c
+++ b/arch/riscv/kvm/main.c
@@ -64,8 +64,12 @@ int kvm_arch_init(void *opaque)
return -ENODEV;
}

+ kvm_riscv_stage2_vmid_detect();
+
kvm_info("hypervisor extension available\n");

+ kvm_info("host has %ld VMID bits\n", kvm_riscv_stage2_vmid_bits());
+
return 0;
}

diff --git a/arch/riscv/kvm/tlb.S b/arch/riscv/kvm/tlb.S
new file mode 100644
index 000000000000..453fca8d7940
--- /dev/null
+++ b/arch/riscv/kvm/tlb.S
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel <anup.patel@xxxxxxx>
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+
+ .text
+ .altmacro
+ .option norelax
+
+ /*
+ * Instruction encoding of hfence.gvma is:
+ * 0110001 rs2(5) rs1(5) 000 00000 1110011
+ */
+
+ENTRY(__kvm_riscv_hfence_gvma_vmid_gpa)
+ /* hfence.gvma a1, a0 */
+ .word 0x62a60073
+ ret
+ENDPROC(__kvm_riscv_hfence_gvma_vmid_gpa)
+
+ENTRY(__kvm_riscv_hfence_gvma_vmid)
+ /* hfence.gvma zero, a0 */
+ .word 0x62a00073
+ ret
+ENDPROC(__kvm_riscv_hfence_gvma_vmid)
+
+ENTRY(__kvm_riscv_hfence_gvma_gpa)
+ /* hfence.gvma a0 */
+ .word 0x62050073
+ ret
+ENDPROC(__kvm_riscv_hfence_gvma_gpa)
+
+ENTRY(__kvm_riscv_hfence_gvma_all)
+ /* hfence.gvma */
+ .word 0x62000073
+ ret
+ENDPROC(__kvm_riscv_hfence_gvma_all)
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 652004aa1c2b..69548ddaa272 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -625,6 +625,9 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)

if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
kvm_riscv_reset_vcpu(vcpu);
+
+ if (kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu))
+ kvm_riscv_stage2_update_hgatp(vcpu);
}
}

@@ -656,6 +659,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
/* Check conditions before entering the guest */
cond_resched();

+ kvm_riscv_stage2_vmid_update(vcpu);
+
kvm_riscv_check_vcpu_requests(vcpu);

preempt_disable();
@@ -686,6 +691,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_riscv_vcpu_flush_interrupts(vcpu, true);

if (ret <= 0 ||
+ kvm_riscv_stage2_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
kvm_request_pending(vcpu)) {
vcpu->mode = OUTSIDE_GUEST_MODE;
local_irq_enable();
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index ac0211820521..c5aab5478c38 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -26,6 +26,12 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (r)
return r;

+ r = kvm_riscv_stage2_vmid_init(kvm);
+ if (r) {
+ kvm_riscv_stage2_free_pgd(kvm);
+ return r;
+ }
+
return 0;
}

diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c
new file mode 100644
index 000000000000..deee60013890
--- /dev/null
+++ b/arch/riscv/kvm/vmid.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel <anup.patel@xxxxxxx>
+ */
+
+#include <linux/bitops.h>
+#include <linux/cpumask.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+
+static unsigned long vmid_version = 1;
+static unsigned long vmid_next;
+static unsigned long vmid_bits;
+static DEFINE_SPINLOCK(vmid_lock);
+
+void kvm_riscv_stage2_vmid_detect(void)
+{
+ unsigned long old;
+
+ /* Figure-out number of VMID bits in HW */
+ old = csr_read(CSR_HGATP);
+ csr_write(CSR_HGATP, old | HGATP_VMID_MASK);
+ vmid_bits = csr_read(CSR_HGATP);
+ vmid_bits = (vmid_bits & HGATP_VMID_MASK) >> HGATP_VMID_SHIFT;
+ vmid_bits = fls_long(vmid_bits);
+ csr_write(CSR_HGATP, old);
+
+ /* We polluted local TLB so flush all guest TLB */
+ __kvm_riscv_hfence_gvma_all();
+
+ /* We don't use VMID bits if they are not sufficient */
+ if ((1UL << vmid_bits) < num_possible_cpus())
+ vmid_bits = 0;
+}
+
+unsigned long kvm_riscv_stage2_vmid_bits(void)
+{
+ return vmid_bits;
+}
+
+int kvm_riscv_stage2_vmid_init(struct kvm *kvm)
+{
+ /* Mark the initial VMID and VMID version invalid */
+ kvm->arch.vmid.vmid_version = 0;
+ kvm->arch.vmid.vmid = 0;
+
+ return 0;
+}
+
+static void local_guest_tlb_flush(void *info)
+{
+ __kvm_riscv_hfence_gvma_all();
+}
+
+static void force_exit_and_guest_tlb_flush(const cpumask_t *mask)
+{
+ preempt_disable();
+ smp_call_function_many(mask, local_guest_tlb_flush, NULL, true);
+ preempt_enable();
+}
+
+bool kvm_riscv_stage2_vmid_ver_changed(struct kvm_vmid *vmid)
+{
+ if (!vmid_bits)
+ return false;
+
+ return unlikely(READ_ONCE(vmid->vmid_version) !=
+ READ_ONCE(vmid_version));
+}
+
+void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
+{
+ int i;
+ struct kvm_vcpu *v;
+ struct kvm_vmid *vmid = &vcpu->kvm->arch.vmid;
+
+ if (!kvm_riscv_stage2_vmid_ver_changed(vmid))
+ return;
+
+ spin_lock(&vmid_lock);
+
+ /*
+ * We need to re-check the vmid_version here to ensure that if
+ * another vcpu already allocated a valid vmid for this vm.
+ */
+ if (!kvm_riscv_stage2_vmid_ver_changed(vmid)) {
+ spin_unlock(&vmid_lock);
+ return;
+ }
+
+ /* First user of a new VMID version? */
+ if (unlikely(vmid_next == 0)) {
+ WRITE_ONCE(vmid_version, READ_ONCE(vmid_version) + 1);
+ vmid_next = 1;
+
+ /*
+ * On SMP we know no other CPUs can use this CPU's or
+ * each other's VMID after forced exit returns since the
+ * vmid_lock blocks them from re-entry to the guest.
+ */
+ force_exit_and_guest_tlb_flush(cpu_all_mask);
+ }
+
+ vmid->vmid = vmid_next;
+ vmid_next++;
+ vmid_next &= (1 << vmid_bits) - 1;
+
+ /* Ensure VMID next update is completed */
+ smp_wmb();
+
+ WRITE_ONCE(vmid->vmid_version, READ_ONCE(vmid_version));
+
+ spin_unlock(&vmid_lock);
+
+ /* Request stage2 page table update for all VCPUs */
+ kvm_for_each_vcpu(i, v, vcpu->kvm)
+ kvm_make_request(KVM_REQ_UPDATE_HGATP, v);
+}
--
2.17.1

Next message: Anup Patel: "[RFC PATCH v2 12/19] RISC-V: KVM: Implement stage2 page table programming"
Previous message: Laurent Pinchart: "Re: [PATCH/RFC 01/12] dt-bindings: display: renesas: lvds: RZ/G2E needs renesas,companion too"
In reply to: Anup Patel: "Re: [RFC PATCH v2 10/19] RISC-V: KVM: Handle WFI exits for VCPU"
Next in thread: Paolo Bonzini: "Re: [RFC PATCH v2 11/19] RISC-V: KVM: Implement VMID allocator"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]