[PATCH 2/4] KVM: SVM: Move Nested SVM Implementation to nested.c

From: Joerg Roedel
Date: Tue Mar 24 2020 - 05:42:08 EST


From: Joerg Roedel <jroedel@xxxxxxx>

Split out the code for the nested SVM implementation and move it to a
separate file.

Signed-off-by: Joerg Roedel <jroedel@xxxxxxx>
---
arch/x86/kvm/Makefile | 2 +-
arch/x86/kvm/svm/nested.c | 823 ++++++++++++++++++++++++++
arch/x86/kvm/svm/svm.c | 1155 +------------------------------------
arch/x86/kvm/svm/svm.h | 381 ++++++++++++
4 files changed, 1216 insertions(+), 1145 deletions(-)
create mode 100644 arch/x86/kvm/svm/nested.c
create mode 100644 arch/x86/kvm/svm/svm.h

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index c6f14e3cc5ab..63ae654f7f97 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -14,7 +14,7 @@ kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o

kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o vmx/evmcs.o vmx/nested.o
-kvm-amd-y += svm/svm.o svm/pmu.o
+kvm-amd-y += svm/svm.o svm/pmu.o svm/nested.o

obj-$(CONFIG_KVM) += kvm.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
new file mode 100644
index 000000000000..961f413626d0
--- /dev/null
+++ b/arch/x86/kvm/svm/nested.c
@@ -0,0 +1,823 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Kernel-based Virtual Machine driver for Linux
+ *
+ * AMD SVM support
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ *
+ * Authors:
+ * Yaniv Kamay <yaniv@xxxxxxxxxxxx>
+ * Avi Kivity <avi@xxxxxxxxxxxx>
+ */
+
+#define pr_fmt(fmt) "SVM: " fmt
+
+#include <linux/kvm_types.h>
+#include <linux/kvm_host.h>
+#include <linux/kernel.h>
+
+#include <asm/msr-index.h>
+
+#include "kvm_emulate.h"
+#include "trace.h"
+#include "mmu.h"
+#include "x86.h"
+#include "svm.h"
+
+static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
+ struct x86_exception *fault)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) {
+ /*
+ * TODO: track the cause of the nested page fault, and
+ * correctly fill in the high bits of exit_info_1.
+ */
+ svm->vmcb->control.exit_code = SVM_EXIT_NPF;
+ svm->vmcb->control.exit_code_hi = 0;
+ svm->vmcb->control.exit_info_1 = (1ULL << 32);
+ svm->vmcb->control.exit_info_2 = fault->address;
+ }
+
+ svm->vmcb->control.exit_info_1 &= ~0xffffffffULL;
+ svm->vmcb->control.exit_info_1 |= fault->error_code;
+
+ /*
+ * The present bit is always zero for page structure faults on real
+ * hardware.
+ */
+ if (svm->vmcb->control.exit_info_1 & (2ULL << 32))
+ svm->vmcb->control.exit_info_1 &= ~1;
+
+ nested_svm_vmexit(svm);
+}
+
+static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ u64 cr3 = svm->nested.nested_cr3;
+ u64 pdpte;
+ int ret;
+
+ ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(__sme_clr(cr3)), &pdpte,
+ offset_in_page(cr3) + index * 8, 8);
+ if (ret)
+ return 0;
+ return pdpte;
+}
+
+static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ return svm->nested.nested_cr3;
+}
+
+static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
+{
+ WARN_ON(mmu_is_nested(vcpu));
+
+ vcpu->arch.mmu = &vcpu->arch.guest_mmu;
+ kvm_init_shadow_mmu(vcpu);
+ vcpu->arch.mmu->get_guest_pgd = nested_svm_get_tdp_cr3;
+ vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr;
+ vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
+ vcpu->arch.mmu->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu);
+ reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu);
+ vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
+}
+
+static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.mmu = &vcpu->arch.root_mmu;
+ vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
+}
+
+void recalc_intercepts(struct vcpu_svm *svm)
+{
+ struct vmcb_control_area *c, *h;
+ struct nested_state *g;
+
+ mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
+
+ if (!is_guest_mode(&svm->vcpu))
+ return;
+
+ c = &svm->vmcb->control;
+ h = &svm->nested.hsave->control;
+ g = &svm->nested;
+
+ c->intercept_cr = h->intercept_cr;
+ c->intercept_dr = h->intercept_dr;
+ c->intercept_exceptions = h->intercept_exceptions;
+ c->intercept = h->intercept;
+
+ if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
+ /* We only want the cr8 intercept bits of L1 */
+ c->intercept_cr &= ~(1U << INTERCEPT_CR8_READ);
+ c->intercept_cr &= ~(1U << INTERCEPT_CR8_WRITE);
+
+ /*
+ * Once running L2 with HF_VINTR_MASK, EFLAGS.IF does not
+ * affect any interrupt we may want to inject; therefore,
+ * interrupt window vmexits are irrelevant to L0.
+ */
+ c->intercept &= ~(1ULL << INTERCEPT_VINTR);
+ }
+
+ /* We don't want to see VMMCALLs from a nested guest */
+ c->intercept &= ~(1ULL << INTERCEPT_VMMCALL);
+
+ c->intercept_cr |= g->intercept_cr;
+ c->intercept_dr |= g->intercept_dr;
+ c->intercept_exceptions |= g->intercept_exceptions;
+ c->intercept |= g->intercept;
+}
+
+static void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
+{
+ struct vmcb_control_area *dst = &dst_vmcb->control;
+ struct vmcb_control_area *from = &from_vmcb->control;
+
+ dst->intercept_cr = from->intercept_cr;
+ dst->intercept_dr = from->intercept_dr;
+ dst->intercept_exceptions = from->intercept_exceptions;
+ dst->intercept = from->intercept;
+ dst->iopm_base_pa = from->iopm_base_pa;
+ dst->msrpm_base_pa = from->msrpm_base_pa;
+ dst->tsc_offset = from->tsc_offset;
+ dst->asid = from->asid;
+ dst->tlb_ctl = from->tlb_ctl;
+ dst->int_ctl = from->int_ctl;
+ dst->int_vector = from->int_vector;
+ dst->int_state = from->int_state;
+ dst->exit_code = from->exit_code;
+ dst->exit_code_hi = from->exit_code_hi;
+ dst->exit_info_1 = from->exit_info_1;
+ dst->exit_info_2 = from->exit_info_2;
+ dst->exit_int_info = from->exit_int_info;
+ dst->exit_int_info_err = from->exit_int_info_err;
+ dst->nested_ctl = from->nested_ctl;
+ dst->event_inj = from->event_inj;
+ dst->event_inj_err = from->event_inj_err;
+ dst->nested_cr3 = from->nested_cr3;
+ dst->virt_ext = from->virt_ext;
+ dst->pause_filter_count = from->pause_filter_count;
+ dst->pause_filter_thresh = from->pause_filter_thresh;
+}
+
+static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
+{
+ /*
+ * This function merges the msr permission bitmaps of kvm and the
+ * nested vmcb. It is optimized in that it only merges the parts where
+ * the kvm msr permission bitmap may contain zero bits
+ */
+ int i;
+
+ if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
+ return true;
+
+ for (i = 0; i < MSRPM_OFFSETS; i++) {
+ u32 value, p;
+ u64 offset;
+
+ if (msrpm_offsets[i] == 0xffffffff)
+ break;
+
+ p = msrpm_offsets[i];
+ offset = svm->nested.vmcb_msrpm + (p * 4);
+
+ if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
+ return false;
+
+ svm->nested.msrpm[p] = svm->msrpm[p] | value;
+ }
+
+ svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm));
+
+ return true;
+}
+
+static bool nested_vmcb_checks(struct vmcb *vmcb)
+{
+ if ((vmcb->save.efer & EFER_SVME) == 0)
+ return false;
+
+ if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
+ return false;
+
+ if (vmcb->control.asid == 0)
+ return false;
+
+ if ((vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
+ !npt_enabled)
+ return false;
+
+ return true;
+}
+
+void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
+ struct vmcb *nested_vmcb, struct kvm_host_map *map)
+{
+ bool evaluate_pending_interrupts =
+ is_intercept(svm, INTERCEPT_VINTR) ||
+ is_intercept(svm, INTERCEPT_IRET);
+
+ if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
+ svm->vcpu.arch.hflags |= HF_HIF_MASK;
+ else
+ svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
+
+ if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) {
+ svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
+ nested_svm_init_mmu_context(&svm->vcpu);
+ }
+
+ /* Load the nested guest state */
+ svm->vmcb->save.es = nested_vmcb->save.es;
+ svm->vmcb->save.cs = nested_vmcb->save.cs;
+ svm->vmcb->save.ss = nested_vmcb->save.ss;
+ svm->vmcb->save.ds = nested_vmcb->save.ds;
+ svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
+ svm->vmcb->save.idtr = nested_vmcb->save.idtr;
+ kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
+ svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
+ svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
+ svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
+ if (npt_enabled) {
+ svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
+ svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
+ } else
+ (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
+
+ /* Guest paging mode is active - reset mmu */
+ kvm_mmu_reset_context(&svm->vcpu);
+
+ svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
+ kvm_rax_write(&svm->vcpu, nested_vmcb->save.rax);
+ kvm_rsp_write(&svm->vcpu, nested_vmcb->save.rsp);
+ kvm_rip_write(&svm->vcpu, nested_vmcb->save.rip);
+
+ /* In case we don't even reach vcpu_run, the fields are not updated */
+ svm->vmcb->save.rax = nested_vmcb->save.rax;
+ svm->vmcb->save.rsp = nested_vmcb->save.rsp;
+ svm->vmcb->save.rip = nested_vmcb->save.rip;
+ svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
+ svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
+ svm->vmcb->save.cpl = nested_vmcb->save.cpl;
+
+ svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
+ svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL;
+
+ /* cache intercepts */
+ svm->nested.intercept_cr = nested_vmcb->control.intercept_cr;
+ svm->nested.intercept_dr = nested_vmcb->control.intercept_dr;
+ svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
+ svm->nested.intercept = nested_vmcb->control.intercept;
+
+ svm_flush_tlb(&svm->vcpu, true);
+ svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
+ if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
+ svm->vcpu.arch.hflags |= HF_VINTR_MASK;
+ else
+ svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
+
+ svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset;
+ svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;
+
+ svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
+ svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
+ svm->vmcb->control.int_state = nested_vmcb->control.int_state;
+ svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
+ svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
+
+ svm->vmcb->control.pause_filter_count =
+ nested_vmcb->control.pause_filter_count;
+ svm->vmcb->control.pause_filter_thresh =
+ nested_vmcb->control.pause_filter_thresh;
+
+ kvm_vcpu_unmap(&svm->vcpu, map, true);
+
+ /* Enter Guest-Mode */
+ enter_guest_mode(&svm->vcpu);
+
+ /*
+ * Merge guest and host intercepts - must be called with vcpu in
+ * guest-mode to take affect here
+ */
+ recalc_intercepts(svm);
+
+ svm->nested.vmcb = vmcb_gpa;
+
+ /*
+ * If L1 had a pending IRQ/NMI before executing VMRUN,
+ * which wasn't delivered because it was disallowed (e.g.
+ * interrupts disabled), L0 needs to evaluate if this pending
+ * event should cause an exit from L2 to L1 or be delivered
+ * directly to L2.
+ *
+ * Usually this would be handled by the processor noticing an
+ * IRQ/NMI window request. However, VMRUN can unblock interrupts
+ * by implicitly setting GIF, so force L0 to perform pending event
+ * evaluation by requesting a KVM_REQ_EVENT.
+ */
+ enable_gif(svm);
+ if (unlikely(evaluate_pending_interrupts))
+ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
+
+ mark_all_dirty(svm->vmcb);
+}
+
+int nested_svm_vmrun(struct vcpu_svm *svm)
+{
+ int ret;
+ struct vmcb *nested_vmcb;
+ struct vmcb *hsave = svm->nested.hsave;
+ struct vmcb *vmcb = svm->vmcb;
+ struct kvm_host_map map;
+ u64 vmcb_gpa;
+
+ vmcb_gpa = svm->vmcb->save.rax;
+
+ ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
+ if (ret == -EINVAL) {
+ kvm_inject_gp(&svm->vcpu, 0);
+ return 1;
+ } else if (ret) {
+ return kvm_skip_emulated_instruction(&svm->vcpu);
+ }
+
+ ret = kvm_skip_emulated_instruction(&svm->vcpu);
+
+ nested_vmcb = map.hva;
+
+ if (!nested_vmcb_checks(nested_vmcb)) {
+ nested_vmcb->control.exit_code = SVM_EXIT_ERR;
+ nested_vmcb->control.exit_code_hi = 0;
+ nested_vmcb->control.exit_info_1 = 0;
+ nested_vmcb->control.exit_info_2 = 0;
+
+ kvm_vcpu_unmap(&svm->vcpu, &map, true);
+
+ return ret;
+ }
+
+ trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
+ nested_vmcb->save.rip,
+ nested_vmcb->control.int_ctl,
+ nested_vmcb->control.event_inj,
+ nested_vmcb->control.nested_ctl);
+
+ trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff,
+ nested_vmcb->control.intercept_cr >> 16,
+ nested_vmcb->control.intercept_exceptions,
+ nested_vmcb->control.intercept);
+
+ /* Clear internal status */
+ kvm_clear_exception_queue(&svm->vcpu);
+ kvm_clear_interrupt_queue(&svm->vcpu);
+
+ /*
+ * Save the old vmcb, so we don't need to pick what we save, but can
+ * restore everything when a VMEXIT occurs
+ */
+ hsave->save.es = vmcb->save.es;
+ hsave->save.cs = vmcb->save.cs;
+ hsave->save.ss = vmcb->save.ss;
+ hsave->save.ds = vmcb->save.ds;
+ hsave->save.gdtr = vmcb->save.gdtr;
+ hsave->save.idtr = vmcb->save.idtr;
+ hsave->save.efer = svm->vcpu.arch.efer;
+ hsave->save.cr0 = kvm_read_cr0(&svm->vcpu);
+ hsave->save.cr4 = svm->vcpu.arch.cr4;
+ hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
+ hsave->save.rip = kvm_rip_read(&svm->vcpu);
+ hsave->save.rsp = vmcb->save.rsp;
+ hsave->save.rax = vmcb->save.rax;
+ if (npt_enabled)
+ hsave->save.cr3 = vmcb->save.cr3;
+ else
+ hsave->save.cr3 = kvm_read_cr3(&svm->vcpu);
+
+ copy_vmcb_control_area(hsave, vmcb);
+
+ enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map);
+
+ if (!nested_svm_vmrun_msrpm(svm)) {
+ svm->vmcb->control.exit_code = SVM_EXIT_ERR;
+ svm->vmcb->control.exit_code_hi = 0;
+ svm->vmcb->control.exit_info_1 = 0;
+ svm->vmcb->control.exit_info_2 = 0;
+
+ nested_svm_vmexit(svm);
+ }
+
+ return ret;
+}
+
+void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
+{
+ to_vmcb->save.fs = from_vmcb->save.fs;
+ to_vmcb->save.gs = from_vmcb->save.gs;
+ to_vmcb->save.tr = from_vmcb->save.tr;
+ to_vmcb->save.ldtr = from_vmcb->save.ldtr;
+ to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
+ to_vmcb->save.star = from_vmcb->save.star;
+ to_vmcb->save.lstar = from_vmcb->save.lstar;
+ to_vmcb->save.cstar = from_vmcb->save.cstar;
+ to_vmcb->save.sfmask = from_vmcb->save.sfmask;
+ to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
+ to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
+ to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
+}
+
+int nested_svm_vmexit(struct vcpu_svm *svm)
+{
+ int rc;
+ struct vmcb *nested_vmcb;
+ struct vmcb *hsave = svm->nested.hsave;
+ struct vmcb *vmcb = svm->vmcb;
+ struct kvm_host_map map;
+
+ trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
+ vmcb->control.exit_info_1,
+ vmcb->control.exit_info_2,
+ vmcb->control.exit_int_info,
+ vmcb->control.exit_int_info_err,
+ KVM_ISA_SVM);
+
+ rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.vmcb), &map);
+ if (rc) {
+ if (rc == -EINVAL)
+ kvm_inject_gp(&svm->vcpu, 0);
+ return 1;
+ }
+
+ nested_vmcb = map.hva;
+
+ /* Exit Guest-Mode */
+ leave_guest_mode(&svm->vcpu);
+ svm->nested.vmcb = 0;
+
+ /* Give the current vmcb to the guest */
+ disable_gif(svm);
+
+ nested_vmcb->save.es = vmcb->save.es;
+ nested_vmcb->save.cs = vmcb->save.cs;
+ nested_vmcb->save.ss = vmcb->save.ss;
+ nested_vmcb->save.ds = vmcb->save.ds;
+ nested_vmcb->save.gdtr = vmcb->save.gdtr;
+ nested_vmcb->save.idtr = vmcb->save.idtr;
+ nested_vmcb->save.efer = svm->vcpu.arch.efer;
+ nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu);
+ nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu);
+ nested_vmcb->save.cr2 = vmcb->save.cr2;
+ nested_vmcb->save.cr4 = svm->vcpu.arch.cr4;
+ nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
+ nested_vmcb->save.rip = vmcb->save.rip;
+ nested_vmcb->save.rsp = vmcb->save.rsp;
+ nested_vmcb->save.rax = vmcb->save.rax;
+ nested_vmcb->save.dr7 = vmcb->save.dr7;
+ nested_vmcb->save.dr6 = vmcb->save.dr6;
+ nested_vmcb->save.cpl = vmcb->save.cpl;
+
+ nested_vmcb->control.int_ctl = vmcb->control.int_ctl;
+ nested_vmcb->control.int_vector = vmcb->control.int_vector;
+ nested_vmcb->control.int_state = vmcb->control.int_state;
+ nested_vmcb->control.exit_code = vmcb->control.exit_code;
+ nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi;
+ nested_vmcb->control.exit_info_1 = vmcb->control.exit_info_1;
+ nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
+ nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
+ nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
+
+ if (svm->nrips_enabled)
+ nested_vmcb->control.next_rip = vmcb->control.next_rip;
+
+ /*
+ * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
+ * to make sure that we do not lose injected events. So check event_inj
+ * here and copy it to exit_int_info if it is valid.
+ * Exit_int_info and event_inj can't be both valid because the case
+ * below only happens on a VMRUN instruction intercept which has
+ * no valid exit_int_info set.
+ */
+ if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
+ struct vmcb_control_area *nc = &nested_vmcb->control;
+
+ nc->exit_int_info = vmcb->control.event_inj;
+ nc->exit_int_info_err = vmcb->control.event_inj_err;
+ }
+
+ nested_vmcb->control.tlb_ctl = 0;
+ nested_vmcb->control.event_inj = 0;
+ nested_vmcb->control.event_inj_err = 0;
+
+ nested_vmcb->control.pause_filter_count =
+ svm->vmcb->control.pause_filter_count;
+ nested_vmcb->control.pause_filter_thresh =
+ svm->vmcb->control.pause_filter_thresh;
+
+ /* We always set V_INTR_MASKING and remember the old value in hflags */
+ if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
+ nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
+
+ /* Restore the original control entries */
+ copy_vmcb_control_area(vmcb, hsave);
+
+ svm->vcpu.arch.tsc_offset = svm->vmcb->control.tsc_offset;
+ kvm_clear_exception_queue(&svm->vcpu);
+ kvm_clear_interrupt_queue(&svm->vcpu);
+
+ svm->nested.nested_cr3 = 0;
+
+ /* Restore selected save entries */
+ svm->vmcb->save.es = hsave->save.es;
+ svm->vmcb->save.cs = hsave->save.cs;
+ svm->vmcb->save.ss = hsave->save.ss;
+ svm->vmcb->save.ds = hsave->save.ds;
+ svm->vmcb->save.gdtr = hsave->save.gdtr;
+ svm->vmcb->save.idtr = hsave->save.idtr;
+ kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
+ svm_set_efer(&svm->vcpu, hsave->save.efer);
+ svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
+ svm_set_cr4(&svm->vcpu, hsave->save.cr4);
+ if (npt_enabled) {
+ svm->vmcb->save.cr3 = hsave->save.cr3;
+ svm->vcpu.arch.cr3 = hsave->save.cr3;
+ } else {
+ (void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
+ }
+ kvm_rax_write(&svm->vcpu, hsave->save.rax);
+ kvm_rsp_write(&svm->vcpu, hsave->save.rsp);
+ kvm_rip_write(&svm->vcpu, hsave->save.rip);
+ svm->vmcb->save.dr7 = 0;
+ svm->vmcb->save.cpl = 0;
+ svm->vmcb->control.exit_int_info = 0;
+
+ mark_all_dirty(svm->vmcb);
+
+ kvm_vcpu_unmap(&svm->vcpu, &map, true);
+
+ nested_svm_uninit_mmu_context(&svm->vcpu);
+ kvm_mmu_reset_context(&svm->vcpu);
+ kvm_mmu_load(&svm->vcpu);
+
+ /*
+ * Drop what we picked up for L2 via svm_complete_interrupts() so it
+ * doesn't end up in L1.
+ */
+ svm->vcpu.arch.nmi_injected = false;
+ kvm_clear_exception_queue(&svm->vcpu);
+ kvm_clear_interrupt_queue(&svm->vcpu);
+
+ return 0;
+}
+
+static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
+{
+ u32 offset, msr, value;
+ int write, mask;
+
+ if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
+ return NESTED_EXIT_HOST;
+
+ msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
+ offset = svm_msrpm_offset(msr);
+ write = svm->vmcb->control.exit_info_1 & 1;
+ mask = 1 << ((2 * (msr & 0xf)) + write);
+
+ if (offset == MSR_INVALID)
+ return NESTED_EXIT_DONE;
+
+ /* Offset is in 32 bit units but need in 8 bit units */
+ offset *= 4;
+
+ if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.vmcb_msrpm + offset, &value, 4))
+ return NESTED_EXIT_DONE;
+
+ return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
+}
+
+/* DB exceptions for our internal use must not cause vmexit */
+static int nested_svm_intercept_db(struct vcpu_svm *svm)
+{
+ unsigned long dr6;
+
+ /* if we're not singlestepping, it's not ours */
+ if (!svm->nmi_singlestep)
+ return NESTED_EXIT_DONE;
+
+ /* if it's not a singlestep exception, it's not ours */
+ if (kvm_get_dr(&svm->vcpu, 6, &dr6))
+ return NESTED_EXIT_DONE;
+ if (!(dr6 & DR6_BS))
+ return NESTED_EXIT_DONE;
+
+ /* if the guest is singlestepping, it should get the vmexit */
+ if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) {
+ disable_nmi_singlestep(svm);
+ return NESTED_EXIT_DONE;
+ }
+
+ /* it's ours, the nested hypervisor must not see this one */
+ return NESTED_EXIT_HOST;
+}
+
+static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
+{
+ unsigned port, size, iopm_len;
+ u16 val, mask;
+ u8 start_bit;
+ u64 gpa;
+
+ if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
+ return NESTED_EXIT_HOST;
+
+ port = svm->vmcb->control.exit_info_1 >> 16;
+ size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
+ SVM_IOIO_SIZE_SHIFT;
+ gpa = svm->nested.vmcb_iopm + (port / 8);
+ start_bit = port % 8;
+ iopm_len = (start_bit + size > 8) ? 2 : 1;
+ mask = (0xf >> (4 - size)) << start_bit;
+ val = 0;
+
+ if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len))
+ return NESTED_EXIT_DONE;
+
+ return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
+}
+
+static int nested_svm_intercept(struct vcpu_svm *svm)
+{
+ u32 exit_code = svm->vmcb->control.exit_code;
+ int vmexit = NESTED_EXIT_HOST;
+
+ switch (exit_code) {
+ case SVM_EXIT_MSR:
+ vmexit = nested_svm_exit_handled_msr(svm);
+ break;
+ case SVM_EXIT_IOIO:
+ vmexit = nested_svm_intercept_ioio(svm);
+ break;
+ case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
+ u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0);
+ if (svm->nested.intercept_cr & bit)
+ vmexit = NESTED_EXIT_DONE;
+ break;
+ }
+ case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
+ u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0);
+ if (svm->nested.intercept_dr & bit)
+ vmexit = NESTED_EXIT_DONE;
+ break;
+ }
+ case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
+ u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
+ if (svm->nested.intercept_exceptions & excp_bits) {
+ if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR)
+ vmexit = nested_svm_intercept_db(svm);
+ else
+ vmexit = NESTED_EXIT_DONE;
+ }
+ /* async page fault always cause vmexit */
+ else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
+ svm->vcpu.arch.exception.nested_apf != 0)
+ vmexit = NESTED_EXIT_DONE;
+ break;
+ }
+ case SVM_EXIT_ERR: {
+ vmexit = NESTED_EXIT_DONE;
+ break;
+ }
+ default: {
+ u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
+ if (svm->nested.intercept & exit_bits)
+ vmexit = NESTED_EXIT_DONE;
+ }
+ }
+
+ return vmexit;
+}
+
+int nested_svm_exit_handled(struct vcpu_svm *svm)
+{
+ int vmexit;
+
+ vmexit = nested_svm_intercept(svm);
+
+ if (vmexit == NESTED_EXIT_DONE)
+ nested_svm_vmexit(svm);
+
+ return vmexit;
+}
+
+int nested_svm_check_permissions(struct vcpu_svm *svm)
+{
+ if (!(svm->vcpu.arch.efer & EFER_SVME) ||
+ !is_paging(&svm->vcpu)) {
+ kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+ return 1;
+ }
+
+ if (svm->vmcb->save.cpl) {
+ kvm_inject_gp(&svm->vcpu, 0);
+ return 1;
+ }
+
+ return 0;
+}
+
+int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
+ bool has_error_code, u32 error_code)
+{
+ int vmexit;
+
+ if (!is_guest_mode(&svm->vcpu))
+ return 0;
+
+ vmexit = nested_svm_intercept(svm);
+ if (vmexit != NESTED_EXIT_DONE)
+ return 0;
+
+ svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
+ svm->vmcb->control.exit_code_hi = 0;
+ svm->vmcb->control.exit_info_1 = error_code;
+
+ /*
+ * EXITINFO2 is undefined for all exception intercepts other
+ * than #PF.
+ */
+ if (svm->vcpu.arch.exception.nested_apf)
+ svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
+ else if (svm->vcpu.arch.exception.has_payload)
+ svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
+ else
+ svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
+
+ svm->nested.exit_required = true;
+ return vmexit;
+}
+
+static void nested_svm_intr(struct vcpu_svm *svm)
+{
+ svm->vmcb->control.exit_code = SVM_EXIT_INTR;
+ svm->vmcb->control.exit_info_1 = 0;
+ svm->vmcb->control.exit_info_2 = 0;
+
+ /* nested_svm_vmexit this gets called afterwards from handle_exit */
+ svm->nested.exit_required = true;
+ trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
+}
+
+static bool nested_exit_on_intr(struct vcpu_svm *svm)
+{
+ return (svm->nested.intercept & 1ULL);
+}
+
+int svm_check_nested_events(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ bool block_nested_events =
+ kvm_event_needs_reinjection(vcpu) || svm->nested.exit_required;
+
+ if (kvm_cpu_has_interrupt(vcpu) && nested_exit_on_intr(svm)) {
+ if (block_nested_events)
+ return -EBUSY;
+ nested_svm_intr(svm);
+ return 0;
+ }
+
+ return 0;
+}
+
+int nested_svm_exit_special(struct vcpu_svm *svm)
+{
+ u32 exit_code = svm->vmcb->control.exit_code;
+
+ switch (exit_code) {
+ case SVM_EXIT_INTR:
+ case SVM_EXIT_NMI:
+ case SVM_EXIT_EXCP_BASE + MC_VECTOR:
+ return NESTED_EXIT_HOST;
+ case SVM_EXIT_NPF:
+ /* For now we are always handling NPFs when using them */
+ if (npt_enabled)
+ return NESTED_EXIT_HOST;
+ break;
+ case SVM_EXIT_EXCP_BASE + PF_VECTOR:
+ /* When we're shadowing, trap PFs, but not async PF */
+ if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0)
+ return NESTED_EXIT_HOST;
+ break;
+ default:
+ break;
+ }
+
+ return NESTED_EXIT_CONTINUE;
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 2125c6ae5951..b74ebc19e1f6 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -52,6 +52,8 @@
#include <asm/virtext.h>
#include "trace.h"

+#include "svm.h"
+
#define __ex(x) __kvm_handle_fault_on_reboot(x)

MODULE_AUTHOR("Qumranet");
@@ -79,10 +81,6 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);

#define SVM_AVIC_DOORBELL 0xc001011b

-#define NESTED_EXIT_HOST 0 /* Exit handled on host level */
-#define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */
-#define NESTED_EXIT_CONTINUE 2 /* Further checks needed */
-
#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))

#define TSC_RATIO_RSVD 0xffffff0000000000ULL
@@ -116,68 +114,7 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);

static bool erratum_383_found __read_mostly;

-static const u32 host_save_user_msrs[] = {
-#ifdef CONFIG_X86_64
- MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
- MSR_FS_BASE,
-#endif
- MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
- MSR_TSC_AUX,
-};
-
-#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
-
-struct kvm_sev_info {
- bool active; /* SEV enabled guest */
- unsigned int asid; /* ASID used for this guest */
- unsigned int handle; /* SEV firmware handle */
- int fd; /* SEV device fd */
- unsigned long pages_locked; /* Number of pages locked */
- struct list_head regions_list; /* List of registered regions */
-};
-
-struct kvm_svm {
- struct kvm kvm;
-
- /* Struct members for AVIC */
- u32 avic_vm_id;
- struct page *avic_logical_id_table_page;
- struct page *avic_physical_id_table_page;
- struct hlist_node hnode;
-
- struct kvm_sev_info sev_info;
-};
-
-struct kvm_vcpu;
-
-struct nested_state {
- struct vmcb *hsave;
- u64 hsave_msr;
- u64 vm_cr_msr;
- u64 vmcb;
-
- /* These are the merged vectors */
- u32 *msrpm;
-
- /* gpa pointers to the real vectors */
- u64 vmcb_msrpm;
- u64 vmcb_iopm;
-
- /* A VMEXIT is required but not yet emulated */
- bool exit_required;
-
- /* cache for intercepts of the guest */
- u32 intercept_cr;
- u32 intercept_dr;
- u32 intercept_exceptions;
- u64 intercept;
-
- /* Nested Paging related state */
- u64 nested_cr3;
-};
-
-#define MSRPM_OFFSETS 16
-static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
+u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;

/*
* Set osvw_len to higher value when updated Revision Guides
@@ -185,70 +122,6 @@ static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
*/
static uint64_t osvw_len = 4, osvw_status;

-struct vcpu_svm {
- struct kvm_vcpu vcpu;
- struct vmcb *vmcb;
- unsigned long vmcb_pa;
- struct svm_cpu_data *svm_data;
- uint64_t asid_generation;
- uint64_t sysenter_esp;
- uint64_t sysenter_eip;
- uint64_t tsc_aux;
-
- u64 msr_decfg;
-
- u64 next_rip;
-
- u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
- struct {
- u16 fs;
- u16 gs;
- u16 ldt;
- u64 gs_base;
- } host;
-
- u64 spec_ctrl;
- /*
- * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be
- * translated into the appropriate L2_CFG bits on the host to
- * perform speculative control.
- */
- u64 virt_spec_ctrl;
-
- u32 *msrpm;
-
- ulong nmi_iret_rip;
-
- struct nested_state nested;
-
- bool nmi_singlestep;
- u64 nmi_singlestep_guest_rflags;
-
- unsigned int3_injected;
- unsigned long int3_rip;
-
- /* cached guest cpuid flags for faster access */
- bool nrips_enabled : 1;
-
- u32 ldr_reg;
- u32 dfr_reg;
- struct page *avic_backing_page;
- u64 *avic_physical_id_cache;
- bool avic_is_running;
-
- /*
- * Per-vcpu list of struct amd_svm_iommu_ir:
- * This is used mainly to store interrupt remapping information used
- * when update the vcpu affinity. This avoids the need to scan for
- * IRTE and try to match ga_tag in the IOMMU driver.
- */
- struct list_head ir_list;
- spinlock_t ir_list_lock;
-
- /* which host CPU was used for running this vcpu */
- unsigned int last_cpu;
-};
-
/*
* This is a wrapper of struct amd_iommu_ir_data.
*/
@@ -269,8 +142,6 @@ struct amd_svm_iommu_ir {
static DEFINE_PER_CPU(u64, current_tsc_ratio);
#define TSC_RATIO_DEFAULT 0x0100000000ULL

-#define MSR_INVALID 0xffffffffU
-
static const struct svm_direct_access_msrs {
u32 index; /* Index of the MSR */
bool always; /* True if intercept is always on */
@@ -296,9 +167,9 @@ static const struct svm_direct_access_msrs {

/* enable NPT for AMD64 and X86 with PAE */
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
-static bool npt_enabled = true;
+bool npt_enabled = true;
#else
-static bool npt_enabled;
+bool npt_enabled;
#endif

/*
@@ -384,41 +255,10 @@ module_param(dump_invalid_vmcb, bool, 0644);

static u8 rsm_ins_bytes[] = "\x0f\xaa";

-static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
-static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
static void svm_complete_interrupts(struct vcpu_svm *svm);
static void svm_toggle_avic_for_irq_window(struct kvm_vcpu *vcpu, bool activate);
static inline void avic_post_state_restore(struct kvm_vcpu *vcpu);

-static int nested_svm_exit_handled(struct vcpu_svm *svm);
-static int nested_svm_intercept(struct vcpu_svm *svm);
-static int nested_svm_vmexit(struct vcpu_svm *svm);
-static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
- bool has_error_code, u32 error_code);
-
-enum {
- VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
- pause filter count */
- VMCB_PERM_MAP, /* IOPM Base and MSRPM Base */
- VMCB_ASID, /* ASID */
- VMCB_INTR, /* int_ctl, int_vector */
- VMCB_NPT, /* npt_en, nCR3, gPAT */
- VMCB_CR, /* CR0, CR3, CR4, EFER */
- VMCB_DR, /* DR6, DR7 */
- VMCB_DT, /* GDT, IDT */
- VMCB_SEG, /* CS, DS, SS, ES, CPL */
- VMCB_CR2, /* CR2 only */
- VMCB_LBR, /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
- VMCB_AVIC, /* AVIC APIC_BAR, AVIC APIC_BACKING_PAGE,
- * AVIC PHYSICAL_TABLE pointer,
- * AVIC LOGICAL_TABLE pointer
- */
- VMCB_DIRTY_MAX,
-};
-
-/* TPR and CR2 are always written before VMRUN */
-#define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2))
-
#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL

static int sev_flush_asids(void);
@@ -467,27 +307,6 @@ static inline int sev_get_asid(struct kvm *kvm)
return sev->asid;
}

-static inline void mark_all_dirty(struct vmcb *vmcb)
-{
- vmcb->control.clean = 0;
-}
-
-static inline void mark_all_clean(struct vmcb *vmcb)
-{
- vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1)
- & ~VMCB_ALWAYS_DIRTY_MASK;
-}
-
-static inline void mark_dirty(struct vmcb *vmcb, int bit)
-{
- vmcb->control.clean &= ~(1 << bit);
-}
-
-static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
-{
- return container_of(vcpu, struct vcpu_svm, vcpu);
-}
-
static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data)
{
svm->vmcb->control.avic_vapic_bar = data & VMCB_AVIC_APIC_BAR_MASK;
@@ -505,183 +324,6 @@ static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu)
return (READ_ONCE(*entry) & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
}

-static void recalc_intercepts(struct vcpu_svm *svm)
-{
- struct vmcb_control_area *c, *h;
- struct nested_state *g;
-
- mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
-
- if (!is_guest_mode(&svm->vcpu))
- return;
-
- c = &svm->vmcb->control;
- h = &svm->nested.hsave->control;
- g = &svm->nested;
-
- c->intercept_cr = h->intercept_cr;
- c->intercept_dr = h->intercept_dr;
- c->intercept_exceptions = h->intercept_exceptions;
- c->intercept = h->intercept;
-
- if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
- /* We only want the cr8 intercept bits of L1 */
- c->intercept_cr &= ~(1U << INTERCEPT_CR8_READ);
- c->intercept_cr &= ~(1U << INTERCEPT_CR8_WRITE);
-
- /*
- * Once running L2 with HF_VINTR_MASK, EFLAGS.IF does not
- * affect any interrupt we may want to inject; therefore,
- * interrupt window vmexits are irrelevant to L0.
- */
- c->intercept &= ~(1ULL << INTERCEPT_VINTR);
- }
-
- /* We don't want to see VMMCALLs from a nested guest */
- c->intercept &= ~(1ULL << INTERCEPT_VMMCALL);
-
- c->intercept_cr |= g->intercept_cr;
- c->intercept_dr |= g->intercept_dr;
- c->intercept_exceptions |= g->intercept_exceptions;
- c->intercept |= g->intercept;
-}
-
-static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
-{
- if (is_guest_mode(&svm->vcpu))
- return svm->nested.hsave;
- else
- return svm->vmcb;
-}
-
-static inline void set_cr_intercept(struct vcpu_svm *svm, int bit)
-{
- struct vmcb *vmcb = get_host_vmcb(svm);
-
- vmcb->control.intercept_cr |= (1U << bit);
-
- recalc_intercepts(svm);
-}
-
-static inline void clr_cr_intercept(struct vcpu_svm *svm, int bit)
-{
- struct vmcb *vmcb = get_host_vmcb(svm);
-
- vmcb->control.intercept_cr &= ~(1U << bit);
-
- recalc_intercepts(svm);
-}
-
-static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit)
-{
- struct vmcb *vmcb = get_host_vmcb(svm);
-
- return vmcb->control.intercept_cr & (1U << bit);
-}
-
-static inline void set_dr_intercepts(struct vcpu_svm *svm)
-{
- struct vmcb *vmcb = get_host_vmcb(svm);
-
- vmcb->control.intercept_dr = (1 << INTERCEPT_DR0_READ)
- | (1 << INTERCEPT_DR1_READ)
- | (1 << INTERCEPT_DR2_READ)
- | (1 << INTERCEPT_DR3_READ)
- | (1 << INTERCEPT_DR4_READ)
- | (1 << INTERCEPT_DR5_READ)
- | (1 << INTERCEPT_DR6_READ)
- | (1 << INTERCEPT_DR7_READ)
- | (1 << INTERCEPT_DR0_WRITE)
- | (1 << INTERCEPT_DR1_WRITE)
- | (1 << INTERCEPT_DR2_WRITE)
- | (1 << INTERCEPT_DR3_WRITE)
- | (1 << INTERCEPT_DR4_WRITE)
- | (1 << INTERCEPT_DR5_WRITE)
- | (1 << INTERCEPT_DR6_WRITE)
- | (1 << INTERCEPT_DR7_WRITE);
-
- recalc_intercepts(svm);
-}
-
-static inline void clr_dr_intercepts(struct vcpu_svm *svm)
-{
- struct vmcb *vmcb = get_host_vmcb(svm);
-
- vmcb->control.intercept_dr = 0;
-
- recalc_intercepts(svm);
-}
-
-static inline void set_exception_intercept(struct vcpu_svm *svm, int bit)
-{
- struct vmcb *vmcb = get_host_vmcb(svm);
-
- vmcb->control.intercept_exceptions |= (1U << bit);
-
- recalc_intercepts(svm);
-}
-
-static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit)
-{
- struct vmcb *vmcb = get_host_vmcb(svm);
-
- vmcb->control.intercept_exceptions &= ~(1U << bit);
-
- recalc_intercepts(svm);
-}
-
-static inline void set_intercept(struct vcpu_svm *svm, int bit)
-{
- struct vmcb *vmcb = get_host_vmcb(svm);
-
- vmcb->control.intercept |= (1ULL << bit);
-
- recalc_intercepts(svm);
-}
-
-static inline void clr_intercept(struct vcpu_svm *svm, int bit)
-{
- struct vmcb *vmcb = get_host_vmcb(svm);
-
- vmcb->control.intercept &= ~(1ULL << bit);
-
- recalc_intercepts(svm);
-}
-
-static inline bool is_intercept(struct vcpu_svm *svm, int bit)
-{
- return (svm->vmcb->control.intercept & (1ULL << bit)) != 0;
-}
-
-static inline bool vgif_enabled(struct vcpu_svm *svm)
-{
- return !!(svm->vmcb->control.int_ctl & V_GIF_ENABLE_MASK);
-}
-
-static inline void enable_gif(struct vcpu_svm *svm)
-{
- if (vgif_enabled(svm))
- svm->vmcb->control.int_ctl |= V_GIF_MASK;
- else
- svm->vcpu.arch.hflags |= HF_GIF_MASK;
-}
-
-static inline void disable_gif(struct vcpu_svm *svm)
-{
- if (vgif_enabled(svm))
- svm->vmcb->control.int_ctl &= ~V_GIF_MASK;
- else
- svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
-}
-
-static inline bool gif_set(struct vcpu_svm *svm)
-{
- if (vgif_enabled(svm))
- return !!(svm->vmcb->control.int_ctl & V_GIF_MASK);
- else
- return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
-}
-
static unsigned long iopm_base;

struct kvm_ldttss_desc {
@@ -717,7 +359,7 @@ static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
#define MSRS_RANGE_SIZE 2048
#define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)

-static u32 svm_msrpm_offset(u32 msr)
+u32 svm_msrpm_offset(u32 msr)
{
u32 offset;
int i;
@@ -764,7 +406,7 @@ static int get_npt_level(struct kvm_vcpu *vcpu)
#endif
}

-static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
+void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
vcpu->arch.efer = efer;

@@ -1195,7 +837,7 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
}

-static void disable_nmi_singlestep(struct vcpu_svm *svm)
+void disable_nmi_singlestep(struct vcpu_svm *svm)
{
svm->nmi_singlestep = false;

@@ -2649,7 +2291,7 @@ static void update_cr0_intercept(struct vcpu_svm *svm)
}
}

-static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
struct vcpu_svm *svm = to_svm(vcpu);

@@ -2683,7 +2325,7 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
update_cr0_intercept(svm);
}

-static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE;
unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
@@ -3019,776 +2661,6 @@ static int vmmcall_interception(struct vcpu_svm *svm)
return kvm_emulate_hypercall(&svm->vcpu);
}

-static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
-
- return svm->nested.nested_cr3;
-}
-
-static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
- u64 cr3 = svm->nested.nested_cr3;
- u64 pdpte;
- int ret;
-
- ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(__sme_clr(cr3)), &pdpte,
- offset_in_page(cr3) + index * 8, 8);
- if (ret)
- return 0;
- return pdpte;
-}
-
-static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
- struct x86_exception *fault)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
-
- if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) {
- /*
- * TODO: track the cause of the nested page fault, and
- * correctly fill in the high bits of exit_info_1.
- */
- svm->vmcb->control.exit_code = SVM_EXIT_NPF;
- svm->vmcb->control.exit_code_hi = 0;
- svm->vmcb->control.exit_info_1 = (1ULL << 32);
- svm->vmcb->control.exit_info_2 = fault->address;
- }
-
- svm->vmcb->control.exit_info_1 &= ~0xffffffffULL;
- svm->vmcb->control.exit_info_1 |= fault->error_code;
-
- /*
- * The present bit is always zero for page structure faults on real
- * hardware.
- */
- if (svm->vmcb->control.exit_info_1 & (2ULL << 32))
- svm->vmcb->control.exit_info_1 &= ~1;
-
- nested_svm_vmexit(svm);
-}
-
-static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
-{
- WARN_ON(mmu_is_nested(vcpu));
-
- vcpu->arch.mmu = &vcpu->arch.guest_mmu;
- kvm_init_shadow_mmu(vcpu);
- vcpu->arch.mmu->get_guest_pgd = nested_svm_get_tdp_cr3;
- vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr;
- vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
- vcpu->arch.mmu->shadow_root_level = get_npt_level(vcpu);
- reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu);
- vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
-}
-
-static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.mmu = &vcpu->arch.root_mmu;
- vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
-}
-
-static int nested_svm_check_permissions(struct vcpu_svm *svm)
-{
- if (!(svm->vcpu.arch.efer & EFER_SVME) ||
- !is_paging(&svm->vcpu)) {
- kvm_queue_exception(&svm->vcpu, UD_VECTOR);
- return 1;
- }
-
- if (svm->vmcb->save.cpl) {
- kvm_inject_gp(&svm->vcpu, 0);
- return 1;
- }
-
- return 0;
-}
-
-static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
- bool has_error_code, u32 error_code)
-{
- int vmexit;
-
- if (!is_guest_mode(&svm->vcpu))
- return 0;
-
- vmexit = nested_svm_intercept(svm);
- if (vmexit != NESTED_EXIT_DONE)
- return 0;
-
- svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
- svm->vmcb->control.exit_code_hi = 0;
- svm->vmcb->control.exit_info_1 = error_code;
-
- /*
- * EXITINFO2 is undefined for all exception intercepts other
- * than #PF.
- */
- if (svm->vcpu.arch.exception.nested_apf)
- svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
- else if (svm->vcpu.arch.exception.has_payload)
- svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
- else
- svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
-
- svm->nested.exit_required = true;
- return vmexit;
-}
-
-static void nested_svm_intr(struct vcpu_svm *svm)
-{
- svm->vmcb->control.exit_code = SVM_EXIT_INTR;
- svm->vmcb->control.exit_info_1 = 0;
- svm->vmcb->control.exit_info_2 = 0;
-
- /* nested_svm_vmexit this gets called afterwards from handle_exit */
- svm->nested.exit_required = true;
- trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
-}
-
-static bool nested_exit_on_intr(struct vcpu_svm *svm)
-{
- return (svm->nested.intercept & 1ULL);
-}
-
-static int svm_check_nested_events(struct kvm_vcpu *vcpu)
-{
- struct vcpu_svm *svm = to_svm(vcpu);
- bool block_nested_events =
- kvm_event_needs_reinjection(vcpu) || svm->nested.exit_required;
-
- if (kvm_cpu_has_interrupt(vcpu) && nested_exit_on_intr(svm)) {
- if (block_nested_events)
- return -EBUSY;
- nested_svm_intr(svm);
- return 0;
- }
-
- return 0;
-}
-
-/* This function returns true if it is save to enable the nmi window */
-static inline bool nested_svm_nmi(struct vcpu_svm *svm)
-{
- if (!is_guest_mode(&svm->vcpu))
- return true;
-
- if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI)))
- return true;
-
- svm->vmcb->control.exit_code = SVM_EXIT_NMI;
- svm->nested.exit_required = true;
-
- return false;
-}
-
-static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
-{
- unsigned port, size, iopm_len;
- u16 val, mask;
- u8 start_bit;
- u64 gpa;
-
- if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
- return NESTED_EXIT_HOST;
-
- port = svm->vmcb->control.exit_info_1 >> 16;
- size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
- SVM_IOIO_SIZE_SHIFT;
- gpa = svm->nested.vmcb_iopm + (port / 8);
- start_bit = port % 8;
- iopm_len = (start_bit + size > 8) ? 2 : 1;
- mask = (0xf >> (4 - size)) << start_bit;
- val = 0;
-
- if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len))
- return NESTED_EXIT_DONE;
-
- return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
-}
-
-static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
-{
- u32 offset, msr, value;
- int write, mask;
-
- if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
- return NESTED_EXIT_HOST;
-
- msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
- offset = svm_msrpm_offset(msr);
- write = svm->vmcb->control.exit_info_1 & 1;
- mask = 1 << ((2 * (msr & 0xf)) + write);
-
- if (offset == MSR_INVALID)
- return NESTED_EXIT_DONE;
-
- /* Offset is in 32 bit units but need in 8 bit units */
- offset *= 4;
-
- if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.vmcb_msrpm + offset, &value, 4))
- return NESTED_EXIT_DONE;
-
- return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
-}
-
-/* DB exceptions for our internal use must not cause vmexit */
-static int nested_svm_intercept_db(struct vcpu_svm *svm)
-{
- unsigned long dr6;
-
- /* if we're not singlestepping, it's not ours */
- if (!svm->nmi_singlestep)
- return NESTED_EXIT_DONE;
-
- /* if it's not a singlestep exception, it's not ours */
- if (kvm_get_dr(&svm->vcpu, 6, &dr6))
- return NESTED_EXIT_DONE;
- if (!(dr6 & DR6_BS))
- return NESTED_EXIT_DONE;
-
- /* if the guest is singlestepping, it should get the vmexit */
- if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) {
- disable_nmi_singlestep(svm);
- return NESTED_EXIT_DONE;
- }
-
- /* it's ours, the nested hypervisor must not see this one */
- return NESTED_EXIT_HOST;
-}
-
-static int nested_svm_exit_special(struct vcpu_svm *svm)
-{
- u32 exit_code = svm->vmcb->control.exit_code;
-
- switch (exit_code) {
- case SVM_EXIT_INTR:
- case SVM_EXIT_NMI:
- case SVM_EXIT_EXCP_BASE + MC_VECTOR:
- return NESTED_EXIT_HOST;
- case SVM_EXIT_NPF:
- /* For now we are always handling NPFs when using them */
- if (npt_enabled)
- return NESTED_EXIT_HOST;
- break;
- case SVM_EXIT_EXCP_BASE + PF_VECTOR:
- /* When we're shadowing, trap PFs, but not async PF */
- if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0)
- return NESTED_EXIT_HOST;
- break;
- default:
- break;
- }
-
- return NESTED_EXIT_CONTINUE;
-}
-
-static int nested_svm_intercept(struct vcpu_svm *svm)
-{
- u32 exit_code = svm->vmcb->control.exit_code;
- int vmexit = NESTED_EXIT_HOST;
-
- switch (exit_code) {
- case SVM_EXIT_MSR:
- vmexit = nested_svm_exit_handled_msr(svm);
- break;
- case SVM_EXIT_IOIO:
- vmexit = nested_svm_intercept_ioio(svm);
- break;
- case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
- u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0);
- if (svm->nested.intercept_cr & bit)
- vmexit = NESTED_EXIT_DONE;
- break;
- }
- case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
- u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0);
- if (svm->nested.intercept_dr & bit)
- vmexit = NESTED_EXIT_DONE;
- break;
- }
- case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
- u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
- if (svm->nested.intercept_exceptions & excp_bits) {
- if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR)
- vmexit = nested_svm_intercept_db(svm);
- else
- vmexit = NESTED_EXIT_DONE;
- }
- /* async page fault always cause vmexit */
- else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
- svm->vcpu.arch.exception.nested_apf != 0)
- vmexit = NESTED_EXIT_DONE;
- break;
- }
- case SVM_EXIT_ERR: {
- vmexit = NESTED_EXIT_DONE;
- break;
- }
- default: {
- u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
- if (svm->nested.intercept & exit_bits)
- vmexit = NESTED_EXIT_DONE;
- }
- }
-
- return vmexit;
-}
-
-static int nested_svm_exit_handled(struct vcpu_svm *svm)
-{
- int vmexit;
-
- vmexit = nested_svm_intercept(svm);
-
- if (vmexit == NESTED_EXIT_DONE)
- nested_svm_vmexit(svm);
-
- return vmexit;
-}
-
-static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
-{
- struct vmcb_control_area *dst = &dst_vmcb->control;
- struct vmcb_control_area *from = &from_vmcb->control;
-
- dst->intercept_cr = from->intercept_cr;
- dst->intercept_dr = from->intercept_dr;
- dst->intercept_exceptions = from->intercept_exceptions;
- dst->intercept = from->intercept;
- dst->iopm_base_pa = from->iopm_base_pa;
- dst->msrpm_base_pa = from->msrpm_base_pa;
- dst->tsc_offset = from->tsc_offset;
- dst->asid = from->asid;
- dst->tlb_ctl = from->tlb_ctl;
- dst->int_ctl = from->int_ctl;
- dst->int_vector = from->int_vector;
- dst->int_state = from->int_state;
- dst->exit_code = from->exit_code;
- dst->exit_code_hi = from->exit_code_hi;
- dst->exit_info_1 = from->exit_info_1;
- dst->exit_info_2 = from->exit_info_2;
- dst->exit_int_info = from->exit_int_info;
- dst->exit_int_info_err = from->exit_int_info_err;
- dst->nested_ctl = from->nested_ctl;
- dst->event_inj = from->event_inj;
- dst->event_inj_err = from->event_inj_err;
- dst->nested_cr3 = from->nested_cr3;
- dst->virt_ext = from->virt_ext;
- dst->pause_filter_count = from->pause_filter_count;
- dst->pause_filter_thresh = from->pause_filter_thresh;
-}
-
-static int nested_svm_vmexit(struct vcpu_svm *svm)
-{
- int rc;
- struct vmcb *nested_vmcb;
- struct vmcb *hsave = svm->nested.hsave;
- struct vmcb *vmcb = svm->vmcb;
- struct kvm_host_map map;
-
- trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
- vmcb->control.exit_info_1,
- vmcb->control.exit_info_2,
- vmcb->control.exit_int_info,
- vmcb->control.exit_int_info_err,
- KVM_ISA_SVM);
-
- rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.vmcb), &map);
- if (rc) {
- if (rc == -EINVAL)
- kvm_inject_gp(&svm->vcpu, 0);
- return 1;
- }
-
- nested_vmcb = map.hva;
-
- /* Exit Guest-Mode */
- leave_guest_mode(&svm->vcpu);
- svm->nested.vmcb = 0;
-
- /* Give the current vmcb to the guest */
- disable_gif(svm);
-
- nested_vmcb->save.es = vmcb->save.es;
- nested_vmcb->save.cs = vmcb->save.cs;
- nested_vmcb->save.ss = vmcb->save.ss;
- nested_vmcb->save.ds = vmcb->save.ds;
- nested_vmcb->save.gdtr = vmcb->save.gdtr;
- nested_vmcb->save.idtr = vmcb->save.idtr;
- nested_vmcb->save.efer = svm->vcpu.arch.efer;
- nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu);
- nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu);
- nested_vmcb->save.cr2 = vmcb->save.cr2;
- nested_vmcb->save.cr4 = svm->vcpu.arch.cr4;
- nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
- nested_vmcb->save.rip = vmcb->save.rip;
- nested_vmcb->save.rsp = vmcb->save.rsp;
- nested_vmcb->save.rax = vmcb->save.rax;
- nested_vmcb->save.dr7 = vmcb->save.dr7;
- nested_vmcb->save.dr6 = vmcb->save.dr6;
- nested_vmcb->save.cpl = vmcb->save.cpl;
-
- nested_vmcb->control.int_ctl = vmcb->control.int_ctl;
- nested_vmcb->control.int_vector = vmcb->control.int_vector;
- nested_vmcb->control.int_state = vmcb->control.int_state;
- nested_vmcb->control.exit_code = vmcb->control.exit_code;
- nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi;
- nested_vmcb->control.exit_info_1 = vmcb->control.exit_info_1;
- nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
- nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
- nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
-
- if (svm->nrips_enabled)
- nested_vmcb->control.next_rip = vmcb->control.next_rip;
-
- /*
- * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
- * to make sure that we do not lose injected events. So check event_inj
- * here and copy it to exit_int_info if it is valid.
- * Exit_int_info and event_inj can't be both valid because the case
- * below only happens on a VMRUN instruction intercept which has
- * no valid exit_int_info set.
- */
- if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
- struct vmcb_control_area *nc = &nested_vmcb->control;
-
- nc->exit_int_info = vmcb->control.event_inj;
- nc->exit_int_info_err = vmcb->control.event_inj_err;
- }
-
- nested_vmcb->control.tlb_ctl = 0;
- nested_vmcb->control.event_inj = 0;
- nested_vmcb->control.event_inj_err = 0;
-
- nested_vmcb->control.pause_filter_count =
- svm->vmcb->control.pause_filter_count;
- nested_vmcb->control.pause_filter_thresh =
- svm->vmcb->control.pause_filter_thresh;
-
- /* We always set V_INTR_MASKING and remember the old value in hflags */
- if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
- nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
-
- /* Restore the original control entries */
- copy_vmcb_control_area(vmcb, hsave);
-
- svm->vcpu.arch.tsc_offset = svm->vmcb->control.tsc_offset;
- kvm_clear_exception_queue(&svm->vcpu);
- kvm_clear_interrupt_queue(&svm->vcpu);
-
- svm->nested.nested_cr3 = 0;
-
- /* Restore selected save entries */
- svm->vmcb->save.es = hsave->save.es;
- svm->vmcb->save.cs = hsave->save.cs;
- svm->vmcb->save.ss = hsave->save.ss;
- svm->vmcb->save.ds = hsave->save.ds;
- svm->vmcb->save.gdtr = hsave->save.gdtr;
- svm->vmcb->save.idtr = hsave->save.idtr;
- kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
- svm_set_efer(&svm->vcpu, hsave->save.efer);
- svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
- svm_set_cr4(&svm->vcpu, hsave->save.cr4);
- if (npt_enabled) {
- svm->vmcb->save.cr3 = hsave->save.cr3;
- svm->vcpu.arch.cr3 = hsave->save.cr3;
- } else {
- (void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
- }
- kvm_rax_write(&svm->vcpu, hsave->save.rax);
- kvm_rsp_write(&svm->vcpu, hsave->save.rsp);
- kvm_rip_write(&svm->vcpu, hsave->save.rip);
- svm->vmcb->save.dr7 = 0;
- svm->vmcb->save.cpl = 0;
- svm->vmcb->control.exit_int_info = 0;
-
- mark_all_dirty(svm->vmcb);
-
- kvm_vcpu_unmap(&svm->vcpu, &map, true);
-
- nested_svm_uninit_mmu_context(&svm->vcpu);
- kvm_mmu_reset_context(&svm->vcpu);
- kvm_mmu_load(&svm->vcpu);
-
- /*
- * Drop what we picked up for L2 via svm_complete_interrupts() so it
- * doesn't end up in L1.
- */
- svm->vcpu.arch.nmi_injected = false;
- kvm_clear_exception_queue(&svm->vcpu);
- kvm_clear_interrupt_queue(&svm->vcpu);
-
- return 0;
-}
-
-static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
-{
- /*
- * This function merges the msr permission bitmaps of kvm and the
- * nested vmcb. It is optimized in that it only merges the parts where
- * the kvm msr permission bitmap may contain zero bits
- */
- int i;
-
- if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
- return true;
-
- for (i = 0; i < MSRPM_OFFSETS; i++) {
- u32 value, p;
- u64 offset;
-
- if (msrpm_offsets[i] == 0xffffffff)
- break;
-
- p = msrpm_offsets[i];
- offset = svm->nested.vmcb_msrpm + (p * 4);
-
- if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
- return false;
-
- svm->nested.msrpm[p] = svm->msrpm[p] | value;
- }
-
- svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm));
-
- return true;
-}
-
-static bool nested_vmcb_checks(struct vmcb *vmcb)
-{
- if ((vmcb->save.efer & EFER_SVME) == 0)
- return false;
-
- if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
- return false;
-
- if (vmcb->control.asid == 0)
- return false;
-
- if ((vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
- !npt_enabled)
- return false;
-
- return true;
-}
-
-static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
- struct vmcb *nested_vmcb, struct kvm_host_map *map)
-{
- bool evaluate_pending_interrupts =
- is_intercept(svm, INTERCEPT_VINTR) ||
- is_intercept(svm, INTERCEPT_IRET);
-
- if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
- svm->vcpu.arch.hflags |= HF_HIF_MASK;
- else
- svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
-
- if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) {
- svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
- nested_svm_init_mmu_context(&svm->vcpu);
- }
-
- /* Load the nested guest state */
- svm->vmcb->save.es = nested_vmcb->save.es;
- svm->vmcb->save.cs = nested_vmcb->save.cs;
- svm->vmcb->save.ss = nested_vmcb->save.ss;
- svm->vmcb->save.ds = nested_vmcb->save.ds;
- svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
- svm->vmcb->save.idtr = nested_vmcb->save.idtr;
- kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
- svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
- svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
- svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
- if (npt_enabled) {
- svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
- svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
- } else
- (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
-
- /* Guest paging mode is active - reset mmu */
- kvm_mmu_reset_context(&svm->vcpu);
-
- svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
- kvm_rax_write(&svm->vcpu, nested_vmcb->save.rax);
- kvm_rsp_write(&svm->vcpu, nested_vmcb->save.rsp);
- kvm_rip_write(&svm->vcpu, nested_vmcb->save.rip);
-
- /* In case we don't even reach vcpu_run, the fields are not updated */
- svm->vmcb->save.rax = nested_vmcb->save.rax;
- svm->vmcb->save.rsp = nested_vmcb->save.rsp;
- svm->vmcb->save.rip = nested_vmcb->save.rip;
- svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
- svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
- svm->vmcb->save.cpl = nested_vmcb->save.cpl;
-
- svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
- svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL;
-
- /* cache intercepts */
- svm->nested.intercept_cr = nested_vmcb->control.intercept_cr;
- svm->nested.intercept_dr = nested_vmcb->control.intercept_dr;
- svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
- svm->nested.intercept = nested_vmcb->control.intercept;
-
- svm_flush_tlb(&svm->vcpu, true);
- svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
- if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
- svm->vcpu.arch.hflags |= HF_VINTR_MASK;
- else
- svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
-
- svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset;
- svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;
-
- svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
- svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
- svm->vmcb->control.int_state = nested_vmcb->control.int_state;
- svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
- svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
-
- svm->vmcb->control.pause_filter_count =
- nested_vmcb->control.pause_filter_count;
- svm->vmcb->control.pause_filter_thresh =
- nested_vmcb->control.pause_filter_thresh;
-
- kvm_vcpu_unmap(&svm->vcpu, map, true);
-
- /* Enter Guest-Mode */
- enter_guest_mode(&svm->vcpu);
-
- /*
- * Merge guest and host intercepts - must be called with vcpu in
- * guest-mode to take affect here
- */
- recalc_intercepts(svm);
-
- svm->nested.vmcb = vmcb_gpa;
-
- /*
- * If L1 had a pending IRQ/NMI before executing VMRUN,
- * which wasn't delivered because it was disallowed (e.g.
- * interrupts disabled), L0 needs to evaluate if this pending
- * event should cause an exit from L2 to L1 or be delivered
- * directly to L2.
- *
- * Usually this would be handled by the processor noticing an
- * IRQ/NMI window request. However, VMRUN can unblock interrupts
- * by implicitly setting GIF, so force L0 to perform pending event
- * evaluation by requesting a KVM_REQ_EVENT.
- */
- enable_gif(svm);
- if (unlikely(evaluate_pending_interrupts))
- kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
-
- mark_all_dirty(svm->vmcb);
-}
-
-static int nested_svm_vmrun(struct vcpu_svm *svm)
-{
- int ret;
- struct vmcb *nested_vmcb;
- struct vmcb *hsave = svm->nested.hsave;
- struct vmcb *vmcb = svm->vmcb;
- struct kvm_host_map map;
- u64 vmcb_gpa;
-
- vmcb_gpa = svm->vmcb->save.rax;
-
- ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
- if (ret == -EINVAL) {
- kvm_inject_gp(&svm->vcpu, 0);
- return 1;
- } else if (ret) {
- return kvm_skip_emulated_instruction(&svm->vcpu);
- }
-
- ret = kvm_skip_emulated_instruction(&svm->vcpu);
-
- nested_vmcb = map.hva;
-
- if (!nested_vmcb_checks(nested_vmcb)) {
- nested_vmcb->control.exit_code = SVM_EXIT_ERR;
- nested_vmcb->control.exit_code_hi = 0;
- nested_vmcb->control.exit_info_1 = 0;
- nested_vmcb->control.exit_info_2 = 0;
-
- kvm_vcpu_unmap(&svm->vcpu, &map, true);
-
- return ret;
- }
-
- trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
- nested_vmcb->save.rip,
- nested_vmcb->control.int_ctl,
- nested_vmcb->control.event_inj,
- nested_vmcb->control.nested_ctl);
-
- trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff,
- nested_vmcb->control.intercept_cr >> 16,
- nested_vmcb->control.intercept_exceptions,
- nested_vmcb->control.intercept);
-
- /* Clear internal status */
- kvm_clear_exception_queue(&svm->vcpu);
- kvm_clear_interrupt_queue(&svm->vcpu);
-
- /*
- * Save the old vmcb, so we don't need to pick what we save, but can
- * restore everything when a VMEXIT occurs
- */
- hsave->save.es = vmcb->save.es;
- hsave->save.cs = vmcb->save.cs;
- hsave->save.ss = vmcb->save.ss;
- hsave->save.ds = vmcb->save.ds;
- hsave->save.gdtr = vmcb->save.gdtr;
- hsave->save.idtr = vmcb->save.idtr;
- hsave->save.efer = svm->vcpu.arch.efer;
- hsave->save.cr0 = kvm_read_cr0(&svm->vcpu);
- hsave->save.cr4 = svm->vcpu.arch.cr4;
- hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
- hsave->save.rip = kvm_rip_read(&svm->vcpu);
- hsave->save.rsp = vmcb->save.rsp;
- hsave->save.rax = vmcb->save.rax;
- if (npt_enabled)
- hsave->save.cr3 = vmcb->save.cr3;
- else
- hsave->save.cr3 = kvm_read_cr3(&svm->vcpu);
-
- copy_vmcb_control_area(hsave, vmcb);
-
- enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map);
-
- if (!nested_svm_vmrun_msrpm(svm)) {
- svm->vmcb->control.exit_code = SVM_EXIT_ERR;
- svm->vmcb->control.exit_code_hi = 0;
- svm->vmcb->control.exit_info_1 = 0;
- svm->vmcb->control.exit_info_2 = 0;
-
- nested_svm_vmexit(svm);
- }
-
- return ret;
-}
-
-static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
-{
- to_vmcb->save.fs = from_vmcb->save.fs;
- to_vmcb->save.gs = from_vmcb->save.gs;
- to_vmcb->save.tr = from_vmcb->save.tr;
- to_vmcb->save.ldtr = from_vmcb->save.ldtr;
- to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
- to_vmcb->save.star = from_vmcb->save.star;
- to_vmcb->save.lstar = from_vmcb->save.lstar;
- to_vmcb->save.cstar = from_vmcb->save.cstar;
- to_vmcb->save.sfmask = from_vmcb->save.sfmask;
- to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
- to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
- to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
-}
-
static int vmload_interception(struct vcpu_svm *svm)
{
struct vmcb *nested_vmcb;
@@ -5183,11 +4055,6 @@ static void svm_set_irq(struct kvm_vcpu *vcpu)
SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
}

-static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu)
-{
- return is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK);
-}
-
static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -5629,7 +4496,7 @@ static int svm_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
return 0;
}

-static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
+void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
{
struct vcpu_svm *svm = to_svm(vcpu);

diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
new file mode 100644
index 000000000000..f4c446d7a31e
--- /dev/null
+++ b/arch/x86/kvm/svm/svm.h
@@ -0,0 +1,381 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Kernel-based Virtual Machine driver for Linux
+ *
+ * AMD SVM support
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ * Copyright 2010 Red Hat, Inc. and/or its affiliates.
+ *
+ * Authors:
+ * Yaniv Kamay <yaniv@xxxxxxxxxxxx>
+ * Avi Kivity <avi@xxxxxxxxxxxx>
+ */
+
+#ifndef __SVM_SVM_H
+#define __SVM_SVM_H
+
+#include <linux/kvm_types.h>
+#include <linux/kvm_host.h>
+
+#include <asm/svm.h>
+
+static const u32 host_save_user_msrs[] = {
+#ifdef CONFIG_X86_64
+ MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
+ MSR_FS_BASE,
+#endif
+ MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
+ MSR_TSC_AUX,
+};
+
+#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
+
+#define MSRPM_OFFSETS 16
+extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
+extern bool npt_enabled;
+
+enum {
+ VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
+ pause filter count */
+ VMCB_PERM_MAP, /* IOPM Base and MSRPM Base */
+ VMCB_ASID, /* ASID */
+ VMCB_INTR, /* int_ctl, int_vector */
+ VMCB_NPT, /* npt_en, nCR3, gPAT */
+ VMCB_CR, /* CR0, CR3, CR4, EFER */
+ VMCB_DR, /* DR6, DR7 */
+ VMCB_DT, /* GDT, IDT */
+ VMCB_SEG, /* CS, DS, SS, ES, CPL */
+ VMCB_CR2, /* CR2 only */
+ VMCB_LBR, /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
+ VMCB_AVIC, /* AVIC APIC_BAR, AVIC APIC_BACKING_PAGE,
+ * AVIC PHYSICAL_TABLE pointer,
+ * AVIC LOGICAL_TABLE pointer
+ */
+ VMCB_DIRTY_MAX,
+};
+
+/* TPR and CR2 are always written before VMRUN */
+#define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2))
+
+struct kvm_sev_info {
+ bool active; /* SEV enabled guest */
+ unsigned int asid; /* ASID used for this guest */
+ unsigned int handle; /* SEV firmware handle */
+ int fd; /* SEV device fd */
+ unsigned long pages_locked; /* Number of pages locked */
+ struct list_head regions_list; /* List of registered regions */
+};
+
+struct kvm_svm {
+ struct kvm kvm;
+
+ /* Struct members for AVIC */
+ u32 avic_vm_id;
+ struct page *avic_logical_id_table_page;
+ struct page *avic_physical_id_table_page;
+ struct hlist_node hnode;
+
+ struct kvm_sev_info sev_info;
+};
+
+struct kvm_vcpu;
+
+struct nested_state {
+ struct vmcb *hsave;
+ u64 hsave_msr;
+ u64 vm_cr_msr;
+ u64 vmcb;
+
+ /* These are the merged vectors */
+ u32 *msrpm;
+
+ /* gpa pointers to the real vectors */
+ u64 vmcb_msrpm;
+ u64 vmcb_iopm;
+
+ /* A VMEXIT is required but not yet emulated */
+ bool exit_required;
+
+ /* cache for intercepts of the guest */
+ u32 intercept_cr;
+ u32 intercept_dr;
+ u32 intercept_exceptions;
+ u64 intercept;
+
+ /* Nested Paging related state */
+ u64 nested_cr3;
+};
+
+struct vcpu_svm {
+ struct kvm_vcpu vcpu;
+ struct vmcb *vmcb;
+ unsigned long vmcb_pa;
+ struct svm_cpu_data *svm_data;
+ uint64_t asid_generation;
+ uint64_t sysenter_esp;
+ uint64_t sysenter_eip;
+ uint64_t tsc_aux;
+
+ u64 msr_decfg;
+
+ u64 next_rip;
+
+ u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
+ struct {
+ u16 fs;
+ u16 gs;
+ u16 ldt;
+ u64 gs_base;
+ } host;
+
+ u64 spec_ctrl;
+ /*
+ * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be
+ * translated into the appropriate L2_CFG bits on the host to
+ * perform speculative control.
+ */
+ u64 virt_spec_ctrl;
+
+ u32 *msrpm;
+
+ ulong nmi_iret_rip;
+
+ struct nested_state nested;
+
+ bool nmi_singlestep;
+ u64 nmi_singlestep_guest_rflags;
+
+ unsigned int3_injected;
+ unsigned long int3_rip;
+
+ /* cached guest cpuid flags for faster access */
+ bool nrips_enabled : 1;
+
+ u32 ldr_reg;
+ u32 dfr_reg;
+ struct page *avic_backing_page;
+ u64 *avic_physical_id_cache;
+ bool avic_is_running;
+
+ /*
+ * Per-vcpu list of struct amd_svm_iommu_ir:
+ * This is used mainly to store interrupt remapping information used
+ * when update the vcpu affinity. This avoids the need to scan for
+ * IRTE and try to match ga_tag in the IOMMU driver.
+ */
+ struct list_head ir_list;
+ spinlock_t ir_list_lock;
+
+ /* which host CPU was used for running this vcpu */
+ unsigned int last_cpu;
+};
+
+void recalc_intercepts(struct vcpu_svm *svm);
+
+static inline void mark_all_dirty(struct vmcb *vmcb)
+{
+ vmcb->control.clean = 0;
+}
+
+static inline void mark_all_clean(struct vmcb *vmcb)
+{
+ vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1)
+ & ~VMCB_ALWAYS_DIRTY_MASK;
+}
+
+static inline void mark_dirty(struct vmcb *vmcb, int bit)
+{
+ vmcb->control.clean &= ~(1 << bit);
+}
+
+static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
+{
+ return container_of(vcpu, struct vcpu_svm, vcpu);
+}
+
+static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
+{
+ if (is_guest_mode(&svm->vcpu))
+ return svm->nested.hsave;
+ else
+ return svm->vmcb;
+}
+
+static inline void set_cr_intercept(struct vcpu_svm *svm, int bit)
+{
+ struct vmcb *vmcb = get_host_vmcb(svm);
+
+ vmcb->control.intercept_cr |= (1U << bit);
+
+ recalc_intercepts(svm);
+}
+
+static inline void clr_cr_intercept(struct vcpu_svm *svm, int bit)
+{
+ struct vmcb *vmcb = get_host_vmcb(svm);
+
+ vmcb->control.intercept_cr &= ~(1U << bit);
+
+ recalc_intercepts(svm);
+}
+
+static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit)
+{
+ struct vmcb *vmcb = get_host_vmcb(svm);
+
+ return vmcb->control.intercept_cr & (1U << bit);
+}
+
+static inline void set_dr_intercepts(struct vcpu_svm *svm)
+{
+ struct vmcb *vmcb = get_host_vmcb(svm);
+
+ vmcb->control.intercept_dr = (1 << INTERCEPT_DR0_READ)
+ | (1 << INTERCEPT_DR1_READ)
+ | (1 << INTERCEPT_DR2_READ)
+ | (1 << INTERCEPT_DR3_READ)
+ | (1 << INTERCEPT_DR4_READ)
+ | (1 << INTERCEPT_DR5_READ)
+ | (1 << INTERCEPT_DR6_READ)
+ | (1 << INTERCEPT_DR7_READ)
+ | (1 << INTERCEPT_DR0_WRITE)
+ | (1 << INTERCEPT_DR1_WRITE)
+ | (1 << INTERCEPT_DR2_WRITE)
+ | (1 << INTERCEPT_DR3_WRITE)
+ | (1 << INTERCEPT_DR4_WRITE)
+ | (1 << INTERCEPT_DR5_WRITE)
+ | (1 << INTERCEPT_DR6_WRITE)
+ | (1 << INTERCEPT_DR7_WRITE);
+
+ recalc_intercepts(svm);
+}
+
+static inline void clr_dr_intercepts(struct vcpu_svm *svm)
+{
+ struct vmcb *vmcb = get_host_vmcb(svm);
+
+ vmcb->control.intercept_dr = 0;
+
+ recalc_intercepts(svm);
+}
+
+static inline void set_exception_intercept(struct vcpu_svm *svm, int bit)
+{
+ struct vmcb *vmcb = get_host_vmcb(svm);
+
+ vmcb->control.intercept_exceptions |= (1U << bit);
+
+ recalc_intercepts(svm);
+}
+
+static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit)
+{
+ struct vmcb *vmcb = get_host_vmcb(svm);
+
+ vmcb->control.intercept_exceptions &= ~(1U << bit);
+
+ recalc_intercepts(svm);
+}
+
+static inline void set_intercept(struct vcpu_svm *svm, int bit)
+{
+ struct vmcb *vmcb = get_host_vmcb(svm);
+
+ vmcb->control.intercept |= (1ULL << bit);
+
+ recalc_intercepts(svm);
+}
+
+static inline void clr_intercept(struct vcpu_svm *svm, int bit)
+{
+ struct vmcb *vmcb = get_host_vmcb(svm);
+
+ vmcb->control.intercept &= ~(1ULL << bit);
+
+ recalc_intercepts(svm);
+}
+
+static inline bool is_intercept(struct vcpu_svm *svm, int bit)
+{
+ return (svm->vmcb->control.intercept & (1ULL << bit)) != 0;
+}
+
+static inline bool vgif_enabled(struct vcpu_svm *svm)
+{
+ return !!(svm->vmcb->control.int_ctl & V_GIF_ENABLE_MASK);
+}
+
+static inline void enable_gif(struct vcpu_svm *svm)
+{
+ if (vgif_enabled(svm))
+ svm->vmcb->control.int_ctl |= V_GIF_MASK;
+ else
+ svm->vcpu.arch.hflags |= HF_GIF_MASK;
+}
+
+static inline void disable_gif(struct vcpu_svm *svm)
+{
+ if (vgif_enabled(svm))
+ svm->vmcb->control.int_ctl &= ~V_GIF_MASK;
+ else
+ svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
+}
+
+static inline bool gif_set(struct vcpu_svm *svm)
+{
+ if (vgif_enabled(svm))
+ return !!(svm->vmcb->control.int_ctl & V_GIF_MASK);
+ else
+ return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
+}
+
+/* svm.c */
+#define MSR_INVALID 0xffffffffU
+
+u32 svm_msrpm_offset(u32 msr);
+void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer);
+void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
+int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
+void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
+void disable_nmi_singlestep(struct vcpu_svm *svm);
+
+/* nested.c */
+
+#define NESTED_EXIT_HOST 0 /* Exit handled on host level */
+#define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */
+#define NESTED_EXIT_CONTINUE 2 /* Further checks needed */
+
+/* This function returns true if it is save to enable the nmi window */
+static inline bool nested_svm_nmi(struct vcpu_svm *svm)
+{
+ if (!is_guest_mode(&svm->vcpu))
+ return true;
+
+ if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI)))
+ return true;
+
+ svm->vmcb->control.exit_code = SVM_EXIT_NMI;
+ svm->nested.exit_required = true;
+
+ return false;
+}
+
+static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu)
+{
+ return is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK);
+}
+
+void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
+ struct vmcb *nested_vmcb, struct kvm_host_map *map);
+int nested_svm_vmrun(struct vcpu_svm *svm);
+void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb);
+int nested_svm_vmexit(struct vcpu_svm *svm);
+int nested_svm_exit_handled(struct vcpu_svm *svm);
+int nested_svm_check_permissions(struct vcpu_svm *svm);
+int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
+ bool has_error_code, u32 error_code);
+int svm_check_nested_events(struct kvm_vcpu *vcpu);
+int nested_svm_exit_special(struct vcpu_svm *svm);
+
+#endif
--
2.17.1