[RFC v2 25/26] KVM: x86/asi: Switch to KVM address space on entry to guest

From: Alexandre Chartre
Date: Thu Jul 11 2019 - 10:28:39 EST


From: Liran Alon <liran.alon@xxxxxxxxxx>

Switch to the KVM address space on entry to the guest. Most KVM #VMExit
handlers will run in the KVM isolated address space and switch back to
the host address space only before accessing sensitive data. Sensitive
data is defined as either host data or data belonging to other VMs.

Currently, we switch back to the host address space in the following
scenarios:
1) When handling guest page-faults:
As this will access SPTs, which contain host PFNs.
2) On schedule-out of vCPU thread
3) On write to guest virtual memory
(kvm_write_guest_virt_system() can pull in tons of pages)
4) On return to userspace (e.g. QEMU)
5) On interrupt or exception

Signed-off-by: Liran Alon <liran.alon@xxxxxxxxxx>
Signed-off-by: Alexandre Chartre <alexandre.chartre@xxxxxxxxxx>
---
arch/x86/kvm/mmu.c | 2 +-
arch/x86/kvm/vmx/isolation.c | 2 +-
arch/x86/kvm/vmx/vmx.c | 6 ++++++
arch/x86/kvm/vmx/vmx.h | 18 ++++++++++++++++++
arch/x86/kvm/x86.c | 34 +++++++++++++++++++++++++++++++++-
arch/x86/kvm/x86.h | 1 +
6 files changed, 60 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 98f6e4f..298f602 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4067,7 +4067,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
{
int r = 1;

- vcpu->arch.l1tf_flush_l1d = true;
+ kvm_may_access_sensitive_data(vcpu);
switch (vcpu->arch.apf.host_apf_reason) {
default:
trace_kvm_page_fault(fault_address, error_code);
diff --git a/arch/x86/kvm/vmx/isolation.c b/arch/x86/kvm/vmx/isolation.c
index d82f6b6..8f57f10 100644
--- a/arch/x86/kvm/vmx/isolation.c
+++ b/arch/x86/kvm/vmx/isolation.c
@@ -34,7 +34,7 @@
* This is set to false by default because it incurs a performance hit
* which some users will not want to take for security gain.
*/
-static bool __read_mostly address_space_isolation;
+bool __read_mostly address_space_isolation;
module_param(address_space_isolation, bool, 0444);

/*
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index d47f093..b5867cc 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6458,8 +6458,14 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (vcpu->arch.cr2 != read_cr2())
write_cr2(vcpu->arch.cr2);

+ /*
+ * Use an isolation barrier as VMExit will restore the isolation
+ * CR3 while interrupts can abort isolation.
+ */
+ vmx_isolation_barrier_begin(vmx);
vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
vmx->loaded_vmcs->launched);
+ vmx_isolation_barrier_end(vmx);

vcpu->arch.cr2 = read_cr2();

diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index e8de23b..b65f059 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -531,4 +531,22 @@ static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx)
int vmx_isolation_init(struct vcpu_vmx *vmx);
void vmx_isolation_uninit(struct vcpu_vmx *vmx);

+extern bool __read_mostly address_space_isolation;
+
+static inline void vmx_isolation_barrier_begin(struct vcpu_vmx *vmx)
+{
+ if (!address_space_isolation || !vmx->vcpu.asi)
+ return;
+
+ asi_barrier_begin();
+}
+
+static inline void vmx_isolation_barrier_end(struct vcpu_vmx *vmx)
+{
+ if (!address_space_isolation || !vmx->vcpu.asi)
+ return;
+
+ asi_barrier_end();
+}
+
#endif /* __KVM_X86_VMX_H */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9857992..9458413 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3346,6 +3346,8 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
* guest. do_debug expects dr6 to be cleared after it runs, do the same.
*/
set_debugreg(0, 6);
+
+ kvm_may_access_sensitive_data(vcpu);
}

static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
@@ -5259,7 +5261,7 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
unsigned int bytes, struct x86_exception *exception)
{
/* kvm_write_guest_virt_system can pull in tons of pages. */
- vcpu->arch.l1tf_flush_l1d = true;
+ kvm_may_access_sensitive_data(vcpu);

return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
PFERR_WRITE_MASK, exception);
@@ -7744,6 +7746,32 @@ void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);

+static void vcpu_isolation_enter(struct kvm_vcpu *vcpu)
+{
+ int err;
+
+ if (!vcpu->asi)
+ return;
+
+ err = asi_enter(vcpu->asi);
+ if (err)
+ pr_debug("KVM isolation failed: error %d\n", err);
+}
+
+static void vcpu_isolation_exit(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu->asi)
+ return;
+
+ asi_exit(vcpu->asi);
+}
+
+void kvm_may_access_sensitive_data(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.l1tf_flush_l1d = true;
+ vcpu_isolation_exit(vcpu);
+}
+
/*
* Returns 1 to let vcpu_run() continue the guest execution loop without
* exiting to the userspace. Otherwise, the value will be returned to the
@@ -7944,6 +7972,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto cancel_injection;
}

+ vcpu_isolation_enter(vcpu);
+
if (req_immediate_exit) {
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_x86_ops->request_immediate_exit(vcpu);
@@ -8130,6 +8160,8 @@ static int vcpu_run(struct kvm_vcpu *vcpu)

srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);

+ kvm_may_access_sensitive_data(vcpu);
+
return r;
}

diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index a470ff0..69a7402 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -356,5 +356,6 @@ static inline bool kvm_pat_valid(u64 data)

void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu);
void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu);
+void kvm_may_access_sensitive_data(struct kvm_vcpu *vcpu);

#endif
--
1.7.1