[PATCH RFC 07/15] KVM: X86: Implement ring-based dirty memory tracking

From: Peter Xu
Date: Fri Nov 29 2019 - 16:36:09 EST


From: "Cao, Lei" <Lei.Cao@xxxxxxxxxxx>

Add the new KVM exit reason KVM_EXIT_DIRTY_RING_FULL and connect
KVM_REQ_DIRTY_RING_FULL to it.  When the request is pending, the vcpu
exits to userspace with that exit reason so that the dirty ring can be
harvested before the guest is re-entered.
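
For reference, a VMM is expected to handle the new exit by harvesting
the per-vcpu dirty ring and then re-entering the guest.  Below is a
minimal userspace sketch, not part of this patch: collect_dirty_ring()
and run_vcpu_once() are hypothetical placeholders, and the ring layout
and reset protocol are defined in other patches of this series.  It
only compiles against headers that already carry the new exit reason.

/*
 * Hypothetical sketch of a VMM vcpu loop reacting to
 * KVM_EXIT_DIRTY_RING_FULL.
 */
#include <errno.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/*
 * Placeholder for the dirty-ring harvesting done by the VMM; the ring
 * layout and reset protocol come from other patches of this series.
 */
static void collect_dirty_ring(int vcpu_fd)
{
}

static int run_vcpu_once(int vcpu_fd, struct kvm_run *run)
{
	if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
		return errno == EINTR ? 0 : -errno;

	if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL) {
		/*
		 * The vcpu's dirty ring is full: harvest it before
		 * re-entering the guest.  Any pending emulation has
		 * already been finished in the kernel (see the x86.c
		 * hunk below), so KVM_RUN can simply be retried.
		 */
		collect_dirty_ring(vcpu_fd);
	}

	return 0;
}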

Signed-off-by: Lei Cao <lei.cao@xxxxxxxxxxx>
Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
[peterx: rebase, return 0 instead of -EINTR for user exits,
emul_insn before exit to userspace]
Signed-off-by: Peter Xu <peterx@xxxxxxxxxx>
---
arch/x86/include/asm/kvm_host.h | 5 +++++
arch/x86/include/uapi/asm/kvm.h | 1 +
arch/x86/kvm/mmu/mmu.c | 6 ++++++
arch/x86/kvm/vmx/vmx.c | 7 +++++++
arch/x86/kvm/x86.c | 12 ++++++++++++
5 files changed, 31 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b79cd6aa4075..67521627f9e4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -49,6 +49,8 @@

#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS

+#define KVM_DIRTY_RING_VERSION 1
+
/* x86-specific vcpu->requests bit members */
#define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0)
#define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1)
@@ -1176,6 +1178,7 @@ struct kvm_x86_ops {
struct kvm_memory_slot *slot,
gfn_t offset, unsigned long mask);
int (*write_log_dirty)(struct kvm_vcpu *vcpu);
+ int (*cpu_dirty_log_size)(void);

/* pmu operations of sub-arch */
const struct kvm_pmu_ops *pmu_ops;
@@ -1661,4 +1664,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
#define GET_SMSTATE(type, buf, offset) \
(*(type *)((buf) + (offset) - 0x7e00))

+int kvm_cpu_dirty_log_size(void);
+
#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 503d3f42da16..b59bf356c478 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -12,6 +12,7 @@

#define KVM_PIO_PAGE_OFFSET 1
#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
+#define KVM_DIRTY_LOG_PAGE_OFFSET 64

#define DE_VECTOR 0
#define DB_VECTOR 1
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 6f92b40d798c..f7efb69b089e 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1818,7 +1818,13 @@ int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu)
{
if (kvm_x86_ops->write_log_dirty)
return kvm_x86_ops->write_log_dirty(vcpu);
+ return 0;
+}

+int kvm_cpu_dirty_log_size(void)
+{
+ if (kvm_x86_ops->cpu_dirty_log_size)
+ return kvm_x86_ops->cpu_dirty_log_size();
return 0;
}

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index d175429c91b0..871489d92d3c 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7710,6 +7710,7 @@ static __init int hardware_setup(void)
kvm_x86_ops->slot_disable_log_dirty = NULL;
kvm_x86_ops->flush_log_dirty = NULL;
kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
+ kvm_x86_ops->cpu_dirty_log_size = NULL;
}

if (!cpu_has_vmx_preemption_timer())
@@ -7774,6 +7775,11 @@ static __exit void hardware_unsetup(void)
free_kvm_area();
}

+static int vmx_cpu_dirty_log_size(void)
+{
+ return enable_pml ? PML_ENTITY_NUM : 0;
+}
+
static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
@@ -7896,6 +7902,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.flush_log_dirty = vmx_flush_log_dirty,
.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
.write_log_dirty = vmx_write_pml_buffer,
+ .cpu_dirty_log_size = vmx_cpu_dirty_log_size,

.pre_block = vmx_pre_block,
.post_block = vmx_post_block,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3ed167e039e5..03ff34783fa1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8094,6 +8094,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
*/
if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
kvm_hv_process_stimers(vcpu);
+
+ if (kvm_check_request(KVM_REQ_DIRTY_RING_FULL, vcpu)) {
+ vcpu->run->exit_reason = KVM_EXIT_DIRTY_RING_FULL;
+ /*
+ * If this request is pending, we have marked the
+ * dirty bit in the dirty ring but have not yet
+ * written the data. Do it now.
+ */
+ r = kvm_emulate_instruction(vcpu, 0);
+ r = r >= 0 ? 0 : r;
+ goto out;
+ }
}

if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
--
2.21.0