[PART1 RFC v4 08/11] svm: Add VMEXIT handlers for AVIC

From: Suravee Suthikulpanit
Date: Thu Apr 07 2016 - 04:37:53 EST


From: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>

This patch introduces VMEXIT handlers, avic_incomplete_ipi_interception()
and avic_unaccelerated_access_interception() along with two trace points
(trace_kvm_avic_incomplete_ipi and trace_kvm_avic_unaccelerated_access).

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>
---
arch/x86/include/uapi/asm/svm.h | 9 +-
arch/x86/kvm/lapic.h | 3 +
arch/x86/kvm/svm.c | 246 ++++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/trace.h | 57 ++++++++++
arch/x86/kvm/x86.c | 2 +
5 files changed, 316 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 8a4add8..b9e9bb2 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -73,6 +73,8 @@
#define SVM_EXIT_MWAIT_COND 0x08c
#define SVM_EXIT_XSETBV 0x08d
#define SVM_EXIT_NPF 0x400
+#define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401
+#define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402

#define SVM_EXIT_ERR -1

@@ -107,8 +109,10 @@
{ SVM_EXIT_SMI, "smi" }, \
{ SVM_EXIT_INIT, "init" }, \
{ SVM_EXIT_VINTR, "vintr" }, \
+ { SVM_EXIT_CR0_SEL_WRITE, "cr0_sel_write" }, \
{ SVM_EXIT_CPUID, "cpuid" }, \
{ SVM_EXIT_INVD, "invd" }, \
+ { SVM_EXIT_PAUSE, "pause" }, \
{ SVM_EXIT_HLT, "hlt" }, \
{ SVM_EXIT_INVLPG, "invlpg" }, \
{ SVM_EXIT_INVLPGA, "invlpga" }, \
@@ -127,7 +131,10 @@
{ SVM_EXIT_MONITOR, "monitor" }, \
{ SVM_EXIT_MWAIT, "mwait" }, \
{ SVM_EXIT_XSETBV, "xsetbv" }, \
- { SVM_EXIT_NPF, "npf" }
+ { SVM_EXIT_NPF, "npf" }, \
+ { SVM_EXIT_RSM, "rsm" }, \
+ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
+ { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }


#endif /* _UAPI__SVM_H */
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index a70cb62..2fc86b7 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -9,6 +9,9 @@
#define KVM_APIC_SIPI 1
#define KVM_APIC_LVT_NUM 6

+#define KVM_APIC_SHORT_MASK 0xc0000
+#define KVM_APIC_DEST_MASK 0x800
+
struct kvm_timer {
struct hrtimer timer;
s64 period; /* unit: ns */
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f9547bc..13fba3b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3515,6 +3515,250 @@ static int mwait_interception(struct vcpu_svm *svm)
return nop_interception(svm);
}

+enum avic_ipi_failure_cause {
+ AVIC_IPI_FAILURE_INVALID_INT_TYPE,
+ AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
+ AVIC_IPI_FAILURE_INVALID_TARGET,
+ AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
+};
+
+static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
+{
+ u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
+ u32 icrl = svm->vmcb->control.exit_info_1;
+ u32 id = svm->vmcb->control.exit_info_2 >> 32;
+ u32 index = svm->vmcb->control.exit_info_2 && 0xFF;
+ struct kvm_lapic *apic = svm->vcpu.arch.apic;
+
+ trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index);
+
+ switch (id) {
+ case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
+ /*
+ * AVIC hardware handles the generation of
+ * IPIs when the specified Message Type is Fixed
+ * (also known as fixed delivery mode) and
+ * the Trigger Mode is edge-triggered. The hardware
+ * also supports self and broadcast delivery modes
+ * specified via the Destination Shorthand(DSH)
+ * field of the ICRL. Logical and physical APIC ID
+ * formats are supported. All other IPI types cause
+ * a #VMEXIT, which needs to emulated.
+ */
+ kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
+ kvm_lapic_reg_write(apic, APIC_ICR, icrl);
+ break;
+ case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: {
+ int i;
+ struct kvm_vcpu *vcpu;
+ struct kvm *kvm = svm->vcpu.kvm;
+ struct kvm_lapic *apic = svm->vcpu.arch.apic;
+
+ /*
+ * At this point, we expect that the AVIC HW has already
+ * set the appropriate IRR bits on the valid target
+ * vcpus. So, we just need to kick the appropriate vcpu.
+ */
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ bool m = kvm_apic_match_dest(vcpu, apic,
+ icrl & KVM_APIC_SHORT_MASK,
+ GET_APIC_DEST_FIELD(icrh),
+ icrl & KVM_APIC_DEST_MASK);
+
+ if (m && !avic_vcpu_is_running(vcpu))
+ kvm_vcpu_wake_up(vcpu);
+ }
+ break;
+ }
+ case AVIC_IPI_FAILURE_INVALID_TARGET:
+ break;
+ case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
+ WARN_ONCE(1, "Invalid backing page\n");
+ break;
+ default:
+ pr_err("Unknown IPI interception\n");
+ }
+
+ return 1;
+}
+
+static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u8 mda, bool flat)
+{
+ struct kvm_arch *vm_data = &vcpu->kvm->arch;
+ int index;
+ u32 *logical_apic_id_table;
+
+ if (flat) { /* flat */
+ if (mda > 7)
+ return NULL;
+ index = mda;
+ } else { /* cluster */
+ int apic_id = mda & 0xf;
+ int cluster_id = (mda & 0xf0) >> 8;
+
+ if (apic_id > 4 || cluster_id >= 0xf)
+ return NULL;
+ index = (cluster_id << 2) + apic_id;
+ }
+ logical_apic_id_table = (u32 *) page_address(vm_data->avic_logical_id_table_page);
+
+ return &logical_apic_id_table[index];
+}
+
+static int avic_handle_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id,
+ u8 logical_id)
+{
+ u32 mod;
+ u32 *entry, new_entry;
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ if (!svm)
+ return -EINVAL;
+
+ mod = (kvm_apic_get_reg(svm->vcpu.arch.apic, APIC_DFR) >> 28) & 0xf;
+ entry = avic_get_logical_id_entry(vcpu, logical_id, (mod == 0xf));
+ if (!entry)
+ return -EINVAL;
+
+ new_entry = READ_ONCE(*entry);
+ new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
+ new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
+ new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
+ WRITE_ONCE(*entry, new_entry);
+
+ return 0;
+}
+
+static int avic_unaccel_trap_write(struct vcpu_svm *svm)
+{
+ u32 offset = svm->vmcb->control.exit_info_1 & 0xFF0;
+ struct kvm_lapic *apic = svm->vcpu.arch.apic;
+ u32 reg = kvm_apic_get_reg(apic, offset);
+
+ switch (offset) {
+ case APIC_ID: {
+ u32 aid = (reg >> 24) & 0xff;
+ u64 *o_ent = avic_get_physical_id_entry(&svm->vcpu,
+ svm->vcpu.vcpu_id);
+ u64 *n_ent = avic_get_physical_id_entry(&svm->vcpu, aid);
+
+ if (!n_ent || !o_ent)
+ return 0;
+
+ /* We need to move physical_id_entry to new offset */
+ *n_ent = *o_ent;
+ *o_ent = 0ULL;
+ svm->avic_physical_id_cache = n_ent;
+ break;
+ }
+ case APIC_LDR: {
+ int ret, lid;
+ int dlid = (reg >> 24) & 0xff;
+
+ if (!dlid)
+ return 0;
+
+ lid = ffs(dlid) - 1;
+ ret = avic_handle_ldr_write(&svm->vcpu, svm->vcpu.vcpu_id, lid);
+ if (ret)
+ return 0;
+
+ break;
+ }
+ case APIC_DFR: {
+ struct kvm_arch *vm_data = &svm->vcpu.kvm->arch;
+ u32 mod = (reg >> 28) & 0xf;
+
+ /*
+ * We assume that all local APICs are using the same type.
+ * If this changes, we need to rebuild the AVIC logical
+ * APID id table with subsequent write to APIC_LDR.
+ */
+ if (vm_data->ldr_mode != mod) {
+ clear_page(page_address(vm_data->avic_logical_id_table_page));
+ vm_data->ldr_mode = mod;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
+ kvm_lapic_reg_write(apic, offset, reg);
+
+ return 1;
+}
+
+static bool is_avic_unaccelerated_access_trap(u32 offset)
+{
+ bool ret = false;
+
+ switch (offset) {
+ case APIC_ID:
+ case APIC_EOI:
+ case APIC_RRR:
+ case APIC_LDR:
+ case APIC_DFR:
+ case APIC_SPIV:
+ case APIC_ESR:
+ case APIC_ICR:
+ case APIC_LVTT:
+ case APIC_LVTTHMR:
+ case APIC_LVTPC:
+ case APIC_LVT0:
+ case APIC_LVT1:
+ case APIC_LVTERR:
+ case APIC_TMICT:
+ case APIC_TDCR:
+ ret = true;
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1
+#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0
+#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF
+
+static int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
+{
+ int ret = 0;
+ u32 offset = svm->vmcb->control.exit_info_1 &
+ AVIC_UNACCEL_ACCESS_OFFSET_MASK;
+ u32 vector = svm->vmcb->control.exit_info_2 &
+ AVIC_UNACCEL_ACCESS_VECTOR_MASK;
+ bool write = (svm->vmcb->control.exit_info_1 >> 32) &
+ AVIC_UNACCEL_ACCESS_WRITE_MASK;
+ bool trap = is_avic_unaccelerated_access_trap(offset);
+
+ trace_kvm_avic_unaccelerated_access(svm->vcpu.vcpu_id, offset,
+ trap, write, vector);
+
+ /**
+ * AVIC does not support x2APIC registers, and we only advertise
+ * xAPIC when enable AVIC. Therefore, access to these registers
+ * will not be supported.
+ */
+ if (offset >= 0x400) {
+ WARN(1, "Unsupported APIC offset %#x\n", offset);
+ return ret;
+ }
+
+ if (trap) {
+ /* Handling Trap */
+ if (!write) /* Trap read should never happens */
+ BUG();
+ ret = avic_unaccel_trap_write(svm);
+ } else {
+ /* Handling Fault */
+ ret = (emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
+ }
+
+ return ret;
+}
+
static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_READ_CR0] = cr_interception,
[SVM_EXIT_READ_CR3] = cr_interception,
@@ -3578,6 +3822,8 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_XSETBV] = xsetbv_interception,
[SVM_EXIT_NPF] = pf_interception,
[SVM_EXIT_RSM] = emulate_on_interception,
+ [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception,
+ [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception,
};

static void dump_vmcb(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 2f1ea2f..39f264c 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1292,6 +1292,63 @@ TRACE_EVENT(kvm_hv_stimer_cleanup,
__entry->vcpu_id, __entry->timer_index)
);

+/*
+ * Tracepoint for AMD AVIC
+ */
+TRACE_EVENT(kvm_avic_incomplete_ipi,
+ TP_PROTO(u32 vcpu, u32 icrh, u32 icrl, u32 id, u32 index),
+ TP_ARGS(vcpu, icrh, icrl, id, index),
+
+ TP_STRUCT__entry(
+ __field(u32, vcpu)
+ __field(u32, icrh)
+ __field(u32, icrl)
+ __field(u32, id)
+ __field(u32, index)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu = vcpu;
+ __entry->icrh = icrh;
+ __entry->icrl = icrl;
+ __entry->id = id;
+ __entry->index = index;
+ ),
+
+ TP_printk("vcpu=%u, icrh:icrl=%#010x:%08x, id=%u, index=%u\n",
+ __entry->vcpu, __entry->icrh, __entry->icrl,
+ __entry->id, __entry->index)
+);
+
+TRACE_EVENT(kvm_avic_unaccelerated_access,
+ TP_PROTO(u32 vcpu, u32 offset, bool ft, bool rw, u32 vec),
+ TP_ARGS(vcpu, offset, ft, rw, vec),
+
+ TP_STRUCT__entry(
+ __field(u32, vcpu)
+ __field(u32, offset)
+ __field(bool, ft)
+ __field(bool, rw)
+ __field(u32, vec)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu = vcpu;
+ __entry->offset = offset;
+ __entry->ft = ft;
+ __entry->rw = rw;
+ __entry->vec = vec;
+ ),
+
+ TP_printk("vcpu=%u, offset=%#x(%s), %s, %s, vec=%#x\n",
+ __entry->vcpu,
+ __entry->offset,
+ __print_symbolic(__entry->offset, kvm_trace_symbol_apic),
+ __entry->ft ? "trap" : "fault",
+ __entry->rw ? "write" : "read",
+ __entry->vec)
+);
+
#endif /* _TRACE_KVM_H */

#undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d12583e..b0f211c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8437,3 +8437,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);
--
1.9.1