[PATCH v3 11/14] KVM: x86: add KVM_CAP_X2APIC_API

From: Radim KrÄmÃÅ
Date: Tue Jul 12 2016 - 16:10:36 EST


KVM_CAP_X2APIC_API is a capability for features related to x2APIC
enablement. KVM_X2APIC_API_32BIT_FORMAT feature can be enabled to
extend APIC ID in get/set ioctl and MSI addresses to 32 bits.
Both are needed to support x2APIC.

The feature has to be enableable and disabled by default, because
get/set ioctl shifted and truncated APIC ID to 8 bits by using a
non-standard protocol inspired by xAPIC and the change is not
backward-compatible.

Changes to MSI addresses follow the format used by interrupt remapping
unit. The upper address word, that used to be 0, contains upper 24 bits
of the LAPIC address in its upper 24 bits. Lower 8 bits are reserved as
0. Using the upper address word is not backward-compatible either as we
didn't check that userspace zeroed the word. Reserved bits are still
not explicitly checked, but non-zero data will affect LAPIC addresses,
which will cause a bug.

Signed-off-by: Radim KrÄmÃÅ <rkrcmar@xxxxxxxxxx>
---
v3:
* use sub-feature flags -- they allow userspace to postpone enablement
(helps a bit in the [12/14] patch, because QEMU intitializes kvm
before before iommu.)
* check invalid msi route in kvm_arch_set_irq_inatomic [Zhang]

Documentation/virtual/kvm/api.txt | 40 +++++++++++++++++++++++++++++++++++++++
arch/x86/include/asm/kvm_host.h | 4 +++-
arch/x86/kvm/irq_comm.c | 29 +++++++++++++++++++++++-----
arch/x86/kvm/lapic.c | 13 +++++++++----
arch/x86/kvm/vmx.c | 2 +-
arch/x86/kvm/x86.c | 15 +++++++++++++++
include/trace/events/kvm.h | 5 +++--
include/uapi/linux/kvm.h | 3 +++
8 files changed, 98 insertions(+), 13 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 09efa9eb3926..10e2bf903e57 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1482,6 +1482,11 @@ struct kvm_irq_routing_msi {
__u32 pad;
};

+On x86, address_hi is ignored unless the KVM_X2APIC_API_USE_32BIT_FORMAT
+feature of KVM_CAP_X2APIC_API capability is enabled. If it is enabled,
+address_hi bits 31-8 provide bits 31-8 of the destination id. Bits 7-0 of
+address_hi must be zero.
+
struct kvm_irq_routing_s390_adapter {
__u64 ind_addr;
__u64 summary_addr;
@@ -1583,6 +1588,16 @@ struct kvm_lapic_state {
Reads the Local APIC registers and copies them into the input argument. The
data format and layout are the same as documented in the architecture manual.

+If KVM_X2APIC_API_USE_32BIT_FORMAT feature of KVM_CAP_X2APIC_API is enabled,
+then the format of APIC_ID register depends on the APIC mode (reported by
+MSR_IA32_APICBASE) of its VCPU. x2APIC stores APIC ID in the APIC_ID register
+(bytes 32-35). xAPIC only allows an 8-bit APIC ID which is stored in bits
+31-24 of the APIC register, or equivalently in byte 35 of struct
+kvm_lapic_state's regs field.
+
+If KVM_X2APIC_API_USE_32BIT_FORMAT feature is disabled, struct kvm_lapic_state
+always uses xAPIC format.
+

4.58 KVM_SET_LAPIC

@@ -1600,6 +1615,10 @@ struct kvm_lapic_state {
Copies the input argument into the Local APIC registers. The data format
and layout are the same as documented in the architecture manual.

+The format of the APIC ID register (bytes 32-35 of struct kvm_lapic_state's
+regs field) depends on the state of the KVM_CAP_X2APIC_API capability.
+See the note in KVM_GET_LAPIC.
+

4.59 KVM_IOEVENTFD

@@ -2180,6 +2199,10 @@ struct kvm_msi {

No flags are defined so far. The corresponding field must be 0.

+On x86, address_hi is ignored unless the KVM_CAP_X2APIC_API capability is
+enabled. If it is enabled, address_hi bits 31-8 provide bits 31-8 of the
+destination id. Bits 7-0 of address_hi must be zero.
+

4.71 KVM_CREATE_PIT2

@@ -3811,6 +3834,23 @@ Allows use of runtime-instrumentation introduced with zEC12 processor.
Will return -EINVAL if the machine does not support runtime-instrumentation.
Will return -EBUSY if a VCPU has already been created.

+7.7 KVM_CAP_X2APIC_API
+
+Architectures: x86
+Parameters: args[0] - features that should be enabled
+Returns: 0 on success, -EINVAL when args[0] contains invalid features
+
+Valid feature flags in args[0] are
+
+#define KVM_X2APIC_API_USE_32BIT_FORMAT (1ULL << 0)
+
+Enabling KVM_X2APIC_API_USE_32BIT_FORMAT changes the behavior of
+KVM_SET_GSI_ROUTING, KVM_SIGNAL_MSI, KVM_SET_LAPIC, and KVM_GET_LAPIC, allowing
+the use of 32-bit APIC IDs. See KVM_CAP_X2APIC_API in their respective
+sections.
+
+
+
8. Other capabilities.
----------------------

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 643e3dffcd85..f13522f85a1d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -782,6 +782,8 @@ struct kvm_arch {
u32 ldr_mode;
struct page *avic_logical_id_table_page;
struct page *avic_physical_id_table_page;
+
+ bool x2apic_format;
};

struct kvm_vm_stat {
@@ -1364,7 +1366,7 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
struct kvm_vcpu **dest_vcpu);

-void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
struct kvm_lapic_irq *irq);

static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 889563d50c55..25810b144b58 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -110,13 +110,17 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
return r;
}

-void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
struct kvm_lapic_irq *irq)
{
- trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
+ trace_kvm_msi_set_irq(e->msi.address_lo | (kvm->arch.x2apic_format ?
+ (u64)e->msi.address_hi << 32 : 0),
+ e->msi.data);

irq->dest_id = (e->msi.address_lo &
MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+ if (kvm->arch.x2apic_format)
+ irq->dest_id |= MSI_ADDR_EXT_DEST_ID(e->msi.address_hi);
irq->vector = (e->msi.data &
MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
@@ -129,15 +133,24 @@ void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
}
EXPORT_SYMBOL_GPL(kvm_set_msi_irq);

+static inline bool kvm_msi_route_invalid(struct kvm *kvm,
+ struct kvm_kernel_irq_routing_entry *e)
+{
+ return kvm->arch.x2apic_format && (e->msi.address_hi & 0xff);
+}
+
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level, bool line_status)
{
struct kvm_lapic_irq irq;

+ if (kvm_msi_route_invalid(kvm, e))
+ return -EINVAL;
+
if (!level)
return -1;

- kvm_set_msi_irq(e, &irq);
+ kvm_set_msi_irq(kvm, e, &irq);

return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL);
}
@@ -153,7 +166,10 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
if (unlikely(e->type != KVM_IRQ_ROUTING_MSI))
return -EWOULDBLOCK;

- kvm_set_msi_irq(e, &irq);
+ if (kvm_msi_route_invalid(kvm, e))
+ return -EINVAL;
+
+ kvm_set_msi_irq(kvm, e, &irq);

if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
return r;
@@ -286,6 +302,9 @@ int kvm_set_routing_entry(struct kvm *kvm,
e->msi.address_lo = ue->u.msi.address_lo;
e->msi.address_hi = ue->u.msi.address_hi;
e->msi.data = ue->u.msi.data;
+
+ if (kvm_msi_route_invalid(kvm, e))
+ goto out;
break;
case KVM_IRQ_ROUTING_HV_SINT:
e->set = kvm_hv_set_sint;
@@ -394,7 +413,7 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
if (entry->type != KVM_IRQ_ROUTING_MSI)
continue;

- kvm_set_msi_irq(entry, &irq);
+ kvm_set_msi_irq(vcpu->kvm, entry, &irq);

if (irq.level && kvm_apic_match_dest(vcpu, NULL, 0,
irq.dest_id, irq.dest_mode))
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3c2a8c113054..d27a7829a4ce 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1991,10 +1991,15 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
if (apic_x2apic_mode(vcpu->arch.apic)) {
u32 *id = (u32 *)(s->regs + APIC_ID);

- if (set)
- *id >>= 24;
- else
- *id <<= 24;
+ if (vcpu->kvm->arch.x2apic_format) {
+ if (*id != vcpu->vcpu_id)
+ return -EINVAL;
+ } else {
+ if (set)
+ *id >>= 24;
+ else
+ *id <<= 24;
+ }
}

return 0;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 24b505fda7ad..a12c239eb732 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -11097,7 +11097,7 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
* We will support full lowest-priority interrupt later.
*/

- kvm_set_msi_irq(e, &irq);
+ kvm_set_msi_irq(kvm, e, &irq);
if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
/*
* Make sure the IRTE is in remapped mode if
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b178c8c12717..330276315daa 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -90,6 +90,8 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

+#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_FORMAT)
+
static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);
static void enter_smm(struct kvm_vcpu *vcpu);
@@ -2625,6 +2627,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_TSC_CONTROL:
r = kvm_has_tsc_control;
break;
+ case KVM_CAP_X2APIC_API:
+ r = KVM_X2APIC_API_VALID_FLAGS;
+ break;
default:
r = 0;
break;
@@ -3799,6 +3804,16 @@ split_irqchip_unlock:
mutex_unlock(&kvm->lock);
break;
}
+ case KVM_CAP_X2APIC_API:
+ r = -EINVAL;
+ if (cap->args[0] & ~KVM_X2APIC_API_VALID_FLAGS)
+ break;
+
+ if (cap->args[0] & KVM_X2APIC_API_USE_32BIT_FORMAT)
+ kvm->arch.x2apic_format = true;
+
+ r = 0;
+ break;
default:
r = -EINVAL;
break;
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index f28292d73ddb..8ade3eb6c640 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -151,8 +151,9 @@ TRACE_EVENT(kvm_msi_set_irq,
__entry->data = data;
),

- TP_printk("dst %u vec %u (%s|%s|%s%s)",
- (u8)(__entry->address >> 12), (u8)__entry->data,
+ TP_printk("dst %llx vec %u (%s|%s|%s%s)",
+ (u8)(__entry->address >> 12) | ((__entry->address >> 32) & 0xffffff00),
+ (u8)__entry->data,
__print_symbolic((__entry->data >> 8 & 0x7), kvm_deliver_mode),
(__entry->address & (1<<2)) ? "logical" : "physical",
(__entry->data & (1<<15)) ? "level" : "edge",
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 05ebf475104c..b6de3febabaa 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -866,6 +866,7 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_ARM_PMU_V3 126
#define KVM_CAP_VCPU_ATTRIBUTES 127
#define KVM_CAP_MAX_VCPU_ID 128
+#define KVM_CAP_X2APIC_API 129

#ifdef KVM_CAP_IRQ_ROUTING

@@ -1313,4 +1314,6 @@ struct kvm_assigned_msix_entry {
__u16 padding[3];
};

+#define KVM_X2APIC_API_USE_32BIT_FORMAT (1ULL << 0)
+
#endif /* __LINUX_KVM_H */
--
2.9.0