[PATCH char-misc 1/1] Drivers: hv: vmbus: Implement Direct Mode for stimer0
From: mikelley
Date: Tue Oct 31 2017 - 18:20:09 EST
From: Michael Kelley <mikelley@xxxxxxxxxxxxx>
The 2016 version of Hyper-V offers the option to operate the guest VM
per-vcpu stimers in Direct Mode, which means the timer interrupts on its
own vector rather than queueing a VMbus message. Direct Mode reduces
timer processing overhead in both the hypervisor and the guest, and
avoids having timer interrupts pollute the VMbus interrupt stream for
the synthetic NIC and storage. This patch enables Direct Mode by
default on stimer0 (which is the only stimer used in Linux on Hyper-V)
when running on a version of Hyper-V that supports it, with a hv_vmbus
module parameter for disabling Direct Mode and reverting to the old
behavior.
Signed-off-by: Michael Kelley <mikelley@xxxxxxxxxxxxx>
---
arch/x86/include/asm/mshyperv.h | 14 ++++++++
arch/x86/include/uapi/asm/hyperv.h | 26 ++++++++++++++
arch/x86/kernel/cpu/mshyperv.c | 64 +++++++++++++++++++++++++++++++++-
drivers/hv/hv.c | 71 ++++++++++++++++++++++++++++++++++++--
drivers/hv/hyperv_vmbus.h | 4 ++-
5 files changed, 175 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 740dc97..1bba1d7 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -4,6 +4,8 @@
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/nmi.h>
+#include <linux/irq.h>
+#include <linux/irqdesc.h>
#include <asm/io.h>
#include <asm/hyperv.h>
@@ -374,4 +376,16 @@ static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
return NULL;
}
#endif
+
+/* Per architecture routines for stimer0 Direct Mode handling. On x86/x64
+ * there are no percpu actions to take.
+ */
+#if IS_ENABLED(CONFIG_HYPERV)
+static inline void hv_enable_stimer0_percpu_irq(int irq) { }
+static inline void hv_disable_stimer0_percpu_irq(int irq) { }
+extern int hv_allocate_stimer0_irq(int *irq, int *vector);
+extern void hv_deallocate_stimer0_irq(int irq);
+extern void hv_ack_stimer0_interrupt(struct irq_desc *desc);
+#endif
+
#endif
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index f65d125..408cf3e 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -112,6 +112,22 @@
#define HV_X64_GUEST_IDLE_STATE_AVAILABLE (1 << 5)
/* Guest crash data handler available */
#define HV_X64_GUEST_CRASH_MSR_AVAILABLE (1 << 10)
+/* Debug MSRs available */
+#define HV_X64_DEBUG_MSR_AVAILABLE (1 << 11)
+/* Support for Non-Privileged Instruction Execution Prevention is available */
+#define HV_X64_NPIEP_AVAILABLE (1 << 12)
+/* Support for DisableHypervisor is available */
+#define HV_X64_DISABLE_HYPERVISOR_AVAILABLE (1 << 13)
+/* Extended GVA Ranges for Flush Virtual Address list is available */
+#define HV_X64_EXTENDED_GVA_RANGE_AVAILABLE (1 << 14)
+/* Return Hypercall output via XMM registers is available */
+#define HV_X64_HYPERCALL_XMM_OUTPUT_AVAILABLE (1 << 15)
+/* SINT polling mode available */
+#define HV_X64_SINT_POLLING_MODE_AVAILABLE (1 << 17)
+/* Hypercall MSR lock is available */
+#define HV_X64_HYPERCALL_MSR_LOCK_AVAILABLE (1 << 18)
+/* stimer direct mode is available */
+#define HV_X64_STIMER_DIRECT_MODE_AVAILABLE (1 << 19)
/*
* Implementation recommendations. Indicates which behaviors the hypervisor
@@ -300,6 +316,16 @@ enum HV_GENERIC_SET_FORMAT {
#define HV_SYNIC_STIMER_COUNT (4)
+/* Hardware IRQ number to use for stimer0 in Direct Mode. This IRQ is a fake
+ * because stimers in Direct Mode simply interrupt on the specified vector,
+ * without using a particular IOAPIC pin. But we use the IRQ allocation
+ * machinery, so we need a hardware IRQ #. This value is somewhat arbitrary,
+ * but it should not be a legacy IRQ (0 to 15), and should fit within the
+ * single IOAPIC (0 to 23) that Hyper-V provides to a guest VM. So any value
+ * between 16 and 23 should be good.
+ */
+#define HV_STIMER0_IRQNR 18
+
/* Define synthetic interrupt controller message constants. */
#define HV_MESSAGE_SIZE (256)
#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240)
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 236324e8..88dc243 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -19,7 +19,10 @@
#include <linux/efi.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/irqdesc.h>
#include <linux/kexec.h>
+#include <linux/acpi.h>
#include <asm/processor.h>
#include <asm/hypervisor.h>
#include <asm/hyperv.h>
@@ -27,6 +30,7 @@
#include <asm/desc.h>
#include <asm/irq_regs.h>
#include <asm/i8259.h>
+#include <asm/irqdomain.h>
#include <asm/apic.h>
#include <asm/timer.h>
#include <asm/reboot.h>
@@ -69,6 +73,64 @@ void hv_remove_vmbus_irq(void)
EXPORT_SYMBOL_GPL(hv_setup_vmbus_irq);
EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq);
+
+/* Routines to do per-architecture handling of stimer0 when in Direct Mode */
+
+void hv_ack_stimer0_interrupt(struct irq_desc *desc)
+{
+ ack_APIC_irq(); /* desc is not needed for the x86 acknowledgement */
+}
+
+static void allonline_vector_allocation_domain(int cpu, struct cpumask *retmask,
+ const struct cpumask *mask)
+{
+ cpumask_copy(retmask, cpu_online_mask); /* cpu and mask are ignored */
+}
+
+/* Get an IRQ and vector for stimer0 Direct Mode: 0 on success, else -1. */
+int hv_allocate_stimer0_irq(int *irq, int *vector)
+{
+ int localirq, result;
+ struct irq_data *irq_data;
+
+ /* The normal APIC vector allocation domain allows allocation of vectors
+ * only for the calling CPU. So we change the allocation domain to one
+ * that allows vectors to be allocated in all online CPUs. This
+ * change is fine in a Hyper-V VM because VMs don't have the usual
+ * complement of interrupting devices.
+ */
+ apic->vector_allocation_domain = allonline_vector_allocation_domain;
+ localirq = acpi_register_gsi(NULL, HV_STIMER0_IRQNR,
+ ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_HIGH);
+ if (localirq < 0) {
+ pr_err("Cannot register stimer0 gsi. Error %d\n", localirq);
+ return -1;
+ }
+
+ /* We pass in a dummy IRQ handler because architecture independent code
+ * will later override the IRQ domain interrupt handler and set it to a
+ * Hyper-V specific handler.
+ */
+ result = request_irq(localirq, (irq_handler_t)(-1), 0,
+ "Hyper-V stimer0", NULL);
+ if (result) {
+ pr_err("Cannot request stimer0 irq. Error %d\n", result);
+ acpi_unregister_gsi(HV_STIMER0_IRQNR); /* takes the GSI, not the IRQ */
+ return -1;
+ }
+ irq_data = irq_domain_get_irq_data(x86_vector_domain, localirq);
+ *vector = irqd_cfg(irq_data)->vector;
+ *irq = localirq;
+ return 0;
+}
+
+void hv_deallocate_stimer0_irq(int irq)
+{
+ free_irq(irq, NULL); /* NULL dev_id matches the request_irq() call */
+ acpi_unregister_gsi(HV_STIMER0_IRQNR); /* takes the GSI, not the IRQ */
+}
+
+
void hv_setup_kexec_handler(void (*handler)(void))
{
hv_kexec_handler = handler;
@@ -195,7 +257,7 @@ static void __init ms_hyperv_init_platform(void)
hv_host_info_ecx = cpuid_ecx(HVCPUID_VERSION);
hv_host_info_edx = cpuid_edx(HVCPUID_VERSION);
- pr_info("Hyper-V Host Build:%d-%d.%d-%d-%d.%d\n",
+ pr_info("Hyper-V: Host Build %d-%d.%d-%d-%d.%d\n",
hv_host_info_eax, hv_host_info_ebx >> 16,
hv_host_info_ebx & 0xFFFF, hv_host_info_ecx,
hv_host_info_edx >> 24, hv_host_info_edx & 0xFFFFFF);
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index fe96aab..68ac474 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -27,8 +27,12 @@
#include <linux/vmalloc.h>
#include <linux/hyperv.h>
#include <linux/version.h>
-#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqdesc.h>
+#include <linux/random.h>
+#include <linux/kernel_stat.h>
#include <linux/clockchips.h>
+#include <linux/module.h>
#include <asm/hyperv.h>
#include <asm/mshyperv.h>
#include "hyperv_vmbus.h"
@@ -38,6 +42,21 @@ struct hv_context hv_context = {
.synic_initialized = false,
};
+/* If true, we're using Direct Mode for stimer0, and the timer raises its own
+ * interrupt when it expires. If false, stimer0 is not using Direct Mode and
+ * will send a VMbus message when it expires. We prefer to use Direct Mode,
+ * but not all versions of Hyper-V support Direct Mode.
+ *
+ * While Hyper-V provides a total of four stimers per CPU, Linux uses only
+ * stimer0.
+ */
+static bool stimer_direct_mode;
+static int stimer0_irq;
+static int stimer0_vector;
+static bool direct_mode_disable;
+module_param(direct_mode_disable, bool, 0444);
+MODULE_PARM_DESC(direct_mode_disable, "Set to Y to disable Direct Mode.");
+
#define HV_TIMER_FREQUENCY (10 * 1000 * 1000) /* 100ns period */
#define HV_MAX_MAX_DELTA_TICKS 0xffffffff
#define HV_MIN_DELTA_TICKS 1
@@ -52,7 +71,12 @@ int hv_init(void)
hv_context.cpu_context = alloc_percpu(struct hv_per_cpu_context);
if (!hv_context.cpu_context)
return -ENOMEM;
+ stimer_direct_mode = (ms_hyperv.misc_features &
+ HV_X64_STIMER_DIRECT_MODE_AVAILABLE) ? true : false;
+ /* Apply boot command line override to the Direct Mode setting */
+ if (direct_mode_disable)
+ stimer_direct_mode = false;
return 0;
}
@@ -91,6 +115,23 @@ int hv_post_message(union hv_connection_id connection_id,
return status & 0xFFFF;
}
+/* ISR for when stimer0 is operating in Direct Mode. Direct Mode does
+ * not use VMBus or any VMBus messages, so process here and not in the
+ * VMBus driver code.
+ */
+
+static void hv_stimer0_isr(struct irq_desc *desc)
+{
+ struct hv_per_cpu_context *hv_cpu;
+
+ __this_cpu_inc(*desc->kstat_irqs);
+ __this_cpu_inc(kstat.irqs_sum);
+ hv_ack_stimer0_interrupt(desc); /* ack before running the handler */
+ hv_cpu = this_cpu_ptr(hv_context.cpu_context);
+ hv_cpu->clk_evt->event_handler(hv_cpu->clk_evt); /* this CPU's clockevent */
+ add_interrupt_randomness(desc->irq_data.irq, 0);
+}
+
static int hv_ce_set_next_event(unsigned long delta,
struct clock_event_device *evt)
{
@@ -108,6 +149,8 @@ static int hv_ce_shutdown(struct clock_event_device *evt)
{
hv_init_timer(HV_X64_MSR_STIMER0_COUNT, 0);
hv_init_timer_config(HV_X64_MSR_STIMER0_CONFIG, 0);
+ if (stimer_direct_mode)
+ hv_disable_stimer0_percpu_irq(stimer0_irq);
return 0;
}
@@ -116,9 +159,24 @@ static int hv_ce_set_oneshot(struct clock_event_device *evt)
{
union hv_timer_config timer_cfg;
+ timer_cfg.as_uint64 = 0; /* Zero everything */
timer_cfg.enable = 1;
timer_cfg.auto_enable = 1;
- timer_cfg.sintx = VMBUS_MESSAGE_SINT;
+ if (stimer_direct_mode) {
+
+ /* When it expires, the timer will directly interrupt
+ * on the specific hardware vector.
+ */
+ timer_cfg.direct_mode = 1;
+ timer_cfg.apic_vector = stimer0_vector;
+ hv_enable_stimer0_percpu_irq(stimer0_irq);
+ } else {
+ /* When it expires, the timer will generate a VMbus message,
+ * to be handled by the normal VMbus interrupt handler.
+ */
+ timer_cfg.direct_mode = 0;
+ timer_cfg.sintx = VMBUS_MESSAGE_SINT;
+ }
hv_init_timer_config(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64);
return 0;
@@ -191,6 +249,12 @@ int hv_synic_alloc(void)
INIT_LIST_HEAD(&hv_cpu->chan_list);
}
+ if (stimer_direct_mode) {
+ if (hv_allocate_stimer0_irq(&stimer0_irq, &stimer0_vector))
+ goto err;
+ irq_set_handler(stimer0_irq, hv_stimer0_isr);
+ }
+
return 0;
err:
return -ENOMEM;
@@ -292,6 +356,9 @@ void hv_synic_clockevents_cleanup(void)
if (!(ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE))
return;
+ if (stimer_direct_mode)
+ hv_deallocate_stimer0_irq(stimer0_irq);
+
for_each_present_cpu(cpu) {
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index de6f01d..ee8c89b 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -55,7 +55,9 @@
u64 periodic:1;
u64 lazy:1;
u64 auto_enable:1;
- u64 reserved_z0:12;
+ u64 apic_vector:8;
+ u64 direct_mode:1;
+ u64 reserved_z0:3;
u64 sintx:4;
u64 reserved_z1:44;
};
--
1.8.3.1