RE: [RFC PATCH 11/18] virt/mshv: set up synic pages for intercept messages
From: Michael Kelley
Date: Mon Feb 08 2021 - 16:00:34 EST
From: Nuno Das Neves <nunodasneves@xxxxxxxxxxxxxxxxxxx> Sent: Friday, November 20, 2020 4:31 PM
>
> Same idea as synic setup in drivers/hv/hv.c:hv_synic_enable_regs()
> and hv_synic_disable_regs().
> Setting up synic registers in both vmbus driver and mshv would clobber
> them, but the vmbus driver will not run in the root partition, so this
> is safe.
>
> Co-developed-by: Lillian Grassin-Drake <ligrassi@xxxxxxxxxxxxx>
> Signed-off-by: Lillian Grassin-Drake <ligrassi@xxxxxxxxxxxxx>
> Signed-off-by: Nuno Das Neves <nunodasneves@xxxxxxxxxxxxxxxxxxx>
> ---
> arch/x86/include/asm/hyperv-tlfs.h | 29 ---
> arch/x86/include/uapi/asm/hyperv-tlfs.h | 264 ++++++++++++++++++++++++
> include/asm-generic/hyperv-tlfs.h | 46 +----
> include/linux/mshv.h | 1 +
> include/uapi/asm-generic/hyperv-tlfs.h | 43 ++++
> virt/mshv/mshv_main.c | 98 ++++++++-
> 6 files changed, 404 insertions(+), 77 deletions(-)
>
> diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
> index 4cd44ae9bffb..c34a6bb4f457 100644
> --- a/arch/x86/include/asm/hyperv-tlfs.h
> +++ b/arch/x86/include/asm/hyperv-tlfs.h
> @@ -267,35 +267,6 @@ struct hv_tsc_emulation_status {
> #define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001
> #define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12
>
> -
> -/* Define hypervisor message types. */
> -enum hv_message_type {
> - HVMSG_NONE = 0x00000000,
> -
> - /* Memory access messages. */
> - HVMSG_UNMAPPED_GPA = 0x80000000,
> - HVMSG_GPA_INTERCEPT = 0x80000001,
> -
> - /* Timer notification messages. */
> - HVMSG_TIMER_EXPIRED = 0x80000010,
> -
> - /* Error messages. */
> - HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020,
> - HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021,
> - HVMSG_UNSUPPORTED_FEATURE = 0x80000022,
> -
> - /* Trace buffer complete messages. */
> - HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040,
> -
> - /* Platform-specific processor intercept messages. */
> - HVMSG_X64_IOPORT_INTERCEPT = 0x80010000,
> - HVMSG_X64_MSR_INTERCEPT = 0x80010001,
> - HVMSG_X64_CPUID_INTERCEPT = 0x80010002,
> - HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003,
> - HVMSG_X64_APIC_EOI = 0x80010004,
> - HVMSG_X64_LEGACY_FP_ERROR = 0x80010005
> -};
> -
> struct hv_nested_enlightenments_control {
> struct {
> __u32 directhypercall:1;
> diff --git a/arch/x86/include/uapi/asm/hyperv-tlfs.h b/arch/x86/include/uapi/asm/hyperv-
> tlfs.h
> index 2ff655962738..c6a27053f791 100644
> --- a/arch/x86/include/uapi/asm/hyperv-tlfs.h
> +++ b/arch/x86/include/uapi/asm/hyperv-tlfs.h
> @@ -722,4 +722,268 @@ union hv_register_value {
> pending_virtualization_fault_event;
> };
>
> +/* Define hypervisor message types. */
> +enum hv_message_type {
> + HVMSG_NONE = 0x00000000,
> +
> + /* Memory access messages. */
> + HVMSG_UNMAPPED_GPA = 0x80000000,
> + HVMSG_GPA_INTERCEPT = 0x80000001,
> +
> + /* Timer notification messages. */
> + HVMSG_TIMER_EXPIRED = 0x80000010,
> +
> + /* Error messages. */
> + HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020,
> + HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021,
> + HVMSG_UNSUPPORTED_FEATURE = 0x80000022,
> +
> + /* Trace buffer complete messages. */
> + HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040,
> +
> + /* Platform-specific processor intercept messages. */
> + HVMSG_X64_IO_PORT_INTERCEPT = 0x80010000,
> + HVMSG_X64_MSR_INTERCEPT = 0x80010001,
> + HVMSG_X64_CPUID_INTERCEPT = 0x80010002,
> + HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003,
> + HVMSG_X64_APIC_EOI = 0x80010004,
> + HVMSG_X64_LEGACY_FP_ERROR = 0x80010005,
> + HVMSG_X64_IOMMU_PRQ = 0x80010006,
> + HVMSG_X64_HALT = 0x80010007,
> + HVMSG_X64_INTERRUPTION_DELIVERABLE = 0x80010008,
> + HVMSG_X64_SIPI_INTERCEPT = 0x80010009,
> +};
I have a separate patch series that moves this enum to the
asm-generic portion of hyperv-tlfs.h because there's not a good way
to separate the arch neutral from arch dependent values.
> +
> +
> +union hv_x64_vp_execution_state {
> + __u16 as_uint16;
> + struct {
> + __u16 cpl:2;
> + __u16 cr0_pe:1;
> + __u16 cr0_am:1;
> + __u16 efer_lma:1;
> + __u16 debug_active:1;
> + __u16 interruption_pending:1;
> + __u16 vtl:4;
> + __u16 enclave_mode:1;
> + __u16 interrupt_shadow:1;
> + __u16 virtualization_fault_active:1;
> + __u16 reserved:2;
> + };
> +};
> +
> +/* Values for intercept_access_type field */
> +#define HV_INTERCEPT_ACCESS_READ 0
> +#define HV_INTERCEPT_ACCESS_WRITE 1
> +#define HV_INTERCEPT_ACCESS_EXECUTE 2
> +
> +struct hv_x64_intercept_message_header {
> + __u32 vp_index;
> + __u8 instruction_length:4;
> + __u8 cr8:4; // only set for exo partitions
> + __u8 intercept_access_type;
> + union hv_x64_vp_execution_state execution_state;
> + struct hv_x64_segment_register cs_segment;
> + __u64 rip;
> + __u64 rflags;
> +};
> +
> +#define HV_HYPERCALL_INTERCEPT_MAX_XMM_REGISTERS 6
> +
> +struct hv_x64_hypercall_intercept_message {
> + struct hv_x64_intercept_message_header header;
> + __u64 rax;
> + __u64 rbx;
> + __u64 rcx;
> + __u64 rdx;
> + __u64 r8;
> + __u64 rsi;
> + __u64 rdi;
> + struct hv_u128 xmmregisters[HV_HYPERCALL_INTERCEPT_MAX_XMM_REGISTERS];
> + struct {
> + __u32 isolated:1;
> + __u32 reserved:31;
> + };
> +};
> +
> +union hv_x64_register_access_info {
> + union hv_register_value source_value;
> + enum hv_register_name destination_register;
> + __u64 source_address;
> + __u64 destination_address;
> +};
> +
> +struct hv_x64_register_intercept_message {
> + struct hv_x64_intercept_message_header header;
> + struct {
> + __u8 is_memory_op:1;
> + __u8 reserved:7;
> + };
> + __u8 reserved8;
> + __u16 reserved16;
> + enum hv_register_name register_name;
> + union hv_x64_register_access_info access_info;
> +};
> +
> +union hv_x64_memory_access_info {
> + __u8 as_uint8;
> + struct {
> + __u8 gva_valid:1;
> + __u8 gva_gpa_valid:1;
> + __u8 hypercall_output_pending:1;
> + __u8 tlb_locked_no_overlay:1;
> + __u8 reserved:4;
> + };
> +};
> +
> +union hv_x64_io_port_access_info {
> + __u8 as_uint8;
> + struct {
> + __u8 access_size:3;
> + __u8 string_op:1;
> + __u8 rep_prefix:1;
> + __u8 reserved:3;
> + };
> +};
> +
> +union hv_x64_exception_info {
> + __u8 as_uint8;
> + struct {
> + __u8 error_code_valid:1;
> + __u8 software_exception:1;
> + __u8 reserved:6;
> + };
> +};
> +
> +enum hv_cache_type {
> + HV_CACHE_TYPE_UNCACHED = 0,
> + HV_CACHE_TYPE_WRITE_COMBINING = 1,
> + HV_CACHE_TYPE_WRITE_THROUGH = 4,
> + HV_CACHE_TYPE_WRITE_PROTECTED = 5,
> + HV_CACHE_TYPE_WRITE_BACK = 6
> +};
> +
> +struct hv_x64_memory_intercept_message {
> + struct hv_x64_intercept_message_header header;
> + enum hv_cache_type cache_type;
> + __u8 instruction_byte_count;
> + union hv_x64_memory_access_info memory_access_info;
> + __u8 tpr_priority;
> + __u8 reserved1;
> + __u64 guest_virtual_address;
> + __u64 guest_physical_address;
> + __u8 instruction_bytes[16];
> +};
> +
> +struct hv_x64_cpuid_intercept_message {
> + struct hv_x64_intercept_message_header header;
> + __u64 rax;
> + __u64 rcx;
> + __u64 rdx;
> + __u64 rbx;
> + __u64 default_result_rax;
> + __u64 default_result_rcx;
> + __u64 default_result_rdx;
> + __u64 default_result_rbx;
> +};
> +
> +struct hv_x64_msr_intercept_message {
> + struct hv_x64_intercept_message_header header;
> + __u32 msr_number;
> + __u32 reserved;
> + __u64 rdx;
> + __u64 rax;
> +};
> +
> +struct hv_x64_io_port_intercept_message {
> + struct hv_x64_intercept_message_header header;
> + __u16 port_number;
> + union hv_x64_io_port_access_info access_info;
> + __u8 instruction_byte_count;
> + __u32 reserved;
> + __u64 rax;
> + __u8 instruction_bytes[16];
> + struct hv_x64_segment_register ds_segment;
> + struct hv_x64_segment_register es_segment;
> + __u64 rcx;
> + __u64 rsi;
> + __u64 rdi;
> +};
> +
> +struct hv_x64_exception_intercept_message {
> + struct hv_x64_intercept_message_header header;
> + __u16 exception_vector;
> + union hv_x64_exception_info exception_info;
> + __u8 instruction_byte_count;
> + __u32 error_code;
> + __u64 exception_parameter;
> + __u64 reserved;
> + __u8 instruction_bytes[16];
> + struct hv_x64_segment_register ds_segment;
> + struct hv_x64_segment_register ss_segment;
> + __u64 rax;
> + __u64 rcx;
> + __u64 rdx;
> + __u64 rbx;
Is the above the correct ordering (rax, rcd, rdx, rbx)?
It's just what you would expect ....
> + __u64 rsp;
> + __u64 rbp;
> + __u64 rsi;
> + __u64 rdi;
> + __u64 r8;
> + __u64 r9;
> + __u64 r10;
> + __u64 r11;
> + __u64 r12;
> + __u64 r13;
> + __u64 r14;
> + __u64 r15;
> +};
> +
> +struct hv_x64_invalid_vp_register_message {
> + __u32 vp_index;
> + __u32 reserved;
> +};
> +
> +struct hv_x64_unrecoverable_exception_message {
> + struct hv_x64_intercept_message_header header;
> +};
> +
> +enum hv_x64_unsupported_feature_code {
> + hv_unsupported_feature_intercept = 1,
> + hv_unsupported_feature_task_switch_tss = 2
> +};
> +
> +struct hv_x64_unsupported_feature_message {
> + __u32 vp_index;
> + enum hv_x64_unsupported_feature_code feature_code;
> + __u64 feature_parameter;
> +};
> +
> +struct hv_x64_halt_message {
> + struct hv_x64_intercept_message_header header;
> +};
> +
> +enum hv_x64_pending_interruption_type {
> + HV_X64_PENDING_INTERRUPT = 0,
> + HV_X64_PENDING_NMI = 2,
> + HV_X64_PENDING_EXCEPTION = 3
> +};
> +
> +struct hv_x64_interruption_deliverable_message {
> + struct hv_x64_intercept_message_header header;
> + enum hv_x64_pending_interruption_type deliverable_type;
> + __u32 rsvd;
> +};
> +
> +struct hv_x64_sipi_intercept_message {
> + struct hv_x64_intercept_message_header header;
> + __u32 target_vp_index;
> + __u32 interrupt_vector;
> +};
> +
> +struct hv_x64_apic_eoi_message {
> + __u32 vp_index;
> + __u32 interrupt_vector;
> +};
Same comments as before about enum types, not depending
on the compiler to add padding, and marking as __packed.
> +
> #endif
> diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
> index b9295400c20b..e0185c3872a9 100644
> --- a/include/asm-generic/hyperv-tlfs.h
> +++ b/include/asm-generic/hyperv-tlfs.h
> @@ -241,6 +241,8 @@ static inline const char *hv_status_to_string(enum hv_status status)
> /* Valid SynIC vectors are 16-255. */
> #define HV_SYNIC_FIRST_VALID_VECTOR (16)
>
> +#define HV_SYNIC_INTERCEPTION_SINT_INDEX 0x00000000
> +
> #define HV_SYNIC_CONTROL_ENABLE (1ULL << 0)
> #define HV_SYNIC_SIMP_ENABLE (1ULL << 0)
> #define HV_SYNIC_SIEFP_ENABLE (1ULL << 0)
> @@ -250,49 +252,6 @@ static inline const char *hv_status_to_string(enum hv_status
> status)
>
> #define HV_SYNIC_STIMER_COUNT (4)
>
> -/* Define synthetic interrupt controller message constants. */
> -#define HV_MESSAGE_SIZE (256)
> -#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240)
> -#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30)
> -
> -/* Define synthetic interrupt controller message flags. */
> -union hv_message_flags {
> - __u8 asu8;
> - struct {
> - __u8 msg_pending:1;
> - __u8 reserved:7;
> - } __packed;
> -};
> -
> -/* Define port identifier type. */
> -union hv_port_id {
> - __u32 asu32;
> - struct {
> - __u32 id:24;
> - __u32 reserved:8;
> - } __packed u;
> -};
> -
> -/* Define synthetic interrupt controller message header. */
> -struct hv_message_header {
> - __u32 message_type;
> - __u8 payload_size;
> - union hv_message_flags message_flags;
> - __u8 reserved[2];
> - union {
> - __u64 sender;
> - union hv_port_id port;
> - };
> -} __packed;
> -
> -/* Define synthetic interrupt controller message format. */
> -struct hv_message {
> - struct hv_message_header header;
> - union {
> - __u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
> - } u;
> -} __packed;
> -
> /* Define the synthetic interrupt message page layout. */
> struct hv_message_page {
> struct hv_message sint_message[HV_SYNIC_SINT_COUNT];
> @@ -306,7 +265,6 @@ struct hv_timer_message_payload {
> __u64 delivery_time; /* When the message was delivered */
> } __packed;
>
> -
> /* Define synthetic interrupt controller flag constants. */
> #define HV_EVENT_FLAGS_COUNT (256 * 8)
> #define HV_EVENT_FLAGS_LONG_COUNT (256 / sizeof(unsigned long))
> diff --git a/include/linux/mshv.h b/include/linux/mshv.h
> index dfe469f573f9..7709aaa1e064 100644
> --- a/include/linux/mshv.h
> +++ b/include/linux/mshv.h
> @@ -42,6 +42,7 @@ struct mshv_partition {
> };
>
> struct mshv {
> + struct hv_message_page __percpu **synic_message_page;
> struct {
> spinlock_t lock;
> u64 count;
> diff --git a/include/uapi/asm-generic/hyperv-tlfs.h b/include/uapi/asm-generic/hyperv-
> tlfs.h
> index e7b09b9f00de..e87389054b68 100644
> --- a/include/uapi/asm-generic/hyperv-tlfs.h
> +++ b/include/uapi/asm-generic/hyperv-tlfs.h
> @@ -6,6 +6,49 @@
> #define BIT(X) (1ULL << (X))
> #endif
>
> +/* Define synthetic interrupt controller message constants. */
> +#define HV_MESSAGE_SIZE (256)
> +#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240)
> +#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30)
> +
> +/* Define synthetic interrupt controller message flags. */
> +union hv_message_flags {
> + __u8 asu8;
> + struct {
> + __u8 msg_pending:1;
> + __u8 reserved:7;
> + };
> +};
> +
> +/* Define port identifier type. */
> +union hv_port_id {
> + __u32 asu32;
> + struct {
> + __u32 id:24;
> + __u32 reserved:8;
> + } u;
> +};
> +
> +/* Define synthetic interrupt controller message header. */
> +struct hv_message_header {
> + enum hv_message_type message_type;
> + __u8 payload_size;
> + union hv_message_flags message_flags;
> + __u8 reserved[2];
> + union {
> + __u64 sender;
> + union hv_port_id port;
> + };
> +};
> +
> +/* Define synthetic interrupt controller message format. */
> +struct hv_message {
> + struct hv_message_header header;
> + union {
> + __u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
> + } u;
> +};
> +
> /* Userspace-visible partition creation flags */
> #define HV_PARTITION_CREATION_FLAG_SMT_ENABLED_GUEST BIT(0)
> #define HV_PARTITION_CREATION_FLAG_GPA_LARGE_PAGES_DISABLED BIT(3)
> diff --git a/virt/mshv/mshv_main.c b/virt/mshv/mshv_main.c
> index 2a10137a1e84..c9445d2edb37 100644
> --- a/virt/mshv/mshv_main.c
> +++ b/virt/mshv/mshv_main.c
> @@ -15,6 +15,8 @@
> #include <linux/file.h>
> #include <linux/anon_inodes.h>
> #include <linux/mm.h>
> +#include <linux/io.h>
> +#include <linux/cpuhotplug.h>
> #include <linux/mshv.h>
> #include <asm/mshyperv.h>
>
> @@ -1152,23 +1154,111 @@ mshv_dev_release(struct inode *inode, struct file *filp)
> return 0;
> }
>
> +static int
> +mshv_synic_init(unsigned int cpu)
> +{
> + union hv_synic_simp simp;
> + union hv_synic_sint sint;
> + union hv_synic_scontrol sctrl;
> + struct hv_message_page **msg_page =
> + this_cpu_ptr(mshv.synic_message_page);
> +
> + /* Setup the Synic's message page */
> + hv_get_simp(simp.as_uint64);
> + simp.simp_enabled = true;
> + *msg_page = memremap(simp.base_simp_gpa << PAGE_SHIFT,
> + PAGE_SIZE, MEMREMAP_WB);
Use HV_HYP_PAGE_SHIFT and HV_HYP_PAGE_SIZE.
> + if (!msg_page) {
> + pr_err("%s: memremap failed\n", __func__);
> + return -EFAULT;
> + }
> + hv_set_simp(simp.as_uint64);
> +
> + /* Enable intercepts */
> + sint.as_uint64 = 0;
> + sint.vector = HYPERVISOR_CALLBACK_VECTOR;
> + sint.masked = false;
> + sint.auto_eoi = hv_recommend_using_aeoi();
> + hv_set_synint_state(HV_SYNIC_INTERCEPTION_SINT_INDEX, sint.as_uint64);
> +
> + /* Enable global synic bit */
> + hv_get_synic_state(sctrl.as_uint64);
> + sctrl.enable = 1;
> + hv_set_synic_state(sctrl.as_uint64);
> +
> + return 0;
> +}
> +
> +static int
> +mshv_synic_cleanup(unsigned int cpu)
> +{
> + union hv_synic_sint sint;
> + union hv_synic_simp simp;
> + union hv_synic_scontrol sctrl;
> + struct hv_message_page **msg_page =
> + this_cpu_ptr(mshv.synic_message_page);
> +
> + /* Disable the interrupt */
> + hv_get_synint_state(HV_SYNIC_INTERCEPTION_SINT_INDEX, sint.as_uint64);
> + sint.masked = true;
> + hv_set_synint_state(HV_SYNIC_INTERCEPTION_SINT_INDEX, sint.as_uint64);
> +
> + /* Disable Synic's message page */
> + hv_get_simp(simp.as_uint64);
> + simp.simp_enabled = false;
> + hv_set_simp(simp.as_uint64);
> + memunmap(*msg_page);
> +
> + /* Disable global synic bit */
> + hv_get_synic_state(sctrl.as_uint64);
> + sctrl.enable = 0;
> + hv_set_synic_state(sctrl.as_uint64);
> +
> + return 0;
> +}
> +
> +static int mshv_cpuhp_online;
> +
> static int
> __init mshv_init(void)
> {
> - int r;
> + int ret;
Ideally, change the name of the variable in the earlier patch so this
one isn't cluttered with the change.
>
> - r = misc_register(&mshv_dev);
> - if (r)
> + ret = misc_register(&mshv_dev);
> + if (ret) {
> pr_err("%s: misc device register failed\n", __func__);
> + return ret;
> + }
> + spin_lock_init(&mshv.partitions.lock);
>
> + mshv.synic_message_page = alloc_percpu(struct hv_message_page *);
> + if (!mshv.synic_message_page) {
> + pr_err("%s: failed to allocate percpu synic page\n", __func__);
> + misc_deregister(&mshv_dev);
> + return -ENOMEM;
> + }
> +
> + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mshv_synic",
> + mshv_synic_init,
> + mshv_synic_cleanup);
> + if (ret < 0) {
> + pr_err("%s: failed to setup cpu hotplug state: %i\n",
> + __func__, ret);
> + return ret;
> + }
> +
> + mshv_cpuhp_online = ret;
> spin_lock_init(&mshv.partitions.lock);
It looks like the spin lock is being initialized twice.
>
> - return r;
> + return 0;
> }
>
> static void
> __exit mshv_exit(void)
> {
> + cpuhp_remove_state(mshv_cpuhp_online);
> + free_percpu(mshv.synic_message_page);
> +
> misc_deregister(&mshv_dev);
> }
>
> --
> 2.25.1