RE: [PATCH 02/15] hyperv: uapi-fy synic event flags definitions

From: KY Srinivasan
Date: Wed Dec 21 2016 - 13:58:59 EST




> -----Original Message-----
> From: Roman Kagan [mailto:rkagan@xxxxxxxxxxxxx]
> Sent: Tuesday, December 20, 2016 7:56 AM
> To: Paolo Bonzini <pbonzini@xxxxxxxxxx>; Radim Krčmář
> <rkrcmar@xxxxxxxxxx>; KY Srinivasan <kys@xxxxxxxxxxxxx>; Vitaly
> Kuznetsov <vkuznets@xxxxxxxxxx>
> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>; Ingo Molnar
> <mingo@xxxxxxxxxx>; H. Peter Anvin <hpa@xxxxxxxxx>; x86@xxxxxxxxxx;
> Haiyang Zhang <haiyangz@xxxxxxxxxxxxx>; kvm@xxxxxxxxxxxxxxx; linux-
> kernel@xxxxxxxxxxxxxxx; devel@xxxxxxxxxxxxxxxxxxxxxx; Denis V . Lunev
> <den@xxxxxxxxxx>; Roman Kagan <rkagan@xxxxxxxxxxxxx>
> Subject: [PATCH 02/15] hyperv: uapi-fy synic event flags definitions
>
> Move definitions related to the Hyper-V SynIC event flags to a header
> where they can be consumed by userspace.
>
> While doing so, also clean up their use by switching to standard bitops
> and struct-based dereferencing. The latter is also done for message
> pages.

Please breakup this patch.
>
> Signed-off-by: Roman Kagan <rkagan@xxxxxxxxxxxxx>
> ---
> arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++
> drivers/hv/hyperv_vmbus.h | 24 ++--------------
> drivers/hv/channel_mgmt.c | 10 +++----
> drivers/hv/connection.c | 47 ++++++++++---------------------
> drivers/hv/vmbus_drv.c | 57 ++++++++++++++------------------------
> 5 files changed, 54 insertions(+), 97 deletions(-)
>
> diff --git a/arch/x86/include/uapi/asm/hyperv.h
> b/arch/x86/include/uapi/asm/hyperv.h
> index 6098ab5..af542a3 100644
> --- a/arch/x86/include/uapi/asm/hyperv.h
> +++ b/arch/x86/include/uapi/asm/hyperv.h
> @@ -363,4 +363,17 @@ struct hv_timer_message_payload {
> #define HV_STIMER_AUTOENABLE (1ULL << 3)
> #define HV_STIMER_SINT(config) (__u8)(((config) >> 16) &
> 0x0F)
>
> +/* Define synthetic interrupt controller flag constants. */
> +#define HV_EVENT_FLAGS_COUNT (256 * 8)
> +
> +/* Define the synthetic interrupt controller event flags format. */
> +struct hv_synic_event_flags {
> + __u64 flags[HV_EVENT_FLAGS_COUNT / 64];
> +};
> +
> +/* Define the synthetic interrupt flags page layout. */
> +struct hv_synic_event_flags_page {
> + struct hv_synic_event_flags
> sintevent_flags[HV_SYNIC_SINT_COUNT];
> +};
> +
> #endif
> diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
> index 4516498..4fab154 100644
> --- a/drivers/hv/hyperv_vmbus.h
> +++ b/drivers/hv/hyperv_vmbus.h
> @@ -26,7 +26,6 @@
> #define _HYPERV_VMBUS_H
>
> #include <linux/list.h>
> -#include <asm/sync_bitops.h>
> #include <linux/atomic.h>
> #include <linux/hyperv.h>
>
> @@ -75,11 +74,6 @@ enum hv_cpuid_function {
>
> #define HV_ANY_VP (0xFFFFFFFF)
>
> -/* Define synthetic interrupt controller flag constants. */
> -#define HV_EVENT_FLAGS_COUNT (256 * 8)
> -#define HV_EVENT_FLAGS_BYTE_COUNT (256)
> -#define HV_EVENT_FLAGS_DWORD_COUNT (256 / sizeof(u32))
> -
> /* Define invalid partition identifier. */
> #define HV_PARTITION_ID_INVALID ((u64)0x0)
>
> @@ -146,20 +140,6 @@ union hv_timer_config {
> };
> };
>
> -/* Define the number of message buffers associated with each port. */
> -#define HV_PORT_MESSAGE_BUFFER_COUNT (16)
> -
> -/* Define the synthetic interrupt controller event flags format. */
> -union hv_synic_event_flags {
> - u8 flags8[HV_EVENT_FLAGS_BYTE_COUNT];
> - u32 flags32[HV_EVENT_FLAGS_DWORD_COUNT];
> -};
> -
> -/* Define the synthetic interrupt flags page layout. */
> -struct hv_synic_event_flags_page {
> - union hv_synic_event_flags
> sintevent_flags[HV_SYNIC_SINT_COUNT];
> -};
> -
> /* Define SynIC control register. */
> union hv_synic_scontrol {
> u64 as_uint64;
> @@ -434,8 +414,8 @@ struct hv_context {
>
> bool synic_initialized;
>
> - void *synic_message_page[NR_CPUS];
> - void *synic_event_page[NR_CPUS];
> + struct hv_message_page *synic_message_page[NR_CPUS];
> + struct hv_synic_event_flags_page *synic_event_page[NR_CPUS];
> /*
> * Hypervisor's notion of virtual processor ID is different from
> * Linux' notion of CPU ID. This information can only be retrieved
> diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
> index 26b4192..49eaae2 100644
> --- a/drivers/hv/channel_mgmt.c
> +++ b/drivers/hv/channel_mgmt.c
> @@ -654,7 +654,6 @@ static void init_vp_index(struct vmbus_channel
> *channel, u16 dev_type)
> static void vmbus_wait_for_unload(void)
> {
> int cpu;
> - void *page_addr;
> struct hv_message *msg;
> struct vmbus_channel_message_header *hdr;
> u32 message_type;
> @@ -673,9 +672,8 @@ static void vmbus_wait_for_unload(void)
> break;
>
> for_each_online_cpu(cpu) {
> - page_addr = hv_context.synic_message_page[cpu];
> - msg = (struct hv_message *)page_addr +
> - VMBUS_MESSAGE_SINT;
> + msg = &hv_context.synic_message_page[cpu]->
> +
> sint_message[VMBUS_MESSAGE_SINT];
>
> message_type = READ_ONCE(msg-
> >header.message_type);
> if (message_type == HVMSG_NONE)
> @@ -699,8 +697,8 @@ static void vmbus_wait_for_unload(void)
> * messages after we reconnect.
> */
> for_each_online_cpu(cpu) {
> - page_addr = hv_context.synic_message_page[cpu];
> - msg = (struct hv_message *)page_addr +
> VMBUS_MESSAGE_SINT;
> + msg = &hv_context.synic_message_page[cpu]->
> + sint_message[VMBUS_MESSAGE_SINT];
> msg->header.message_type = HVMSG_NONE;
> }
> }
> diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
> index 6ce8b87..aaa2103 100644
> --- a/drivers/hv/connection.c
> +++ b/drivers/hv/connection.c
> @@ -381,17 +381,12 @@ static void process_chn_event(u32 relid)
> */
> void vmbus_on_event(unsigned long data)
> {
> - u32 dword;
> - u32 maxdword;
> - int bit;
> - u32 relid;
> - u32 *recv_int_page = NULL;
> - void *page_addr;
> + u32 relid, max_relid;
> + unsigned long *recv_int_page;
> int cpu = smp_processor_id();
> - union hv_synic_event_flags *event;
>
> if (vmbus_proto_version < VERSION_WIN8) {
> - maxdword = MAX_NUM_CHANNELS_SUPPORTED >> 5;
> + max_relid = MAX_NUM_CHANNELS_SUPPORTED;
> recv_int_page = vmbus_connection.recv_int_page;
> } else {
> /*
> @@ -399,36 +394,22 @@ void vmbus_on_event(unsigned long data)
> * can be directly checked to get the id of the channel
> * that has the interrupt pending.
> */
> - maxdword = HV_EVENT_FLAGS_DWORD_COUNT;
> - page_addr = hv_context.synic_event_page[cpu];
> - event = (union hv_synic_event_flags *)page_addr +
> - VMBUS_MESSAGE_SINT;
> - recv_int_page = event->flags32;
> + struct hv_synic_event_flags *event =
> + &hv_context.synic_event_page[cpu]->
> + sintevent_flags[VMBUS_MESSAGE_SINT];
> + max_relid = HV_EVENT_FLAGS_COUNT;
> + recv_int_page = (unsigned long *)event->flags;
> }
>
> -
> -
> /* Check events */
> if (!recv_int_page)
> return;
> - for (dword = 0; dword < maxdword; dword++) {
> - if (!recv_int_page[dword])
> - continue;
> - for (bit = 0; bit < 32; bit++) {
> - if (sync_test_and_clear_bit(bit,
> - (unsigned long *)&recv_int_page[dword])) {
> - relid = (dword << 5) + bit;
> -
> - if (relid == 0)
> - /*
> - * Special case - vmbus
> - * channel protocol msg
> - */
> - continue;
> -
> - process_chn_event(relid);
> - }
> - }
> +
> + /* relid == 0 is vmbus channel protocol msg */
> + relid = 1;
> + for_each_set_bit_from(relid, recv_int_page, max_relid) {
> + clear_bit(relid, recv_int_page);

We are using this test_and_clear_bit paradigm for locking. The current code
used the sync variant to ensure the host saw the changes we were making
here (clearing the bit). You may want to add a barrier here or use the sync
variant.

> + process_chn_event(relid);
> }
> }
>
> diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
> index 230c62e..13dd210 100644
> --- a/drivers/hv/vmbus_drv.c
> +++ b/drivers/hv/vmbus_drv.c
> @@ -872,9 +872,8 @@ static void hv_process_timer_expiration(struct
> hv_message *msg, int cpu)
> void vmbus_on_msg_dpc(unsigned long data)
> {
> int cpu = smp_processor_id();
> - void *page_addr = hv_context.synic_message_page[cpu];
> - struct hv_message *msg = (struct hv_message *)page_addr +
> - VMBUS_MESSAGE_SINT;
> + struct hv_message *msg = &hv_context.synic_message_page[cpu]-
> >
> +
> sint_message[VMBUS_MESSAGE_SINT];
> struct vmbus_channel_message_header *hdr;
> struct vmbus_channel_message_table_entry *entry;
> struct onmessage_work_context *ctx;
> @@ -911,54 +910,40 @@ void vmbus_on_msg_dpc(unsigned long data)
> static void vmbus_isr(void)
> {
> int cpu = smp_processor_id();
> - void *page_addr;
> struct hv_message *msg;
> - union hv_synic_event_flags *event;
> - bool handled = false;
> + struct hv_synic_event_flags *event;
>
> - page_addr = hv_context.synic_event_page[cpu];
> - if (page_addr == NULL)
> + if (!hv_context.synic_event_page[cpu])
> return;
>
> - event = (union hv_synic_event_flags *)page_addr +
> - VMBUS_MESSAGE_SINT;
> + event = &hv_context.synic_event_page[cpu]->
> + sintevent_flags[VMBUS_MESSAGE_SINT];
> /*
> * Check for events before checking for messages. This is the order
> * in which events and messages are checked in Windows guests on
> * Hyper-V, and the Windows team suggested we do the same.
> */
>
> - if ((vmbus_proto_version == VERSION_WS2008) ||
> - (vmbus_proto_version == VERSION_WIN7)) {
> -
> + /* On win8 and above the channel interrupts are signaled directly in
> + * the event page and will be checked in the .event_dpc
> + */
> + if (vmbus_proto_version >= VERSION_WIN8 ||
> /* Since we are a child, we only need to check bit 0 */
> - if (sync_test_and_clear_bit(0,
> - (unsigned long *) &event->flags32[0])) {
> - handled = true;
> - }
> - } else {
> - /*
> - * Our host is win8 or above. The signaling mechanism
> - * has changed and we can directly look at the event page.
> - * If bit n is set then we have an interrup on the channel
> - * whose id is n.
> - */
> - handled = true;
> - }
> -
> - if (handled)
> + test_and_clear_bit(0, (unsigned long *)event->flags))

Don't we need the sync variant of test_and_clear_bit here.

> tasklet_schedule(hv_context.event_dpc[cpu]);
>
> -
> - page_addr = hv_context.synic_message_page[cpu];
> - msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
> + msg = &hv_context.synic_message_page[cpu]->
> + sint_message[VMBUS_MESSAGE_SINT];
>
> /* Check if there are actual msgs to be processed */
> - if (msg->header.message_type != HVMSG_NONE) {
> - if (msg->header.message_type == HVMSG_TIMER_EXPIRED)
> - hv_process_timer_expiration(msg, cpu);
> - else
> - tasklet_schedule(hv_context.msg_dpc[cpu]);
> + switch (READ_ONCE(msg->header.message_type)) {
> + case HVMSG_NONE:
> + break;
> + case HVMSG_TIMER_EXPIRED:
> + hv_process_timer_expiration(msg, cpu);
> + break;
> + default:
> + tasklet_schedule(hv_context.msg_dpc[cpu]);
> }
>
> add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0);
> --
> 2.9.3