[PATCH 3.16 023/212] Drivers: hv: vmbus: Raise retry/wait limits in vmbus_post_msg()

From: Ben Hutchings
Date: Thu Jun 01 2017 - 12:41:19 EST


3.16.44-rc1 review patch. If anyone has any objections, please let me know.

------------------

From: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx>

commit c0bb03924f1a80e7f65900e36c8e6b3dc167c5f8 upstream.

DoS protection conditions were altered in WS2016 and now it's easy to get
-EAGAIN returned from vmbus_post_msg() (e.g. when we try changing MTU on a
netvsc device in a loop). None of the vmbus_post_msg() callers retry the
operation, so we usually end up with a non-functional device or a crash.

While the host's DoS protection conditions are unknown to me, my tests show
that it can take up to 10 seconds before the message is sent, so doing
udelay() is not an option; we really need to sleep. Almost all
vmbus_post_msg() callers are ready to sleep, but there is one special case:
vmbus_initiate_unload(), which can be called from interrupt/NMI context,
where we can't sleep. I'm also not sure about the lonely
vmbus_send_tl_connect_request(), which has no in-tree users, but its
external users are most likely waiting for the host to reply, so sleeping
there is also appropriate.

Signed-off-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx>
Signed-off-by: K. Y. Srinivasan <kys@xxxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
[bwh: Backported to 3.16:
- Drop changes in vmbus_send_tl_connect_request(),
vmbus_initiate_unload()
- Adjust context, indentation]
Signed-off-by: Ben Hutchings <ben@xxxxxxxxxxxxxxx>
---
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -163,7 +163,7 @@ int vmbus_open(struct vmbus_channel *new
spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

ret = vmbus_post_msg(open_msg,
- sizeof(struct vmbus_channel_open_channel));
+ sizeof(struct vmbus_channel_open_channel), true);

if (ret != 0) {
err = ret;
@@ -391,7 +391,7 @@ int vmbus_establish_gpadl(struct vmbus_c
spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

ret = vmbus_post_msg(gpadlmsg, msginfo->msgsize -
- sizeof(*msginfo));
+ sizeof(*msginfo), true);
if (ret != 0)
goto cleanup;

@@ -407,8 +407,8 @@ int vmbus_establish_gpadl(struct vmbus_c
gpadl_body->gpadl = next_gpadl_handle;

ret = vmbus_post_msg(gpadl_body,
- submsginfo->msgsize -
- sizeof(*submsginfo));
+ submsginfo->msgsize - sizeof(*submsginfo),
+ true);
if (ret != 0)
goto cleanup;

@@ -456,8 +456,8 @@ int vmbus_teardown_gpadl(struct vmbus_ch
list_add_tail(&info->msglistentry,
&vmbus_connection.chn_msg_list);
spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
- ret = vmbus_post_msg(msg,
- sizeof(struct vmbus_channel_gpadl_teardown));
+ ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_gpadl_teardown),
+ true);

if (ret)
goto post_msg_err;
@@ -502,7 +502,8 @@ static int vmbus_close_internal(struct v
msg->header.msgtype = CHANNELMSG_CLOSECHANNEL;
msg->child_relid = channel->offermsg.child_relid;

- ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_close_channel));
+ ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_close_channel),
+ true);

if (ret) {
pr_err("Close failed: close post msg return is %d\n", ret);
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -229,7 +229,8 @@ static void vmbus_process_rescind_offer(
memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
msg.child_relid = channel->offermsg.child_relid;
msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
- vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released));
+ vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released),
+ true);

if (channel->target_cpu != smp_processor_id())
smp_call_function_single(channel->target_cpu,
@@ -771,8 +772,8 @@ int vmbus_request_offers(void)
msg->msgtype = CHANNELMSG_REQUESTOFFERS;


- ret = vmbus_post_msg(msg,
- sizeof(struct vmbus_channel_message_header));
+ ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_message_header),
+ true);
if (ret != 0) {
pr_err("Unable to request offers - %d\n", ret);

--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -94,7 +94,8 @@ static int vmbus_negotiate_version(struc
spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);

ret = vmbus_post_msg(msg,
- sizeof(struct vmbus_channel_initiate_contact));
+ sizeof(struct vmbus_channel_initiate_contact),
+ true);
if (ret != 0) {
spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
list_del(&msginfo->msglistentry);
@@ -413,7 +414,7 @@ void vmbus_on_event(unsigned long data)
/*
* vmbus_post_msg - Send a msg on the vmbus's message connection
*/
-int vmbus_post_msg(void *buffer, size_t buflen)
+int vmbus_post_msg(void *buffer, size_t buflen, bool can_sleep)
{
union hv_connection_id conn_id;
int ret = 0;
@@ -428,7 +429,7 @@ int vmbus_post_msg(void *buffer, size_t
* insufficient resources. Retry the operation a couple of
* times before giving up.
*/
- while (retries < 20) {
+ while (retries < 100) {
ret = hv_post_message(conn_id, 1, buffer, buflen);

switch (ret) {
@@ -451,8 +452,14 @@ int vmbus_post_msg(void *buffer, size_t
}

retries++;
- udelay(usec);
- if (usec < 2048)
+ if (can_sleep && usec > 1000)
+ msleep(usec / 1000);
+ else if (usec < MAX_UDELAY_MS * 1000)
+ udelay(usec);
+ else
+ mdelay(usec / 1000);
+
+ if (usec < 256000)
usec *= 2;
}
return ret;
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -672,7 +672,7 @@ void vmbus_free_channels(void);

int vmbus_connect(void);

-int vmbus_post_msg(void *buffer, size_t buflen);
+int vmbus_post_msg(void *buffer, size_t buflen, bool can_sleep);

int vmbus_set_event(struct vmbus_channel *channel);