[PATCH v2 19/27] drm/dp_mst: Handle UP requests asynchronously

From: Lyude Paul
Date: Tue Sep 03 2019 - 16:48:57 EST


Once upon a time, hotplugging devices on MST branches actually worked in
DRM. Now, it only works in amdgpu (likely because of how it's hotplug
handlers are implemented). On both i915 and nouveau, hotplug
notifications from MST branches are noticed - but trying to respond to
them causes messaging timeouts and causes the whole topology state to go
out of sync with reality, usually resulting in the user needing to
replug the entire topology in hopes that it actually fixes things.

The reason for this is because the way we currently handle UP requests
in MST is completely bogus. drm_dp_mst_handle_up_req() is called from
drm_dp_mst_hpd_irq(), which is usually called from the driver's hotplug
handler. Because we handle sending the hotplug event from this function,
we actually cause the driver's hotplug handler (and in turn, all
sideband transactions) to block on
drm_device->mode_config.connection_mutex. This makes it impossible to
send any sideband messages from the driver's connector probing
functions, resulting in the aforementioned sideband message timeout.

There's even more problems with this beyond breaking hotplugging on MST
branch devices. It also makes it almost impossible to protect
drm_dp_mst_port struct members under a lock because we then have to
worry about dealing with all of the lock dependency issues that ensue.

So, let's finally actually fix this issue by handling the processing of
up requests asyncronously. This way we can send sideband messages from
most contexts without having to deal with getting blocked if we hold
connection_mutex. This also fixes MST branch device hotplugging on i915,
finally!

Cc: Juston Li <juston.li@xxxxxxxxx>
Cc: Imre Deak <imre.deak@xxxxxxxxx>
Cc: Ville SyrjÃlà <ville.syrjala@xxxxxxxxxxxxxxx>
Cc: Harry Wentland <hwentlan@xxxxxxx>
Cc: Daniel Vetter <daniel.vetter@xxxxxxxx>
Signed-off-by: Lyude Paul <lyude@xxxxxxxxxx>
---
drivers/gpu/drm/drm_dp_mst_topology.c | 146 +++++++++++++++++++-------
include/drm/drm_dp_mst_helper.h | 16 +++
2 files changed, 122 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index cfaf9eb7ace9..5101eeab4485 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -46,6 +46,12 @@
* protocol. The helpers contain a topology manager and bandwidth manager.
* The helpers encapsulate the sending and received of sideband msgs.
*/
+struct drm_dp_pending_up_req {
+ struct drm_dp_sideband_msg_hdr hdr;
+ struct drm_dp_sideband_msg_req_body msg;
+ struct list_head next;
+};
+
static bool dump_dp_payload_table(struct drm_dp_mst_topology_mgr *mgr,
char *buf);

@@ -3109,6 +3115,7 @@ void drm_dp_mst_topology_mgr_suspend(struct drm_dp_mst_topology_mgr *mgr)
drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL,
DP_MST_EN | DP_UPSTREAM_IS_SRC);
mutex_unlock(&mgr->lock);
+ flush_work(&mgr->up_req_work);
flush_work(&mgr->work);
flush_work(&mgr->delayed_destroy_work);
}
@@ -3281,12 +3288,70 @@ static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr)
return 0;
}

+static inline void
+drm_dp_mst_process_up_req(struct drm_dp_mst_topology_mgr *mgr,
+ struct drm_dp_pending_up_req *up_req)
+{
+ struct drm_dp_mst_branch *mstb = NULL;
+ struct drm_dp_sideband_msg_req_body *msg = &up_req->msg;
+ struct drm_dp_sideband_msg_hdr *hdr = &up_req->hdr;
+
+ if (hdr->broadcast) {
+ const u8 *guid = NULL;
+
+ if (msg->req_type == DP_CONNECTION_STATUS_NOTIFY)
+ guid = msg->u.conn_stat.guid;
+ else if (msg->req_type == DP_RESOURCE_STATUS_NOTIFY)
+ guid = msg->u.resource_stat.guid;
+
+ mstb = drm_dp_get_mst_branch_device_by_guid(mgr, guid);
+ } else {
+ mstb = drm_dp_get_mst_branch_device(mgr, hdr->lct, hdr->rad);
+ }
+
+ if (!mstb) {
+ DRM_DEBUG_KMS("Got MST reply from unknown device %d\n",
+ hdr->lct);
+ return;
+ }
+
+ /* TODO: Add missing handler for DP_RESOURCE_STATUS_NOTIFY events */
+ if (msg->req_type == DP_CONNECTION_STATUS_NOTIFY) {
+ drm_dp_mst_handle_conn_stat(mstb, &msg->u.conn_stat);
+ drm_kms_helper_hotplug_event(mgr->dev);
+ }
+
+ drm_dp_mst_topology_put_mstb(mstb);
+}
+
+static void drm_dp_mst_up_req_work(struct work_struct *work)
+{
+ struct drm_dp_mst_topology_mgr *mgr =
+ container_of(work, struct drm_dp_mst_topology_mgr,
+ up_req_work);
+ struct drm_dp_pending_up_req *up_req;
+
+ while (true) {
+ mutex_lock(&mgr->up_req_lock);
+ up_req = list_first_entry_or_null(&mgr->up_req_list,
+ struct drm_dp_pending_up_req,
+ next);
+ if (up_req)
+ list_del(&up_req->next);
+ mutex_unlock(&mgr->up_req_lock);
+
+ if (!up_req)
+ break;
+
+ drm_dp_mst_process_up_req(mgr, up_req);
+ kfree(up_req);
+ }
+}
+
static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr)
{
- struct drm_dp_sideband_msg_req_body msg;
struct drm_dp_sideband_msg_hdr *hdr = &mgr->up_req_recv.initial_hdr;
- struct drm_dp_mst_branch *mstb = NULL;
- const u8 *guid;
+ struct drm_dp_pending_up_req *up_req;
bool seqno;

if (!drm_dp_get_one_sb_msg(mgr, true))
@@ -3295,56 +3360,53 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr)
if (!mgr->up_req_recv.have_eomt)
return 0;

- if (!hdr->broadcast) {
- mstb = drm_dp_get_mst_branch_device(mgr, hdr->lct, hdr->rad);
- if (!mstb) {
- DRM_DEBUG_KMS("Got MST reply from unknown device %d\n",
- hdr->lct);
- goto out;
- }
+ up_req = kzalloc(sizeof(*up_req), GFP_KERNEL);
+ if (!up_req) {
+ DRM_ERROR("Not enough memory to process MST up req\n");
+ return -ENOMEM;
}
+ INIT_LIST_HEAD(&up_req->next);

seqno = hdr->seqno;
- drm_dp_sideband_parse_req(&mgr->up_req_recv, &msg);
+ drm_dp_sideband_parse_req(&mgr->up_req_recv, &up_req->msg);

- if (msg.req_type == DP_CONNECTION_STATUS_NOTIFY)
- guid = msg.u.conn_stat.guid;
- else if (msg.req_type == DP_RESOURCE_STATUS_NOTIFY)
- guid = msg.u.resource_stat.guid;
- else
+ if (up_req->msg.req_type != DP_CONNECTION_STATUS_NOTIFY &&
+ up_req->msg.req_type != DP_RESOURCE_STATUS_NOTIFY) {
+ DRM_DEBUG_KMS("Received unknown up req type, ignoring: %x\n",
+ up_req->msg.req_type);
+ kfree(up_req);
goto out;
-
- drm_dp_send_up_ack_reply(mgr, mgr->mst_primary, msg.req_type, seqno,
- false);
-
- if (!mstb) {
- mstb = drm_dp_get_mst_branch_device_by_guid(mgr, guid);
- if (!mstb) {
- DRM_DEBUG_KMS("Got MST reply from unknown device %d\n",
- hdr->lct);
- goto out;
- }
}

- if (msg.req_type == DP_CONNECTION_STATUS_NOTIFY) {
- drm_dp_mst_handle_conn_stat(mstb, &msg.u.conn_stat);
+ drm_dp_send_up_ack_reply(mgr, mgr->mst_primary, up_req->msg.req_type,
+ seqno, false);
+
+ if (up_req->msg.req_type == DP_CONNECTION_STATUS_NOTIFY) {
+ const struct drm_dp_connection_status_notify *conn_stat =
+ &up_req->msg.u.conn_stat;

DRM_DEBUG_KMS("Got CSN: pn: %d ldps:%d ddps: %d mcs: %d ip: %d pdt: %d\n",
- msg.u.conn_stat.port_number,
- msg.u.conn_stat.legacy_device_plug_status,
- msg.u.conn_stat.displayport_device_plug_status,
- msg.u.conn_stat.message_capability_status,
- msg.u.conn_stat.input_port,
- msg.u.conn_stat.peer_device_type);
+ conn_stat->port_number,
+ conn_stat->legacy_device_plug_status,
+ conn_stat->displayport_device_plug_status,
+ conn_stat->message_capability_status,
+ conn_stat->input_port,
+ conn_stat->peer_device_type);
+ } else if (up_req->msg.req_type == DP_RESOURCE_STATUS_NOTIFY) {
+ const struct drm_dp_resource_status_notify *res_stat =
+ &up_req->msg.u.resource_stat;

- drm_kms_helper_hotplug_event(mgr->dev);
- } else if (msg.req_type == DP_RESOURCE_STATUS_NOTIFY) {
DRM_DEBUG_KMS("Got RSN: pn: %d avail_pbn %d\n",
- msg.u.resource_stat.port_number,
- msg.u.resource_stat.available_pbn);
+ res_stat->port_number,
+ res_stat->available_pbn);
}

- drm_dp_mst_topology_put_mstb(mstb);
+ up_req->hdr = *hdr;
+ mutex_lock(&mgr->up_req_lock);
+ list_add_tail(&up_req->next, &mgr->up_req_list);
+ mutex_unlock(&mgr->up_req_lock);
+ queue_work(system_long_wq, &mgr->up_req_work);
+
out:
memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx));
return 0;
@@ -4320,12 +4382,15 @@ int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr,
mutex_init(&mgr->qlock);
mutex_init(&mgr->payload_lock);
mutex_init(&mgr->delayed_destroy_lock);
+ mutex_init(&mgr->up_req_lock);
INIT_LIST_HEAD(&mgr->tx_msg_downq);
INIT_LIST_HEAD(&mgr->destroy_port_list);
INIT_LIST_HEAD(&mgr->destroy_branch_device_list);
+ INIT_LIST_HEAD(&mgr->up_req_list);
INIT_WORK(&mgr->work, drm_dp_mst_link_probe_work);
INIT_WORK(&mgr->tx_work, drm_dp_tx_work);
INIT_WORK(&mgr->delayed_destroy_work, drm_dp_delayed_destroy_work);
+ INIT_WORK(&mgr->up_req_work, drm_dp_mst_up_req_work);
init_waitqueue_head(&mgr->tx_waitq);
mgr->dev = dev;
mgr->aux = aux;
@@ -4382,6 +4447,7 @@ void drm_dp_mst_topology_mgr_destroy(struct drm_dp_mst_topology_mgr *mgr)
mutex_destroy(&mgr->payload_lock);
mutex_destroy(&mgr->qlock);
mutex_destroy(&mgr->lock);
+ mutex_destroy(&mgr->up_req_lock);
}
EXPORT_SYMBOL(drm_dp_mst_topology_mgr_destroy);

diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h
index 8ba2a01324bb..7d80c38ee00e 100644
--- a/include/drm/drm_dp_mst_helper.h
+++ b/include/drm/drm_dp_mst_helper.h
@@ -597,6 +597,22 @@ struct drm_dp_mst_topology_mgr {
* devices, needed to avoid locking inversion.
*/
struct work_struct delayed_destroy_work;
+
+ /**
+ * @up_req_list: List of pending up requests from the topology that
+ * need to be processed, in chronological order.
+ */
+ struct list_head up_req_list;
+ /**
+ * @up_req_lock: Protects @up_req_list
+ */
+ struct mutex up_req_lock;
+ /**
+ * @up_req_work: Work item to process up requests received from the
+ * topology. Needed to avoid blocking hotplug handling and sideband
+ * transmissions.
+ */
+ struct work_struct up_req_work;
};

int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr,
--
2.21.0