Re: [PATCH v5 05/14] drm/dp_mst: Add probe_lock

From: Sean Paul
Date: Tue Oct 22 2019 - 12:06:14 EST


On Mon, Oct 21, 2019 at 10:36:00PM -0400, Lyude Paul wrote:
> Currently, MST lacks locking in a lot of places that really should have
> some sort of locking. Hotplugging and link address code paths are some
> of the offenders here, as there is actually nothing preventing us from
> running a link address probe while at the same time handling a
> connection status update request - something that's likely always been
> possible but never seen in the wild because hotplugging has been broken
> for ages now (with the exception of amdgpu, for reasons I don't think
> are worth digging into very far).
>
> Note: I'm going to start using the term "in-memory topology layout" here
> to refer to drm_dp_mst_port->mstb and drm_dp_mst_branch->ports.
>
> Locking in these places is a little tougher than it looks though.
> Generally we protect anything having to do with the in-memory topology
> layout under &mgr->lock. But this becomes nearly impossible to do from
> the context of link address probes due to the fact that &mgr->lock is
> usually grabbed under various modesetting locks, meaning that
> there's no way we can just invert the &mgr->lock order and keep it
> locked throughout the whole process of updating the topology.
>
> Luckily there are only two workers which can modify the in-memory
> topology layout: drm_dp_mst_up_req_work() and
> drm_dp_mst_link_probe_work(), meaning as long as we prevent these two
> workers from traveling the topology layout in parallel with the intent
> of updating it we don't need to worry about grabbing &mgr->lock in these
> workers for reads. We only need to grab &mgr->lock in these workers for
> writes, so that readers outside these two workers are still protected
> from the topology layout changing beneath them.
>
> So, add the new &mgr->probe_lock and use it in both
> drm_dp_mst_link_probe_work() and drm_dp_mst_up_req_work(). Additionally,
> add some more detailed explanations for how this locking is intended to
> work to drm_dp_mst_port->mstb and drm_dp_mst_branch->ports.

This seems like a good solution to me, thanks for working this through!

Any chance we could add a WARN_ON(!mutex_is_locked(&mgr->probe_lock)); somewhere
centrally called by all paths modifying the in-memory topology layout?
drm_dp_add_port perhaps? That way we have a fallback in case something else
starts mucking with the topology.

Other than that,

Reviewed-by: Sean Paul <sean@xxxxxxxxxx>


>
> Signed-off-by: Lyude Paul <lyude@xxxxxxxxxx>
> Cc: Juston Li <juston.li@xxxxxxxxx>
> Cc: Imre Deak <imre.deak@xxxxxxxxx>
> Cc: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx>
> Cc: Harry Wentland <hwentlan@xxxxxxx>
> Cc: Daniel Vetter <daniel.vetter@xxxxxxxx>
> Cc: Sean Paul <sean@xxxxxxxxxx>
> ---
> drivers/gpu/drm/drm_dp_mst_topology.c | 28 ++++++++++++++---------
> include/drm/drm_dp_mst_helper.h | 32 +++++++++++++++++++++++----
> 2 files changed, 46 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
> index 08c316a727df..11d842f0bff5 100644
> --- a/drivers/gpu/drm/drm_dp_mst_topology.c
> +++ b/drivers/gpu/drm/drm_dp_mst_topology.c
> @@ -2147,37 +2147,40 @@ static void drm_dp_check_and_send_link_address(struct drm_dp_mst_topology_mgr *m
> struct drm_dp_mst_branch *mstb)
> {
> struct drm_dp_mst_port *port;
> - struct drm_dp_mst_branch *mstb_child;
> +
> if (!mstb->link_address_sent)
> drm_dp_send_link_address(mgr, mstb);
>
> list_for_each_entry(port, &mstb->ports, next) {
> - if (port->input)
> - continue;
> + struct drm_dp_mst_branch *mstb_child = NULL;
>
> - if (!port->ddps)
> + if (port->input || !port->ddps)
> continue;
>
> if (!port->available_pbn)
> drm_dp_send_enum_path_resources(mgr, mstb, port);
>
> - if (port->mstb) {
> + if (port->mstb)
> mstb_child = drm_dp_mst_topology_get_mstb_validated(
> mgr, port->mstb);
> - if (mstb_child) {
> - drm_dp_check_and_send_link_address(mgr, mstb_child);
> - drm_dp_mst_topology_put_mstb(mstb_child);
> - }
> +
> + if (mstb_child) {
> + drm_dp_check_and_send_link_address(mgr, mstb_child);
> + drm_dp_mst_topology_put_mstb(mstb_child);
> }
> }
> }
>
> static void drm_dp_mst_link_probe_work(struct work_struct *work)
> {
> - struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct drm_dp_mst_topology_mgr, work);
> + struct drm_dp_mst_topology_mgr *mgr =
> + container_of(work, struct drm_dp_mst_topology_mgr, work);
> + struct drm_device *dev = mgr->dev;
> struct drm_dp_mst_branch *mstb;
> int ret;
>
> + mutex_lock(&mgr->probe_lock);
> +
> mutex_lock(&mgr->lock);
> mstb = mgr->mst_primary;
> if (mstb) {
> @@ -2190,6 +2193,7 @@ static void drm_dp_mst_link_probe_work(struct work_struct *work)
> drm_dp_check_and_send_link_address(mgr, mstb);
> drm_dp_mst_topology_put_mstb(mstb);
> }
> + mutex_unlock(&mgr->probe_lock);
> }
>
> static bool drm_dp_validate_guid(struct drm_dp_mst_topology_mgr *mgr,
> @@ -3313,6 +3317,7 @@ static void drm_dp_mst_up_req_work(struct work_struct *work)
> up_req_work);
> struct drm_dp_pending_up_req *up_req;
>
> + mutex_lock(&mgr->probe_lock);
> while (true) {
> mutex_lock(&mgr->up_req_lock);
> up_req = list_first_entry_or_null(&mgr->up_req_list,
> @@ -3328,6 +3333,7 @@ static void drm_dp_mst_up_req_work(struct work_struct *work)
> drm_dp_mst_process_up_req(mgr, up_req);
> kfree(up_req);
> }
> + mutex_unlock(&mgr->probe_lock);
> }
>
> static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr)
> @@ -4349,6 +4355,7 @@ int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr,
> mutex_init(&mgr->payload_lock);
> mutex_init(&mgr->delayed_destroy_lock);
> mutex_init(&mgr->up_req_lock);
> + mutex_init(&mgr->probe_lock);
> INIT_LIST_HEAD(&mgr->tx_msg_downq);
> INIT_LIST_HEAD(&mgr->destroy_port_list);
> INIT_LIST_HEAD(&mgr->destroy_branch_device_list);
> @@ -4414,6 +4421,7 @@ void drm_dp_mst_topology_mgr_destroy(struct drm_dp_mst_topology_mgr *mgr)
> mutex_destroy(&mgr->qlock);
> mutex_destroy(&mgr->lock);
> mutex_destroy(&mgr->up_req_lock);
> + mutex_destroy(&mgr->probe_lock);
> }
> EXPORT_SYMBOL(drm_dp_mst_topology_mgr_destroy);
>
> diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h
> index 7d80c38ee00e..bccb5514e0ef 100644
> --- a/include/drm/drm_dp_mst_helper.h
> +++ b/include/drm/drm_dp_mst_helper.h
> @@ -55,8 +55,6 @@ struct drm_dp_vcpi {
> * @num_sdp_stream_sinks: Number of stream sinks
> * @available_pbn: Available bandwidth for this port.
> * @next: link to next port on this branch device
> - * @mstb: branch device on this port, protected by
> - * &drm_dp_mst_topology_mgr.lock
> * @aux: i2c aux transport to talk to device connected to this port, protected
> * by &drm_dp_mst_topology_mgr.lock
> * @parent: branch device parent of this port
> @@ -92,7 +90,17 @@ struct drm_dp_mst_port {
> u8 num_sdp_stream_sinks;
> uint16_t available_pbn;
> struct list_head next;
> - struct drm_dp_mst_branch *mstb; /* pointer to an mstb if this port has one */
> + /**
> + * @mstb: the branch device connected to this port, if there is one.
> + * This should be considered protected for reading by
> + * &drm_dp_mst_topology_mgr.lock. There are two exceptions to this:
> + * &drm_dp_mst_topology_mgr.up_req_work and
> + * &drm_dp_mst_topology_mgr.work, which do not grab
> + * &drm_dp_mst_topology_mgr.lock during reads but are the only
> + * updaters of this pointer and are protected from writing concurrently
> + * by &drm_dp_mst_topology_mgr.probe_lock.
> + */
> + struct drm_dp_mst_branch *mstb;
> struct drm_dp_aux aux; /* i2c bus for this port? */
> struct drm_dp_mst_branch *parent;
>
> @@ -118,7 +126,6 @@ struct drm_dp_mst_port {
> * @lct: Link count total to talk to this branch device.
> * @num_ports: number of ports on the branch.
> * @msg_slots: one bit per transmitted msg slot.
> - * @ports: linked list of ports on this branch.
> * @port_parent: pointer to the port parent, NULL if toplevel.
> * @mgr: topology manager for this branch device.
> * @tx_slots: transmission slots for this device.
> @@ -156,6 +163,16 @@ struct drm_dp_mst_branch {
> int num_ports;
>
> int msg_slots;
> + /**
> + * @ports: the list of ports on this branch device. This should be
> + * considered protected for reading by &drm_dp_mst_topology_mgr.lock.
> + * There are two exceptions to this:
> + * &drm_dp_mst_topology_mgr.up_req_work and
> + * &drm_dp_mst_topology_mgr.work, which do not grab
> + * &drm_dp_mst_topology_mgr.lock during reads but are the only
> + * updaters of this list and are protected from updating the list
> + * concurrently by &drm_dp_mst_topology_mgr.probe_lock
> + */
> struct list_head ports;
>
> /* list of tx ops queue for this port */
> @@ -502,6 +519,13 @@ struct drm_dp_mst_topology_mgr {
> */
> struct mutex lock;
>
> + /**
> + * @probe_lock: Prevents @work and @up_req_work, the only writers of
> + * &drm_dp_mst_port.mstb and &drm_dp_mst_branch.ports, from racing
> + * while they update the topology.
> + */
> + struct mutex probe_lock;
> +
> /**
> * @mst_state: If this manager is enabled for an MST capable port. False
> * if no MST sink/branch devices is connected.
> --
> 2.21.0
>

--
Sean Paul, Software Engineer, Google / Chromium OS