Re: [PATCH v2] coresight: core: Fix coresight device probe failure issue

From: Suzuki K Poulose
Date: Tue Mar 08 2022 - 08:36:40 EST


On 08/03/2022 11:56, Suzuki K Poulose wrote:
Hi Jinlong

On 04/03/2022 08:23, Mao Jinlong wrote:
It is possibe that probe failure issue happens when the device
and its child_device's probe happens at the same time.
In coresight_make_links, has_conns_grp is true for parent, but
has_conns_grp is false for child device as has_conns_grp is set
to true in coresight_create_conns_sysfs_group. The probe of parent
device will fail at this condition. Add has_conns_grp check for
child device before make the links and make the process from
device_register to connection_create be atomic to avoid this
probe failure issue.

Suggested-by: Suzuki K Poulose <suzuki.poulose@xxxxxxx>
Suggested-by: Mike Leach <mike.leach@xxxxxxxxxx>
Signed-off-by: Mao Jinlong <quic_jinlmao@xxxxxxxxxxx>
---
  drivers/hwtracing/coresight/coresight-core.c | 12 ++++++------
  1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c
index 88653d1c06a4..b3e3bc59c09b 100644
--- a/drivers/hwtracing/coresight/coresight-core.c
+++ b/drivers/hwtracing/coresight/coresight-core.c
@@ -1382,7 +1382,7 @@ static int coresight_fixup_device_conns(struct coresight_device *csdev)
              continue;
          conn->child_dev =
              coresight_find_csdev_by_fwnode(conn->child_fwnode);
-        if (conn->child_dev) {
+        if (conn->child_dev && conn->child_dev->has_conns_grp) {
              ret = coresight_make_links(csdev, conn,
                             conn->child_dev);
              if (ret)
@@ -1594,7 +1594,8 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
      refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL);
      if (!refcnts) {
          ret = -ENOMEM;
-        goto err_free_csdev;
+        kfree(csdev);
+        goto err_out;
      }
      csdev->refcnt = refcnts;
@@ -1619,8 +1620,10 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
      csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev));
      dev_set_name(&csdev->dev, "%s", desc->name);
+    mutex_lock(&coresight_mutex);
      ret = device_register(&csdev->dev);
      if (ret) {
+        mutex_unlock(&coresight_mutex);
          put_device(&csdev->dev);
          /*
           * All resources are free'd explicitly via
@@ -1634,6 +1637,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
          ret = etm_perf_add_symlink_sink(csdev);
          if (ret) {
+            mutex_unlock(&coresight_mutex);
              device_unregister(&csdev->dev);
              /*
               * As with the above, all resources are free'd
@@ -1645,8 +1649,6 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
          }
      }
-    mutex_lock(&coresight_mutex);
-
      ret = coresight_create_conns_sysfs_group(csdev);
      if (!ret)
          ret = coresight_fixup_device_conns(csdev);
@@ -1663,8 +1665,6 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
      return csdev;
-err_free_csdev:
-    kfree(csdev);
  err_out:
      /* Cleanup the connection information */
      coresight_release_platform_data(NULL, desc->pdata);

Could we consolidate the unlock sequence to a single point with something like this (untested):


diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c
index af00dca8d1ac..198ee140c6e6 100644
--- a/drivers/hwtracing/coresight/coresight-core.c
+++ b/drivers/hwtracing/coresight/coresight-core.c
@@ -1571,6 +1571,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
     int nr_refcnts = 1;
     atomic_t *refcnts = NULL;
     struct coresight_device *csdev;
+    bool registered = false;

     csdev = kzalloc(sizeof(*csdev), GFP_KERNEL);
     if (!csdev) {
@@ -1591,7 +1592,8 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
     refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL);
     if (!refcnts) {
         ret = -ENOMEM;
-        goto err_free_csdev;
+        kfree(csdev);
+        goto err_out;
     }

     csdev->refcnt = refcnts;
@@ -1616,6 +1618,13 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
     csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev));
     dev_set_name(&csdev->dev, "%s", desc->name);

+    /*
+     * Make sure the device registration and the connection fixup
+     * are synchronised, so that we don't see uninitialised devices
+     * on the coresight bus while trying to resolve the connections.
+     */
+    mutex_lock(&coresight_mutex);
+
     ret = device_register(&csdev->dev);
     if (ret) {
         put_device(&csdev->dev);
@@ -1623,7 +1632,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
          * All resources are free'd explicitly via
          * coresight_device_release(), triggered from put_device().
          */
-        goto err_out;
+        goto out_unlock;
     }

     if (csdev->type == CORESIGHT_DEV_TYPE_SINK ||
@@ -1638,11 +1647,11 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
              * from put_device(), which is in turn called from
              * function device_unregister().
              */
-            goto err_out;
+            goto out_unlock;
         }
     }
-
-    mutex_lock(&coresight_mutex);
+    /* Device is now registered */
+    registered = true;

     ret = coresight_create_conns_sysfs_group(csdev);
     if (!ret)
@@ -1652,16 +1661,15 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
     if (!ret && cti_assoc_ops && cti_assoc_ops->add)
         cti_assoc_ops->add(csdev);

+out_unlock:
     mutex_unlock(&coresight_mutex);
-    if (ret) {
+    /* Success */
+    if (!ret)
+        return csdev;
+
+    /* Unregister the device if needed */
+    if (registered)
         coresight_unregister(csdev);
-        return ERR_PTR(ret);
-    }

Apologies, this should be preserved :

i.e,
if (registered) {
coresight_unregister(csdev);
return ERR_PTR(ret);
}

Thanks
Suzuki