Re: [PATCH v8 2/5] Coresight: Add trace_id function to retrieving the trace ID

From: Jie Gan
Date: Thu Jan 23 2025 - 05:03:55 EST

Next message: Hari Bathini: "Re: [PATCH v2 1/6] kexec: Initialize ELF lowest address to ULONG_MAX"
Previous message: Christian Schrefl: "Re: [PATCH 2/3] rust: miscdevice: Add additional data to MiscDeviceRegistration"
In reply to: James Clark: "Re: [PATCH v8 2/5] Coresight: Add trace_id function to retrieving the trace ID"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

On 1/23/2025 5:47 PM, James Clark wrote:

On 23/01/2025 6:28 am, Jie Gan wrote:

On 1/13/2025 8:02 PM, James Clark wrote:

On 26/12/2024 1:10 am, Jie Gan wrote:

Add 'trace_id' function pointer in ops. It's responsible for
retrieving the device's trace ID.

Add 'struct cs_sink_data' to store the data that is needed by
coresight_enable_path/coresight_disable_path. The structure
will be transmitted to the helper and sink device to enable
related functionalities.

The new cs_sink_data struct is quite specific to this change. Can we start passing the path around to the enable/disable functions? That will allow devices to gather anything they want in the future, because we already have coresight_get_sink(path), coresight_get_source(path) etc.

And see below, but for this case we can also change the path struct to contain the trace ID. Then all the new functions, allocations and searches for the trace ID are unnecessary. The CTCU will have access to the path, and by the time its enable function is called the trace ID is already assigned.

It's also easier to understand at which point a trace ID is allocated, rather than adding the trace_id() callbacks from everywhere which could potentially either read or allocate. I suppose that's "safer" because maybe it's not allocated, but I can't see what case it would happen in reverse.

Signed-off-by: Jie Gan <quic_jiegan@xxxxxxxxxxx>
---
drivers/hwtracing/coresight/coresight-core.c | 59 ++++++++++++++++----
drivers/hwtracing/coresight/coresight-etb10.c | 3 +-
.../hwtracing/coresight/coresight-etm-perf.c | 37 ++++++++++--
.../coresight/coresight-etm3x-core.c          | 30 ++++++++++
.../coresight/coresight-etm4x-core.c          | 29 +++++++++
drivers/hwtracing/coresight/coresight-priv.h | 13 +++-
drivers/hwtracing/coresight/coresight-stm.c   | 22 +++++++
drivers/hwtracing/coresight/coresight-sysfs.c | 24 +++++++-
.../hwtracing/coresight/coresight-tmc-etf.c   | 3 +-
.../hwtracing/coresight/coresight-tmc-etr.c   | 6 +-
drivers/hwtracing/coresight/coresight-tpda.c | 20 +++++++
drivers/hwtracing/coresight/coresight-trbe.c | 4 +-
drivers/hwtracing/coresight/ultrasoc-smb.c    | 3 +-
include/linux/coresight.h                     | 6 ++
14 files changed, 234 insertions(+), 25 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c
index 0a9380350fb5..2e560b425fd4 100644
--- a/drivers/hwtracing/coresight/coresight-core.c
+++ b/drivers/hwtracing/coresight/coresight-core.c
@@ -23,6 +23,7 @@
#include "coresight-etm-perf.h"
#include "coresight-priv.h"
#include "coresight-syscfg.h"
+#include "coresight-trace-id.h"
/*
   * Mutex used to lock all sysfs enable and disable actions and loading and
@@ -331,12 +332,12 @@ static int coresight_enable_helper(struct coresight_device *csdev,
      return helper_ops(csdev)->enable(csdev, mode, data);
}
-static void coresight_disable_helper(struct coresight_device *csdev)
+static void coresight_disable_helper(struct coresight_device *csdev, void *data)
{
-    helper_ops(csdev)->disable(csdev, NULL);
+    helper_ops(csdev)->disable(csdev, data);
}
-static void coresight_disable_helpers(struct coresight_device *csdev)
+static void coresight_disable_helpers(struct coresight_device *csdev, void *data)
{
      int i;
      struct coresight_device *helper;
@@ -344,7 +345,7 @@ static void coresight_disable_helpers(struct coresight_device *csdev)
      for (i = 0; i < csdev->pdata->nr_outconns; ++i) {
          helper = csdev->pdata->out_conns[i]->dest_dev;
          if (helper && coresight_is_helper(helper))
-            coresight_disable_helper(helper);
+            coresight_disable_helper(helper, data);
      }
}
@@ -361,7 +362,7 @@ static void coresight_disable_helpers(struct coresight_device *csdev)
void coresight_disable_source(struct coresight_device *csdev, void *data)
{
      source_ops(csdev)->disable(csdev, data);
-    coresight_disable_helpers(csdev);
+    coresight_disable_helpers(csdev, NULL);
}
EXPORT_SYMBOL_GPL(coresight_disable_source);
@@ -371,7 +372,8 @@ EXPORT_SYMBOL_GPL(coresight_disable_source);
   * disabled.
   */
static void coresight_disable_path_from(struct list_head *path,
-                    struct coresight_node *nd)
+                    struct coresight_node *nd,
+                    void *sink_data)
{
      u32 type;
      struct coresight_device *csdev, *parent, *child;
@@ -417,13 +419,13 @@ static void coresight_disable_path_from(struct list_head *path,
          }
          /* Disable all helpers adjacent along the path last */
-        coresight_disable_helpers(csdev);
+        coresight_disable_helpers(csdev, sink_data);
      }
}
-void coresight_disable_path(struct list_head *path)
+void coresight_disable_path(struct list_head *path, void *sink_data)
{
-    coresight_disable_path_from(path, NULL);
+    coresight_disable_path_from(path, NULL, sink_data);
}
EXPORT_SYMBOL_GPL(coresight_disable_path);
@@ -505,10 +507,47 @@ int coresight_enable_path(struct list_head *path, enum cs_mode mode,
out:
      return ret;
err:
-    coresight_disable_path_from(path, nd);
+    coresight_disable_path_from(path, nd, sink_data);
      goto out;
}
+int coresight_read_traceid(struct list_head *path, enum cs_mode mode,
+               struct coresight_trace_id_map *id_map)
+{
+    int trace_id, type;
+    struct coresight_device *csdev;
+    struct coresight_node *nd;
+
+    list_for_each_entry(nd, path, link) {

What do you think about also changing the path to this:

  struct coresight_path {
    struct list_head *path,
    u8 trace_id
  };

That would avoid having to traverse the path on every enable and would remove this function. You could also cache the trace ID in the CTCU for a similar benefit, but it wouldn't remove the need to call this at least once.

The expensive part should be the create path part, after that enable and disable should be cheap because they happen on schedule for Perf mode. We should be avoiding allocations and searches.

+        csdev = nd->csdev;
+        type = csdev->type;
+
+        switch (type) {
+        case CORESIGHT_DEV_TYPE_SOURCE:
+            if (source_ops(csdev)->trace_id != NULL) {
+                trace_id = source_ops(csdev)->trace_id(csdev,
+                                       mode,
+                                       id_map);
+                if (IS_VALID_CS_TRACE_ID(trace_id))
+                    goto out;
+            }
+            break;
+        case CORESIGHT_DEV_TYPE_LINK:
+            if (link_ops(csdev)->trace_id != NULL) {
+                trace_id = link_ops(csdev)->trace_id(csdev);
+                if (IS_VALID_CS_TRACE_ID(trace_id))
+                    goto out;
+            }
+            break;
+        default:
+            break;
+        }
+    }
+    return -EINVAL;
+out:
+    return trace_id;
+}
+
struct coresight_device *coresight_get_sink(struct list_head *path)
{
      struct coresight_device *csdev;
diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c
index aea9ac9c4bd0..904b5531c256 100644
--- a/drivers/hwtracing/coresight/coresight-etb10.c
+++ b/drivers/hwtracing/coresight/coresight-etb10.c
@@ -173,7 +173,8 @@ static int etb_enable_perf(struct coresight_device *csdev, void *data)
      pid_t pid;
      unsigned long flags;
      struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
-    struct perf_output_handle *handle = data;
+    struct cs_sink_data *sink_data = (struct cs_sink_data *)data;
+    struct perf_output_handle *handle = sink_data->handle;
      struct cs_buffers *buf = etm_perf_sink_config(handle);
      spin_lock_irqsave(&drvdata->spinlock, flags);
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index ad6a8f4b70b6..e676edd42ddc 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -459,6 +459,7 @@ static void etm_event_start(struct perf_event *event, int flags)
      struct perf_output_handle *handle = &ctxt->handle;
      struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu);
      struct list_head *path;
+    struct cs_sink_data *sink_data = NULL;
      u64 hw_id;
      u8 trace_id;
@@ -498,9 +499,20 @@ static void etm_event_start(struct perf_event *event, int flags)
      if (WARN_ON_ONCE(!sink))
          goto fail_end_stop;
+    sink_data = kzalloc(sizeof(*sink_data), GFP_KERNEL);

kzalloc can't be called from here. Check dmesg for the warning. That's another reason to do this change on the path. Because the path is allocated on etm_setup_aux() where allocations are allowed.

Hi, James
I just tried with the following command and did not observe any warning in dmesg. May I ask what issue might occur here?

You might be missing some debugging configs like lockdep etc. The warning is that etm_event_start() is a non-sleepable context and kzalloc is sleepable. Even if it wasn't an error we still wouldn't want to do it, etm_event_start() and stop are called too frequently.

Sure, will check the issue again.

root@qemuarm64:/data# ./perf record -e cs_etm/@tmc_etr0/ --per-thread ls
configs kernel.txt logs lost+found misc perf perf.data perf.data.old root time tzstorage weston
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.145 MB perf.data ]

For the new patch version, I implemented an 8-bit hash table in the CTCU driver data to handle situations where multiple TPDMs connected to the same TPDA device have been enabled. As we know, TPDMs share the trace_id of the TPDA device they are connected to. If we reset the bit based on the trace_id without checking the enabled refcount, it causes an issue where trace data from other enabled TPDM devices (sharing the same trace_id) cannot enter the ETR buffer, as it gets filtered out by the CTCU.
I think sharing the code or a diagram might be easier to follow here. The mention of a refcount makes sense but I don't follow the need for a hash table. There are other places where single devices are shared by multiple paths, like funnels, and they're all done with refcounts.

Suppose we have two etr devices enabled, TPDM0 with trace_id 3(trace_id of TPDA0) with etr0 and TPDM1 with trace_id 3(trace_id of TPDA0) with etr1 have been enabled. So the current refcnt for TPDA device is 2, but actually, the refcnt for each sink should be 1, right? So I cannot check the refcnt from TPDA's coresight_device. That's why I implemented a hash table, use trace_id as key. We can check the refcnt for each trace_id for each sink with the solution.

Here is the code snippet:
Entry for hash table:
/*
 * One entry of the CTCU trace-ID hash table; entries are looked up and
 * inserted using trace_id as the hash key.
 */
struct ctcu_traceid_entry {
/* Linkage into drvdata->traceid_htable. */
struct hlist_node hlist;
/* Enable counts for this trace ID; indexed by port number by the user code. */
atomic_t refcnt[ATID_MAX_NUM];
/* Trace ID this entry tracks (also the hash key). */
u8 trace_id;
};

Usage of hash table:

/*
 * ctcu_search_traceid_entry: Find the hash-table entry for @trace_id,
 * inserting a fresh entry (all refcounts zero) when none exists yet.
 *
 * @csdev:	device whose parent's drvdata holds the hash table.
 * @trace_id:	trace ID used as the hash key.
 *
 * Returns the existing or newly inserted entry, or NULL if the up-front
 * allocation fails.
 */
static struct ctcu_traceid_entry *ctcu_search_traceid_entry(struct coresight_device *csdev,
u8 trace_id)
{
struct ctcu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
struct ctcu_traceid_entry *entry, *new_entry;
int i;

/*
 * The candidate entry is allocated before the spinlock is taken, on every
 * call, even when a matching entry already exists.
 * NOTE(review): GFP_KERNEL may sleep - confirm no caller reaches this
 * from a non-sleepable context (e.g. the perf event start path).
 */
new_entry = kzalloc(sizeof(struct ctcu_traceid_entry), GFP_KERNEL);
if (!new_entry)
return NULL;

new_entry->trace_id = trace_id;
/* kzalloc() already zeroes the memory; this makes the initial count explicit. */
for (i = 0; i < ATID_MAX_NUM; i++)
atomic_set(&new_entry->refcnt[i], 0);

/* Scoped lock guard; presumably released automatically on return - TODO confirm. */
guard(raw_spinlock_irqsave)(&drvdata->spin_lock);
hash_for_each_possible(drvdata->traceid_htable, entry, hlist, trace_id) {
if (entry->trace_id == trace_id) {
/* An entry already exists: discard the unused candidate. */
kfree(new_entry);
return entry;
}
}
hash_add(drvdata->traceid_htable, &new_entry->hlist, trace_id);

return new_entry;
}

/*
 * ctcu_set_etr_traceid: Update the per-port refcount for the path's trace ID
 * and, unless the disable is skipped, program the trace ID for the sink's port.
 *
 * @csdev:	the CTCU device.
 * @cs_path:	path providing the sink (via cs_path->path) and cs_path->trace_id.
 * @enable:	true to take a reference and enable, false to drop one and disable.
 *
 * Returns 0 on success (including a skipped disable), -EINVAL on invalid
 * parameters or when no active port is found; otherwise the result of
 * __ctcu_set_etr_traceid(). Non-zero result means failure.
 */
static int ctcu_set_etr_traceid(struct coresight_device *csdev, struct coresight_path *cs_path,
bool enable)
{
struct ctcu_traceid_entry *entry;
struct coresight_device *sink = coresight_get_sink(cs_path->path);
int port_num;

/*
 * NOTE(review): the lookup (which inserts an entry when none exists) runs
 * before the parameters are validated, so an entry may be left in the
 * table on the error returns below - confirm this is intended.
 */
entry = ctcu_search_traceid_entry(csdev, cs_path->trace_id);
if ((sink == NULL) || !IS_VALID_CS_TRACE_ID(cs_path->trace_id) || (entry == NULL)) {
dev_err(&csdev->dev, "Invalid parameters\n");
return -EINVAL;
}

/* A negative value from ctcu_get_active_port() is treated as failure. */
port_num = ctcu_get_active_port(sink, csdev);
if (port_num < 0)
return -EINVAL;

/*
 * Skip the disable session if more than one TPDM device that
 * connected to the same TPDA device has been enabled.
 */
if (enable)
atomic_inc(&entry->refcnt[port_num]);
else {
if (atomic_dec_return(&entry->refcnt[port_num]) > 0) {
dev_dbg(&csdev->dev, "Skip the disable session\n");
return 0;
}
/* Count for this port is no longer positive: drop the table entry. */
ctcu_rm_traceid_entry(csdev, cs_path->trace_id);
}

dev_dbg(&csdev->dev, "traceid is %d\n", cs_path->trace_id);

return __ctcu_set_etr_traceid(csdev, cs_path->trace_id, port_num, enable);
}

Or, I also have another solution: create a multi-element atomic array like refcnt[MAX_ETR_NUM][CORESIGHT_TRACE_ID_RES_TOP], so that we can allocate memory for the array in the CTCU's probe function. It will cost almost 1 KB.

Thanks,
Jie

I need to allocate memory when implementing the hash table (add/remove key entry) in the coresight_enable_path flow, but you mentioned we cannot call kzalloc from here.

Thanks,
Jie

Why not allocate on setup_aux()? That's called by userspace before the session starts, and then the path is fixed from that point onwards so you shouldn't need to do any more allocations. That's how it's setup currently anyway.

Next message: Hari Bathini: "Re: [PATCH v2 1/6] kexec: Initialize ELF lowest address to ULONG_MAX"
Previous message: Christian Schrefl: "Re: [PATCH 2/3] rust: miscdevice: Add additional data to MiscDeviceRegistration"
In reply to: James Clark: "Re: [PATCH v8 2/5] Coresight: Add trace_id function to retrieving the trace ID"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]