Re: [PATCH v8 2/5] Coresight: Add trace_id function to retrieving the trace ID
From: Jie Gan
Date: Thu Jan 23 2025 - 05:03:55 EST
On 1/23/2025 5:47 PM, James Clark wrote:
On 23/01/2025 6:28 am, Jie Gan wrote:
On 1/13/2025 8:02 PM, James Clark wrote:
On 26/12/2024 1:10 am, Jie Gan wrote:
Add 'trace_id' function pointer in ops. It's responsible for
retrieving the device's trace ID.
Add 'struct cs_sink_data' to store the data that is needed by
coresight_enable_path/coresight_disable_path. The structure
will be transmitted to the helper and sink device to enable
related funcationalities.
The new cs_sink_data struct is quite specific to this change. Can we
start passing the path around to enable/disable functions, that will
allow devices to gather anything they want in the future. Because we
already have coresight_get_sink(path), coresight_get_source(path) etc.
And see below, but for this case we can also change the path struct
to contain the trace ID. Then all the new functions, allocations and
searches for the trace ID are unecessary. The CTCU will have access
to the path, and by the time its enable function is called the trace
ID is already assigned.
It's also easier to understand at which point a trace ID is
allocated, rather than adding the trace_id() callbacks from
everywhere which could potentially either read or allocate. I suppose
that's "safer" because maybe it's not allocated, but I can't see what
case it would happen in reverse.
Signed-off-by: Jie Gan <quic_jiegan@xxxxxxxxxxx>
---
drivers/hwtracing/coresight/coresight-core.c | 59 ++++++++++++++
+----
drivers/hwtracing/coresight/coresight-etb10.c | 3 +-
.../hwtracing/coresight/coresight-etm-perf.c | 37 ++++++++++--
.../coresight/coresight-etm3x-core.c | 30 ++++++++++
.../coresight/coresight-etm4x-core.c | 29 +++++++++
drivers/hwtracing/coresight/coresight-priv.h | 13 +++-
drivers/hwtracing/coresight/coresight-stm.c | 22 +++++++
drivers/hwtracing/coresight/coresight-sysfs.c | 24 +++++++-
.../hwtracing/coresight/coresight-tmc-etf.c | 3 +-
.../hwtracing/coresight/coresight-tmc-etr.c | 6 +-
drivers/hwtracing/coresight/coresight-tpda.c | 20 +++++++
drivers/hwtracing/coresight/coresight-trbe.c | 4 +-
drivers/hwtracing/coresight/ultrasoc-smb.c | 3 +-
include/linux/coresight.h | 6 ++
14 files changed, 234 insertions(+), 25 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/
hwtracing/coresight/coresight-core.c
index 0a9380350fb5..2e560b425fd4 100644
--- a/drivers/hwtracing/coresight/coresight-core.c
+++ b/drivers/hwtracing/coresight/coresight-core.c
@@ -23,6 +23,7 @@
#include "coresight-etm-perf.h"
#include "coresight-priv.h"
#include "coresight-syscfg.h"
+#include "coresight-trace-id.h"
/*
* Mutex used to lock all sysfs enable and disable actions and
loading and
@@ -331,12 +332,12 @@ static int coresight_enable_helper(struct
coresight_device *csdev,
return helper_ops(csdev)->enable(csdev, mode, data);
}
-static void coresight_disable_helper(struct coresight_device *csdev)
+static void coresight_disable_helper(struct coresight_device
*csdev, void *data)
{
- helper_ops(csdev)->disable(csdev, NULL);
+ helper_ops(csdev)->disable(csdev, data);
}
-static void coresight_disable_helpers(struct coresight_device *csdev)
+static void coresight_disable_helpers(struct coresight_device
*csdev, void *data)
{
int i;
struct coresight_device *helper;
@@ -344,7 +345,7 @@ static void coresight_disable_helpers(struct
coresight_device *csdev)
for (i = 0; i < csdev->pdata->nr_outconns; ++i) {
helper = csdev->pdata->out_conns[i]->dest_dev;
if (helper && coresight_is_helper(helper))
- coresight_disable_helper(helper);
+ coresight_disable_helper(helper, data);
}
}
@@ -361,7 +362,7 @@ static void coresight_disable_helpers(struct
coresight_device *csdev)
void coresight_disable_source(struct coresight_device *csdev, void
*data)
{
source_ops(csdev)->disable(csdev, data);
- coresight_disable_helpers(csdev);
+ coresight_disable_helpers(csdev, NULL);
}
EXPORT_SYMBOL_GPL(coresight_disable_source);
@@ -371,7 +372,8 @@ EXPORT_SYMBOL_GPL(coresight_disable_source);
* disabled.
*/
static void coresight_disable_path_from(struct list_head *path,
- struct coresight_node *nd)
+ struct coresight_node *nd,
+ void *sink_data)
{
u32 type;
struct coresight_device *csdev, *parent, *child;
@@ -417,13 +419,13 @@ static void coresight_disable_path_from(struct
list_head *path,
}
/* Disable all helpers adjacent along the path last */
- coresight_disable_helpers(csdev);
+ coresight_disable_helpers(csdev, sink_data);
}
}
-void coresight_disable_path(struct list_head *path)
+void coresight_disable_path(struct list_head *path, void *sink_data)
{
- coresight_disable_path_from(path, NULL);
+ coresight_disable_path_from(path, NULL, sink_data);
}
EXPORT_SYMBOL_GPL(coresight_disable_path);
@@ -505,10 +507,47 @@ int coresight_enable_path(struct list_head
*path, enum cs_mode mode,
out:
return ret;
err:
- coresight_disable_path_from(path, nd);
+ coresight_disable_path_from(path, nd, sink_data);
goto out;
}
+int coresight_read_traceid(struct list_head *path, enum cs_mode mode,
+ struct coresight_trace_id_map *id_map)
+{
+ int trace_id, type;
+ struct coresight_device *csdev;
+ struct coresight_node *nd;
+
+ list_for_each_entry(nd, path, link) {
What do you think about also changing the path to this:
struct coresight_path {
struct list_head *path,
u8 trace_id
};
That would avoid having to traverse the path on every enable and
would remove this function. You could also cache the trace ID in the
CTCU for a similar benefit, but it wouldn't remove the need to call
this at least once.
The expensive part should be the create path part, after that enable
and disable should be cheap because they happen on schedule for Perf
mode. We should be avoiding allocations and searches.
+ csdev = nd->csdev;
+ type = csdev->type;
+
+ switch (type) {
+ case CORESIGHT_DEV_TYPE_SOURCE:
+ if (source_ops(csdev)->trace_id != NULL) {
+ trace_id = source_ops(csdev)->trace_id(csdev,
+ mode,
+ id_map);
+ if (IS_VALID_CS_TRACE_ID(trace_id))
+ goto out;
+ }
+ break;
+ case CORESIGHT_DEV_TYPE_LINK:
+ if (link_ops(csdev)->trace_id != NULL) {
+ trace_id = link_ops(csdev)->trace_id(csdev);
+ if (IS_VALID_CS_TRACE_ID(trace_id))
+ goto out;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ return -EINVAL;
+out:
+ return trace_id;
+}
+
struct coresight_device *coresight_get_sink(struct list_head *path)
{
struct coresight_device *csdev;
diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/
drivers/ hwtracing/coresight/coresight-etb10.c
index aea9ac9c4bd0..904b5531c256 100644
--- a/drivers/hwtracing/coresight/coresight-etb10.c
+++ b/drivers/hwtracing/coresight/coresight-etb10.c
@@ -173,7 +173,8 @@ static int etb_enable_perf(struct
coresight_device *csdev, void *data)
pid_t pid;
unsigned long flags;
struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
- struct perf_output_handle *handle = data;
+ struct cs_sink_data *sink_data = (struct cs_sink_data *)data;
+ struct perf_output_handle *handle = sink_data->handle;
struct cs_buffers *buf = etm_perf_sink_config(handle);
spin_lock_irqsave(&drvdata->spinlock, flags);
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/
drivers/hwtracing/coresight/coresight-etm-perf.c
index ad6a8f4b70b6..e676edd42ddc 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -459,6 +459,7 @@ static void etm_event_start(struct perf_event
*event, int flags)
struct perf_output_handle *handle = &ctxt->handle;
struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu);
struct list_head *path;
+ struct cs_sink_data *sink_data = NULL;
u64 hw_id;
u8 trace_id;
@@ -498,9 +499,20 @@ static void etm_event_start(struct perf_event
*event, int flags)
if (WARN_ON_ONCE(!sink))
goto fail_end_stop;
+ sink_data = kzalloc(sizeof(*sink_data), GFP_KERNEL);
kzalloc can't be called from here. Check dmesg for the warning.
That's another reason to do this change on the path. Because the path
is allocated on etm_setup_aux() where allocations are allowed.
Hi, James
I just tried with following command and did not observe any warning
info from dmesg, may I ask what's the issue may suffered here?
You might be missing some debugging configs like lockdep etc. The
warning is that etm_event_start() is a non-sleepable context and kzalloc
is sleepable. Even if it wasn't an error we still wouldn't want to do
it, etm_event_start() and stop are called too frequently.
Sure, wiill check the issue again.
root@qemuarm64:/data# ./perf record -e cs_etm/@tmc_etr0/ --per-thread ls
configs kernel.txt logs lost+found misc
perf perf.data perf.data.old root time
tzstorage weston
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.145 MB perf.data ]
For the new patch version, I implemented an 8-bit hash table in the
CTCU driver data to handle situations where multiple TPDMs are
connected to the same TPDA device have been enabled. As we know, TPDMs
share the trace_id of the TPDA device they are connected to. If we
reset the bit based on the trace_id without checking the enabled
refcount, it causes an issue where trace data from other enabled TPDM
devices (sharing the same trace_id) cannot enter the ETR buffer, as it
gets filtered out by the CTCU.
I think sharing the code or a diagram might be easier to follow here.
The mention of a refcount makes sense but I don't follow the need for a
hash table. There are other places where single devices are shared by
multiple paths, like funnels, and they're all done with refcounts.
Suppose we have two etr devices enabled, TPDM0 with trace_id 3(trace_id
of TPDA0) with etr0 and TPDM1 with trace_id 3(trace_id of TPDA0) with
etr1 have been enabled. So the current refcnt for TPDA device is 2, but
actually, the refcnt for each sink should be 1, right? So I cannot check
the refcnt from TPDA's coresight_device. That's why I implemented a hash
table, use trace_id as key. We can check the refcnt for each trace_id
for each sink with the solution.
Here is the code snippet:
Entry for hash table:
struct ctcu_traceid_entry {
struct hlist_node hlist;
atomic_t refcnt[ATID_MAX_NUM];
u8 trace_id;
};
Usage of hash table:
static struct ctcu_traceid_entry *ctcu_search_traceid_entry(struct
coresight_device *csdev,
u8 trace_id)
{
struct ctcu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
struct ctcu_traceid_entry *entry, *new_entry;
int i;
new_entry = kzalloc(sizeof(struct ctcu_traceid_entry), GFP_KERNEL);
if (!new_entry)
return NULL;
new_entry->trace_id = trace_id;
for (i = 0; i < ATID_MAX_NUM; i++)
atomic_set(&new_entry->refcnt[i], 0);
guard(raw_spinlock_irqsave)(&drvdata->spin_lock);
hash_for_each_possible(drvdata->traceid_htable, entry, hlist,
trace_id) {
if (entry->trace_id == trace_id) {
kfree(new_entry);
return entry;
}
}
hash_add(drvdata->traceid_htable, &new_entry->hlist, trace_id);
return new_entry;
}
/*
* ctcu_set_etr_traceid: Retrieve the ATID offset and trace ID.
*
* Returns 0 indicates success. None-zero result means failure.
*/
static int ctcu_set_etr_traceid(struct coresight_device *csdev, struct
coresight_path *cs_path,
bool enable)
{
struct ctcu_traceid_entry *entry;
struct coresight_device *sink = coresight_get_sink(cs_path->path);
int port_num;
entry = ctcu_search_traceid_entry(csdev, cs_path->trace_id);
if ((sink == NULL) || !IS_VALID_CS_TRACE_ID(cs_path->trace_id)
|| (entry == NULL)) {
dev_err(&csdev->dev, "Invalid parameters\n");
return -EINVAL;
}
port_num = ctcu_get_active_port(sink, csdev);
if (port_num < 0)
return -EINVAL;
/*
* Skip the disable session if more than one TPDM device that
* connected to the same TPDA device has been enabled.
*/
if (enable)
atomic_inc(&entry->refcnt[port_num]);
else {
if (atomic_dec_return(&entry->refcnt[port_num]) > 0) {
dev_dbg(&csdev->dev, "Skip the disable session\n");
return 0;
}
ctcu_rm_traceid_entry(csdev, cs_path->trace_id);
}
dev_dbg(&csdev->dev, "traceid is %d\n", cs_path->trace_id);
return __ctcu_set_etr_traceid(csdev, cs_path->trace_id,
port_num, enable);
}
Or, I also have another solution, create an multi-element atomic array
like refcnt[MAX_ETR_NUM][CORESIGHT_TRACE_ID_RES_TOP]. So we can allocate
memory for the array in CTCU's probe function. It will cost like almost
1k byte.
Thanks,
Jie
I need allocate memory when implement hash table(add/remove key entry)
in coresight_enable_path flow, but you mentioned we cannot call
kzalloc from here.
Thanks,
Jie
Why not allocate on setup_aux()? That's called by userspace before the
session starts, and then the path is fixed from that point onwards so
you shouldn't need to do any more allocations. That's how it's setup
currently anyway.