Re: [PATCH RESEND v4 03/11] perf header: Support CPU DOMAIN relation info
From: Swapnil Sapkal
Date: Mon Jan 19 2026 - 13:12:13 EST
Hi Namhyung,
On 09-01-2026 16:54, Swapnil Sapkal wrote:
Hello Namhyung,
On 03-01-2026 03:56, Namhyung Kim wrote:
On Tue, Sep 09, 2025 at 11:42:19AM +0000, Swapnil Sapkal wrote:
'/proc/schedstat' gives the info about load balancing statistics within
a given domain. It also contains the cpu_mask giving information about
the sibling cpus and domain names after schedstat version 17. Storing
this information in perf header will help tools like `perf sched stats`
for better analysis.
Signed-off-by: Swapnil Sapkal <swapnil.sapkal@xxxxxxx>
---
.../Documentation/perf.data-file-format.txt | 17 +
tools/perf/builtin-inject.c | 1 +
tools/perf/util/env.h | 16 +
tools/perf/util/header.c | 304 ++++++++++++++++++
tools/perf/util/header.h | 1 +
tools/perf/util/util.c | 42 +++
tools/perf/util/util.h | 3 +
7 files changed, 384 insertions(+)
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/ tools/perf/Documentation/perf.data-file-format.txt
index cd95ba09f727..92dbba1003cf 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -437,6 +437,23 @@ struct {
} [nr_pmu];
};
+ HEADER_CPU_DOMAIN_INFO = 32,
+
+List of cpu-domain relation info. The format of the data is as below.
+
+struct domain_info {
+ int domain;
+ char dname[];
+ char cpumask[];
+ char cpulist[];
+};
+
+struct cpu_domain_info {
+ int cpu;
+ int nr_domains;
+ struct domain_info domains[];
+};
+
other bits are reserved and should ignored for now
HEADER_FEAT_BITS = 256,
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index a114b3fa1bea..f43a7ec44b5f 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -2058,6 +2058,7 @@ static bool keep_feat(int feat)
case HEADER_CLOCK_DATA:
case HEADER_HYBRID_TOPOLOGY:
case HEADER_PMU_CAPS:
+ case HEADER_CPU_DOMAIN_INFO:
return true;
/* Information that can be updated */
case HEADER_BUILD_ID:
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index e00179787a34..71034c4b4488 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -54,6 +54,19 @@ struct pmu_caps {
char *pmu_name;
};
+struct domain_info {
+ u32 domain;
+ char *dname;
+ char *cpumask;
+ char *cpulist;
+};
+
+struct cpu_domain_map {
+ u32 cpu;
+ u32 nr_domains;
+ struct domain_info **domains;
+};
+
typedef const char *(arch_syscalls__strerrno_t)(int err);
struct perf_env {
@@ -70,6 +83,8 @@ struct perf_env {
unsigned int max_branches;
unsigned int br_cntr_nr;
unsigned int br_cntr_width;
+ unsigned int schedstat_version;
+ unsigned int max_sched_domains;
int kernel_is_64_bit;
int nr_cmdline;
@@ -92,6 +107,7 @@ struct perf_env {
char **cpu_pmu_caps;
struct cpu_topology_map *cpu;
struct cpu_cache_level *caches;
+ struct cpu_domain_map **cpu_domain;
int caches_cnt;
u32 comp_ratio;
u32 comp_ver;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 4f2a6e10ed5c..7ff7434bac2c 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1621,6 +1621,184 @@ static int write_pmu_caps(struct feat_fd *ff,
return 0;
}
+static void free_cpu_domain_info(struct cpu_domain_map **cd_map, u32 schedstat_version, u32 nr)
+{
+ for (u32 i = 0; i < nr; i++) {
+ if (cd_map[i]->domains) {
+ for (u32 j = 0; j < cd_map[i]->nr_domains; j++) {
+ struct domain_info *d_info = cd_map[i]->domains[j];
+
I'm not sure if it needs a NULL check for d_info before access.
Thanks for catching this. Yes we need a NULL check here.
If some of the cpus are offline then it is possible that some sched domains can be degenerated and for some cpus.
for e.g. cpu0, cpu128 are smt siblings. If cpu 128 is offline then SMT domain will be degenerated for cpu0 alone. All the other cpus will have a valid SMT domain. Thus SMT for the d_info can be NULL.
+ if (schedstat_version >= 17)
+ free(d_info->dname);
+
+ free(d_info->cpumask);
+ free(d_info->cpulist);
+ }
+ free(cd_map[i]->domains);
+ }
+ }
+
+ free(cd_map);
+}
+
+static struct cpu_domain_map **build_cpu_domain_map(u32 *schedstat_version, u32 *max_sched_domains,
+ u32 nr)
+{
+ struct domain_info *domain_info;
+ struct cpu_domain_map **cd_map;
+ char dname[16], cpumask[256];
+ char cpulist[1024];
+ char *line = NULL;
+ u32 cpu, domain;
+ u32 dcount = 0;
+ size_t len;
+ FILE *fp;
+
+ fp = fopen("/proc/schedstat", "r");
+ if (!fp) {
+ pr_err("Failed to open /proc/schedstat\n");
+ return NULL;
+ }
+
+ cd_map = calloc(nr, sizeof(*cd_map));
+ if (!cd_map)
+ goto out;
+
+ while (getline(&line, &len, fp) > 0) {
+ int retval;
+
+ if (strncmp(line, "version", 7) == 0) {
+ retval = sscanf(line, "version %d\n", schedstat_version);
+ if (retval != 1)
+ continue;
+
+ } else if (strncmp(line, "cpu", 3) == 0) {
+ retval = sscanf(line, "cpu%u %*s", &cpu);
+ if (retval == 1) {
+ cd_map[cpu] = calloc(1, sizeof(*cd_map[cpu]));
+ if (!cd_map[cpu])
+ goto out_free_line;
+ cd_map[cpu]->cpu = cpu;
+ } else
+ continue;
+
+ dcount = 0;
+ } else if (strncmp(line, "domain", 6) == 0) {
+ dcount++;
+
+ cd_map[cpu]->domains = realloc(cd_map[cpu]->domains,
+ dcount * sizeof(domain_info));
+ if (!cd_map[cpu]->domains)
+ goto out_free_line;
Please use a temporary variable to save the result in order to not lose
the original pointer in case of failure.
Sure, will do.
+
+ domain_info = calloc(1, sizeof(*domain_info));
+ if (!domain_info)
+ goto out_free_line;
+
+ cd_map[cpu]->domains[dcount - 1] = domain_info;
+
+ if (*schedstat_version >= 17) {
+ retval = sscanf(line, "domain%u %s %s %*s", &domain, dname,
+ cpumask);
+ if (retval != 3)
+ continue;
+
+ domain_info->dname = calloc(strlen(dname) + 1, sizeof(char));
+ if (!domain_info->dname)
+ goto out_free_line;
+
+ strcpy(domain_info->dname, dname);
This can be simply:
domain_info->dname = strdup(dname);
Sure, will do.
+ } else {
+ retval = sscanf(line, "domain%u %s %*s", &domain, cpumask);
+ if (retval != 2)
+ continue;
+ }
+
+ domain_info->domain = domain;
+ if (domain > *max_sched_domains)
+ *max_sched_domains = domain;
+
+ domain_info->cpumask = calloc(strlen(cpumask) + 1, sizeof(char));
+ if (!domain_info->cpumask)
+ goto out_free_line;
+
+ strcpy(domain_info->cpumask, cpumask);
+
+ cpumask_to_cpulist(cpumask, cpulist);
+ domain_info->cpulist = calloc(strlen(cpulist) + 1, sizeof(char));
+ if (!domain_info->cpulist)
+ goto out_free_line;
All error paths should call free_cpu_domain_info() at some point and
free the intermediate domain_info properly.
Sure, will do.
Sorry, I missed to mention here that the parent function is taking care of freeing the memory by calling free_cpu_domain_info() in case of error path also. Because of which I have not addressed this in the next version.
--
Thanks and Regards,
Swapnil