Re: [PATCH v2 4/4] tools/perf: Fix perf bench numa testcase to check if CPU used to bind task is online
From: Athira Rajeev
Date: Sat Apr 09 2022 - 02:29:51 EST
> On 08-Apr-2022, at 5:56 PM, Srikar Dronamraju <srikar@xxxxxxxxxxxxxxxxxx> wrote:
>
> * Athira Rajeev <atrajeev@xxxxxxxxxxxxxxxxxx> [2022-04-06 23:21:13]:
>
>> Perf numa bench test fails with error:
>>
>> Testcase:
>> ./perf bench numa mem -p 2 -t 1 -P 1024 -C 0,8 -M 1,0 -s 20 -zZq
>> --thp 1 --no-data_rand_walk
>>
>> Failure snippet:
>> <<>>
>> Running 'numa/mem' benchmark:
>>
>> # Running main, "perf bench numa numa-mem -p 2 -t 1 -P 1024 -C 0,8
>> -M 1,0 -s 20 -zZq --thp 1 --no-data_rand_walk"
>>
>> perf: bench/numa.c:333: bind_to_cpumask: Assertion `!(ret)' failed.
>> <<>>
>>
>> The testcase uses CPUs 0 and 8. In function "parse_setup_cpu_list",
>> there is a check to see if the cpu number is greater than the max cpus possible
>> in the system, i.e. via "if (bind_cpu_0 >= g->p.nr_cpus ||
>> bind_cpu_1 >= g->p.nr_cpus) {". But it could happen that the system has,
>> say, 48 CPUs, but only CPUs 0-7 are online. The other CPUs
>> are offlined. Since "g->p.nr_cpus" is 48, the function will go ahead
>> and set the bit for CPU 8 also in the cpumask (td->bind_cpumask).
>>
>> The bind_to_cpumask function is called to set affinity using
>> sched_setaffinity and the cpumask. Since CPU 8 is offline,
>> setting the affinity will fail here with EINVAL. Fix this issue by adding a
>> check to make sure that the CPUs provided in the input argument values
>> are online before proceeding further, and skip the test otherwise. For this,
>> include a new helper function "is_cpu_online" in
>> "tools/perf/util/header.c".
>>
>> Since "BIT(x)" definition will get included from header.h, remove
>> that from bench/numa.c
>>
>> Tested-by: Disha Goel <disgoel@xxxxxxxxxxxxxxxxxx>
>> Signed-off-by: Athira Rajeev <atrajeev@xxxxxxxxxxxxxxxxxx>
>> Reported-by: Disha Goel <disgoel@xxxxxxxxxxxxxxxxxx>
>
> Looks good to me.
> Reviewed-by: Srikar Dronamraju <srikar@xxxxxxxxxxxxxxxxxx>
Hi Srikar,
Thanks for the review
Athira
>
>> ---
>> tools/perf/bench/numa.c | 8 ++++++--
>> tools/perf/util/header.c | 43 ++++++++++++++++++++++++++++++++++++++++
>> tools/perf/util/header.h | 1 +
>> 3 files changed, 50 insertions(+), 2 deletions(-)
>>
>> diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
>> index 29e41e32bd88..7992d79b3e41 100644
>> --- a/tools/perf/bench/numa.c
>> +++ b/tools/perf/bench/numa.c
>> @@ -34,6 +34,7 @@
>> #include <linux/numa.h>
>> #include <linux/zalloc.h>
>>
>> +#include "../util/header.h"
>> #include <numa.h>
>> #include <numaif.h>
>>
>> @@ -616,6 +617,11 @@ static int parse_setup_cpu_list(void)
>> return -1;
>> }
>>
>> + if (is_cpu_online(bind_cpu_0) != 1 || is_cpu_online(bind_cpu_1) != 1) {
>> + printf("\nTest not applicable, bind_cpu_0 or bind_cpu_1 is offline\n");
>> + return -1;
>> + }
>> +
>> BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0);
>> BUG_ON(bind_cpu_0 > bind_cpu_1);
>>
>> @@ -786,8 +792,6 @@ static int parse_nodes_opt(const struct option *opt __maybe_unused,
>> return parse_node_list(arg);
>> }
>>
>> -#define BIT(x) (1ul << x)
>> -
>> static inline uint32_t lfsr_32(uint32_t lfsr)
>> {
>> const uint32_t taps = BIT(1) | BIT(5) | BIT(6) | BIT(31);
>> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
>> index 6da12e522edc..3f5fcf5d4b3f 100644
>> --- a/tools/perf/util/header.c
>> +++ b/tools/perf/util/header.c
>> @@ -983,6 +983,49 @@ static int write_dir_format(struct feat_fd *ff,
>> return do_write(ff, &data->dir.version, sizeof(data->dir.version));
>> }
>>
>> +#define SYSFS "/sys/devices/system/cpu/"
>> +
>> +/*
>> + * Check whether a CPU is online
>> + *
>> + * Returns:
>> + * 1 -> if CPU is online
>> + * 0 -> if CPU is offline
>> + * -1 -> error case
>> + */
>> +int is_cpu_online(unsigned int cpu)
>> +{
>> + char sysfs_cpu[255];
>> + char buf[255];
>> + struct stat statbuf;
>> + size_t len;
>> + int fd;
>> +
>> + snprintf(sysfs_cpu, sizeof(sysfs_cpu), SYSFS "cpu%u", cpu);
>> +
>> + if (stat(sysfs_cpu, &statbuf) != 0)
>> + return 0;
>> +
>> + /*
>> + * Check if /sys/devices/system/cpu/cpux/online file
>> + * exists. In kernels without CONFIG_HOTPLUG_CPU, this
>> + * file won't exist.
>> + */
>> + snprintf(sysfs_cpu, sizeof(sysfs_cpu), SYSFS "cpu%u/online", cpu);
>> + if (stat(sysfs_cpu, &statbuf) != 0)
>> + return 1;
>> +
>> + fd = open(sysfs_cpu, O_RDONLY);
>> + if (fd == -1)
>> + return -1;
>> +
>> + len = read(fd, buf, sizeof(buf) - 1);
>> + buf[len] = '\0';
>> + close(fd);
>> +
>> + return strtoul(buf, NULL, 16);
>> +}
>> +
>> #ifdef HAVE_LIBBPF_SUPPORT
>> static int write_bpf_prog_info(struct feat_fd *ff,
>> struct evlist *evlist __maybe_unused)
>> diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
>> index c9e3265832d9..0eb4bc29a5a4 100644
>> --- a/tools/perf/util/header.h
>> +++ b/tools/perf/util/header.h
>> @@ -158,6 +158,7 @@ int do_write(struct feat_fd *fd, const void *buf, size_t size);
>> int write_padded(struct feat_fd *fd, const void *bf,
>> size_t count, size_t count_aligned);
>>
>> +int is_cpu_online(unsigned int cpu);
>> /*
>> * arch specific callback
>> */
>> --
>> 2.35.1