Re: [PATCH v2 1/4] tools/perf: Fix perf bench futex to correct usage of affinity for machines with #CPUs > 1K

From: Srikar Dronamraju
Date: Fri Apr 08 2022 - 08:28:12 EST


* Athira Rajeev <atrajeev@xxxxxxxxxxxxxxxxxx> [2022-04-06 23:21:10]:

> The perf bench futex testcase fails on systems with more
> than 1K CPUs.
>
> Testcase: perf bench futex all
> Failure snippet:
> <<>>Running futex/hash benchmark...
>
> perf: pthread_create: No such file or directory
> <<>>
>
> In all the futex benchmarks (i.e. hash, lock-pi, requeue, wake,
> wake-parallel), pthread_create is invoked in the respective bench_futex_*
> function. Though the logs show a direct failure from pthread_create,
> strace logs showed that the actual failure is from "sched_setaffinity"
> returning EINVAL (invalid argument). This happens because the default
> mask size in glibc is 1024. To overcome this 1024-CPU mask size
> limitation of cpu_set_t, change the mask size using the CPU_*_S macros.
>
> This patch addresses the problem by changing all the futex benchmarks
> to use CPU_ALLOC to allocate the cpumask, CPU_ALLOC_SIZE for its size,
> and CPU_SET_S to set the mask.
>
> Tested-by: Disha Goel <disgoel@xxxxxxxxxxxxxxxxxx>
> Signed-off-by: Athira Rajeev <atrajeev@xxxxxxxxxxxxxxxxxx>
> Reported-by: Disha Goel <disgoel@xxxxxxxxxxxxxxxxxx>

Looks good to me
Reviewed-by: Srikar Dronamraju <srikar@xxxxxxxxxxxxxxxxxx>


> ---
> tools/perf/bench/futex-hash.c | 26 +++++++++++++++++++-------
> tools/perf/bench/futex-lock-pi.c | 21 ++++++++++++++++-----
> tools/perf/bench/futex-requeue.c | 21 ++++++++++++++++-----
> tools/perf/bench/futex-wake-parallel.c | 21 ++++++++++++++++-----
> tools/perf/bench/futex-wake.c | 22 ++++++++++++++++------
> 5 files changed, 83 insertions(+), 28 deletions(-)
>
> diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
> index 9627b6ab8670..dfce64e551e2 100644
> --- a/tools/perf/bench/futex-hash.c
> +++ b/tools/perf/bench/futex-hash.c
> @@ -122,12 +122,14 @@ static void print_summary(void)
> int bench_futex_hash(int argc, const char **argv)
> {
> int ret = 0;
> - cpu_set_t cpuset;
> + cpu_set_t *cpuset;
> struct sigaction act;
> unsigned int i;
> pthread_attr_t thread_attr;
> struct worker *worker = NULL;
> struct perf_cpu_map *cpu;
> + int nrcpus;
> + size_t size;
>
> argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0);
> if (argc) {
> @@ -170,25 +172,35 @@ int bench_futex_hash(int argc, const char **argv)
> threads_starting = params.nthreads;
> pthread_attr_init(&thread_attr);
> gettimeofday(&bench__start, NULL);
> +
> + nrcpus = perf_cpu_map__nr(cpu);
> + cpuset = CPU_ALLOC(nrcpus);
> + BUG_ON(!cpuset);
> + size = CPU_ALLOC_SIZE(nrcpus);
> +
> for (i = 0; i < params.nthreads; i++) {
> worker[i].tid = i;
> worker[i].futex = calloc(params.nfutexes, sizeof(*worker[i].futex));
> if (!worker[i].futex)
> goto errmem;
>
> - CPU_ZERO(&cpuset);
> - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
> + CPU_ZERO_S(size, cpuset);
>
> - ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
> - if (ret)
> + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
> + ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset);
> + if (ret) {
> + CPU_FREE(cpuset);
> err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
> -
> + }
> ret = pthread_create(&worker[i].thread, &thread_attr, workerfn,
> (void *)(struct worker *) &worker[i]);
> - if (ret)
> + if (ret) {
> + CPU_FREE(cpuset);
> err(EXIT_FAILURE, "pthread_create");
> + }
>
> }
> + CPU_FREE(cpuset);
> pthread_attr_destroy(&thread_attr);
>
> pthread_mutex_lock(&thread_lock);
> diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
> index a512a320df74..61c3bb80d4cf 100644
> --- a/tools/perf/bench/futex-lock-pi.c
> +++ b/tools/perf/bench/futex-lock-pi.c
> @@ -120,11 +120,17 @@ static void *workerfn(void *arg)
> static void create_threads(struct worker *w, pthread_attr_t thread_attr,
> struct perf_cpu_map *cpu)
> {
> - cpu_set_t cpuset;
> + cpu_set_t *cpuset;
> unsigned int i;
> + int nrcpus = perf_cpu_map__nr(cpu);
> + size_t size;
>
> threads_starting = params.nthreads;
>
> + cpuset = CPU_ALLOC(nrcpus);
> + BUG_ON(!cpuset);
> + size = CPU_ALLOC_SIZE(nrcpus);
> +
> for (i = 0; i < params.nthreads; i++) {
> worker[i].tid = i;
>
> @@ -135,15 +141,20 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr,
> } else
> worker[i].futex = &global_futex;
>
> - CPU_ZERO(&cpuset);
> - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
> + CPU_ZERO_S(size, cpuset);
> + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
>
> - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
> + if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
> + CPU_FREE(cpuset);
> err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
> + }
>
> - if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i]))
> + if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i])) {
> + CPU_FREE(cpuset);
> err(EXIT_FAILURE, "pthread_create");
> + }
> }
> + CPU_FREE(cpuset);
> }
>
> int bench_futex_lock_pi(int argc, const char **argv)
> diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
> index aca47ce8b1e7..2cb013f7ffe5 100644
> --- a/tools/perf/bench/futex-requeue.c
> +++ b/tools/perf/bench/futex-requeue.c
> @@ -123,22 +123,33 @@ static void *workerfn(void *arg __maybe_unused)
> static void block_threads(pthread_t *w,
> pthread_attr_t thread_attr, struct perf_cpu_map *cpu)
> {
> - cpu_set_t cpuset;
> + cpu_set_t *cpuset;
> unsigned int i;
> + int nrcpus = perf_cpu_map__nr(cpu);
> + size_t size;
>
> threads_starting = params.nthreads;
>
> + cpuset = CPU_ALLOC(nrcpus);
> + BUG_ON(!cpuset);
> + size = CPU_ALLOC_SIZE(nrcpus);
> +
> /* create and block all threads */
> for (i = 0; i < params.nthreads; i++) {
> - CPU_ZERO(&cpuset);
> - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
> + CPU_ZERO_S(size, cpuset);
> + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
>
> - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
> + if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
> + CPU_FREE(cpuset);
> err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
> + }
>
> - if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
> + if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) {
> + CPU_FREE(cpuset);
> err(EXIT_FAILURE, "pthread_create");
> + }
> }
> + CPU_FREE(cpuset);
> }
>
> static void toggle_done(int sig __maybe_unused,
> diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
> index 888ee6037945..efa5070a5eb3 100644
> --- a/tools/perf/bench/futex-wake-parallel.c
> +++ b/tools/perf/bench/futex-wake-parallel.c
> @@ -144,22 +144,33 @@ static void *blocked_workerfn(void *arg __maybe_unused)
> static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
> struct perf_cpu_map *cpu)
> {
> - cpu_set_t cpuset;
> + cpu_set_t *cpuset;
> unsigned int i;
> + int nrcpus = perf_cpu_map__nr(cpu);
> + size_t size;
>
> threads_starting = params.nthreads;
>
> + cpuset = CPU_ALLOC(nrcpus);
> + BUG_ON(!cpuset);
> + size = CPU_ALLOC_SIZE(nrcpus);
> +
> /* create and block all threads */
> for (i = 0; i < params.nthreads; i++) {
> - CPU_ZERO(&cpuset);
> - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
> + CPU_ZERO_S(size, cpuset);
> + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
>
> - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
> + if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
> + CPU_FREE(cpuset);
> err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
> + }
>
> - if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL))
> + if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL)) {
> + CPU_FREE(cpuset);
> err(EXIT_FAILURE, "pthread_create");
> + }
> }
> + CPU_FREE(cpuset);
> }
>
> static void print_run(struct thread_data *waking_worker, unsigned int run_num)
> diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
> index aa82db51c0ab..3a10f54900c1 100644
> --- a/tools/perf/bench/futex-wake.c
> +++ b/tools/perf/bench/futex-wake.c
> @@ -97,22 +97,32 @@ static void print_summary(void)
> static void block_threads(pthread_t *w,
> pthread_attr_t thread_attr, struct perf_cpu_map *cpu)
> {
> - cpu_set_t cpuset;
> + cpu_set_t *cpuset;
> unsigned int i;
> -
> + size_t size;
> + int nrcpus = perf_cpu_map__nr(cpu);
> threads_starting = params.nthreads;
>
> + cpuset = CPU_ALLOC(nrcpus);
> + BUG_ON(!cpuset);
> + size = CPU_ALLOC_SIZE(nrcpus);
> +
> /* create and block all threads */
> for (i = 0; i < params.nthreads; i++) {
> - CPU_ZERO(&cpuset);
> - CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
> + CPU_ZERO_S(size, cpuset);
> + CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
>
> - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
> + if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
> + CPU_FREE(cpuset);
> err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
> + }
>
> - if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
> + if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) {
> + CPU_FREE(cpuset);
> err(EXIT_FAILURE, "pthread_create");
> + }
> }
> + CPU_FREE(cpuset);
> }
>
> static void toggle_done(int sig __maybe_unused,
> --
> 2.35.1
>