[PATCH 07/16] smp: optimize smp_call_function_many_cond()

From: Yury Norov
Date: Mon Jul 18 2022 - 15:29:29 EST


smp_call_function_many_cond() is often called with cpu_online_mask as
its mask. If this is the case, we can use num_online_cpus(), which is
O(1), instead of cpumask_{first,next}_and(), which are O(N).

It can be optimized further: if cpu_online_mask has zero or a single
bit set, we can return the result (which depends on
cpu_online(this_cpu)) without AND'ing with the user's mask.

Caught with CONFIG_DEBUG_BITMAP:
[ 7.830337] Call trace:
[ 7.830397] __bitmap_check_params+0x1d8/0x260
[ 7.830499] smp_call_function_many_cond+0x1e8/0x45c
[ 7.830607] kick_all_cpus_sync+0x44/0x80
[ 7.830698] bpf_int_jit_compile+0x34c/0x5cc
[ 7.830796] bpf_prog_select_runtime+0x118/0x190
[ 7.830900] bpf_prepare_filter+0x3dc/0x51c
[ 7.830995] __get_filter+0xd4/0x170
[ 7.831145] sk_attach_filter+0x18/0xb0
[ 7.831236] sock_setsockopt+0x5b0/0x1214
[ 7.831330] __sys_setsockopt+0x144/0x170
[ 7.831431] __arm64_sys_setsockopt+0x2c/0x40
[ 7.831541] invoke_syscall+0x48/0x114
[ 7.831634] el0_svc_common.constprop.0+0x44/0xfc
[ 7.831745] do_el0_svc+0x30/0xc0
[ 7.831825] el0_svc+0x2c/0x84
[ 7.831899] el0t_64_sync_handler+0xbc/0x140
[ 7.831999] el0t_64_sync+0x18c/0x190
[ 7.832086] ---[ end trace 0000000000000000 ]---
[ 7.832375] b1: ffff24d1ffd98a48
[ 7.832385] b2: ffffa65533a29a38
[ 7.832393] b3: ffffa65533a29a38
[ 7.832400] nbits: 256
[ 7.832407] start: 0
[ 7.832412] off: 0
[ 7.832418] smp: Bitmap: parameters check failed
[ 7.832432] smp: include/linux/bitmap.h [363]: bitmap_and

Signed-off-by: Yury Norov <yury.norov@xxxxxxxxx>
---
kernel/smp.c | 29 +++++++++++++++++++++++------
1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/kernel/smp.c b/kernel/smp.c
index dd215f439426..7ed2b9b12f74 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -880,6 +880,28 @@ EXPORT_SYMBOL_GPL(smp_call_function_any);
#define SCF_WAIT (1U << 0)
#define SCF_RUN_LOCAL (1U << 1)

+/*
+ * Check if we need remote execution, i.e., whether @mask contains any
+ * online CPU other than @this_cpu.
+ *
+ * num_online_cpus() is consulted first so that the common cases can be
+ * answered without scanning the masks; every other case falls back to
+ * the explicit walk over @mask & cpu_online_mask.
+ */
+static inline bool __need_remote_exec(const struct cpumask *mask, unsigned int this_cpu)
+{
+	unsigned int cpu;
+
+	switch (num_online_cpus()) {
+	case 0:
+		/* Nothing is online, so there is nobody to run on. */
+		return false;
+	case 1:
+		/* The only online CPU is this one: no remote target. */
+		if (cpu_online(this_cpu))
+			return false;
+		/*
+		 * The single online CPU is a remote one, but it is a target
+		 * only if @mask contains it: let the scan below decide.
+		 */
+		break;
+	default:
+		/* Two or more CPUs are online, so one of them is not us. */
+		if (mask == cpu_online_mask)
+			return true;
+	}
+
+	/* First online CPU in @mask, stepping past @this_cpu if it comes first. */
+	cpu = cpumask_first_and(mask, cpu_online_mask);
+	if (cpu == this_cpu)
+		cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
+
+	/* A candidate was found only if the scan stopped below nr_cpu_ids. */
+	return cpu < nr_cpu_ids;
+}
+
static void smp_call_function_many_cond(const struct cpumask *mask,
smp_call_func_t func, void *info,
unsigned int scf_flags,
@@ -916,12 +938,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
if ((scf_flags & SCF_RUN_LOCAL) && cpumask_test_cpu(this_cpu, mask))
run_local = true;

- /* Check if we need remote execution, i.e., any CPU excluding this one. */
- cpu = cpumask_first_and(mask, cpu_online_mask);
- if (cpu == this_cpu)
- cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
- if (cpu < nr_cpu_ids)
- run_remote = true;
+ run_remote = __need_remote_exec(mask, this_cpu);

if (run_remote) {
cfd = this_cpu_ptr(&cfd_data);
--
2.34.1