[PATCH rcu 04/18] rcutorture: Decorate failing reader segments with CPU ID

From: Paul E. McKenney
Date: Thu Dec 12 2024 - 13:50:34 EST


This commit adds CPU number to the "Failure/close-call rcutorture reader
segments" list printed at the end of an rcutorture run that had too-short
grace periods. This information can help debugging interactions with
migration and CPU hotplug.

However, experience indicates that sampling the CPU number in rcutorture's
read-side code can reduce the probability of too-short bugs by a small
integer factor. And small integer factors are crucial to RCU bug hunting,
so this commit also introduces a default-off RCU_TORTURE_TEST_LOG_CPU
Kconfig option to enable this CPU-number-logging functionality at
build time.

Signed-off-by: Paul E. McKenney <paulmck@xxxxxxxxxx>
---
kernel/rcu/Kconfig.debug | 15 +++++++++++++++
kernel/rcu/rcutorture.c | 9 +++++++--
2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
index 9b0b52e1836fa..b3ac000004bfe 100644
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@ -53,6 +53,21 @@ config RCU_TORTURE_TEST
Say M if you want the RCU torture tests to build as a module.
Say N if you are unsure.

+config RCU_TORTURE_TEST_LOG_CPU
+ tristate "Log CPU for rcutorture failures"
+ depends on RCU_TORTURE_TEST
+ default n
+ help
+ This option causes rcutorture to decorate each entry of its
+ log of failure/close-call rcutorture reader segments with the
+ number of the CPU that the reader was running on at the time.
+ This information can be useful, but it does incur additional
+ overhead, overhead that can make both failures and close calls
+ less probable.
+
+ Say Y here if you want CPU IDs logged.
+ Say N if you are unsure.
+
config RCU_REF_SCALE_TEST
tristate "Scalability tests for read-side synchronization (RCU and others)"
depends on DEBUG_KERNEL
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 99780a74da44c..0bc6fc5822153 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -262,6 +262,7 @@ struct rt_read_seg {
unsigned long rt_delay_ms;
unsigned long rt_delay_us;
bool rt_preempted;
+ int rt_cpu;
};
static int err_segs_recorded;
static struct rt_read_seg err_segs[RCUTORTURE_RDR_MAX_SEGS];
@@ -1862,6 +1863,8 @@ static void rcutorture_one_extend(int *readstate, int newstate,
WARN_ON_ONCE(idxold2 < 0);
WARN_ON_ONCE(idxold2 & ~RCUTORTURE_RDR_ALLBITS);
rtrsp->rt_readstate = newstate;
+ if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_LOG_CPU))
+ rtrsp->rt_cpu = raw_smp_processor_id();

/* First, put new protection in place to avoid critical-section gap. */
if (statesnew & RCUTORTURE_RDR_BH)
@@ -3559,8 +3562,10 @@ rcu_torture_cleanup(void)
err_segs[i].rt_delay_us);
firsttime = 0;
}
- pr_cont("%s\n",
- err_segs[i].rt_preempted ? "preempted" : "");
+ pr_cont("%s", err_segs[i].rt_preempted ? "preempted" : "");
+ if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_LOG_CPU))
+ pr_cont(" CPU %d", err_segs[i].rt_cpu);
+ pr_cont("\n");

}
}
--
2.40.1