Re: [PATCH v2 2/2] locktorture: call percpu_free_rwsem() to do percpu-rwsem cleanup

From: Paul E. McKenney
Date: Fri Sep 25 2020 - 13:13:34 EST


On Thu, Sep 24, 2020 at 10:18:54PM +0800, Hou Tao wrote:
> When do percpu-rwsem writer lock torture, the RCU callback
> rcu_sync_func() may still be pending after locktorture module
> is removed, and it will lead to the following Oops:
>
> BUG: unable to handle page fault for address: ffffffffc00eb920
> #PF: supervisor read access in kernel mode
> #PF: error_code(0x0000) - not-present page
> PGD 6500a067 P4D 6500a067 PUD 6500c067 PMD 13a36c067 PTE 800000013691c163
> Oops: 0000 [#1] PREEMPT SMP
> CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.9.0-rc5+ #4
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
> RIP: 0010:rcu_cblist_dequeue+0x12/0x30
> Call Trace:
> <IRQ>
> rcu_core+0x1b1/0x860
> __do_softirq+0xfe/0x326
> asm_call_on_stack+0x12/0x20
> </IRQ>
> do_softirq_own_stack+0x5f/0x80
> irq_exit_rcu+0xaf/0xc0
> sysvec_apic_timer_interrupt+0x2e/0xb0
> asm_sysvec_apic_timer_interrupt+0x12/0x20
>
> Fix it by adding an exit hook in lock_torture_ops and
> use it to call percpu_free_rwsem() for percpu rwsem torture
> before the module is removed, so we can ensure rcu_sync_func()
> completes before module exits.
>
> Also needs to call exit hook if lock_torture_init() fails half-way,
> so add init_called field in lock_torture_cxt to indicate that
> init hook has been called.
>
> Signed-off-by: Hou Tao <houtao1@xxxxxxxxxx>

Nice, thank you! I have queued this for v5.11.

As always, I could not resist reworking the commit log as below, and
as always, please check to make sure that I didn't mess anything up.

Thanx, Paul

------------------------------------------------------------------------

commit f794b408207152995aefdf41ee9a3f5c974f622a
Author: Hou Tao <houtao1@xxxxxxxxxx>
Date: Thu Sep 24 22:18:54 2020 +0800

locktorture: Invoke percpu_free_rwsem() to do percpu-rwsem cleanup

When executing the LOCK06 locktorture scenario featuring percpu-rwsem,
the RCU callback rcu_sync_func() may still be pending after locktorture
module is removed. This can in turn lead to the following Oops:

BUG: unable to handle page fault for address: ffffffffc00eb920
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 6500a067 P4D 6500a067 PUD 6500c067 PMD 13a36c067 PTE 800000013691c163
Oops: 0000 [#1] PREEMPT SMP
CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.9.0-rc5+ #4
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
RIP: 0010:rcu_cblist_dequeue+0x12/0x30
Call Trace:
<IRQ>
rcu_core+0x1b1/0x860
__do_softirq+0xfe/0x326
asm_call_on_stack+0x12/0x20
</IRQ>
do_softirq_own_stack+0x5f/0x80
irq_exit_rcu+0xaf/0xc0
sysvec_apic_timer_interrupt+0x2e/0xb0
asm_sysvec_apic_timer_interrupt+0x12/0x20

This commit avoids tis problem by adding an exit hook in lock_torture_ops
and using it to call percpu_free_rwsem() for percpu rwsem torture during
the module-cleanup function, thus ensuring that rcu_sync_func() completes
before module exits.

It is also necessary to call the exit hook if lock_torture_init()
fails half-way, so this commit also adds an ->init_called field in
lock_torture_cxt to indicate that exit hook, if present, must be called.

Signed-off-by: Hou Tao <houtao1@xxxxxxxxxx>
Signed-off-by: Paul E. McKenney <paulmck@xxxxxxxxxx>

diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 79fbd97..fd838ce 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -76,6 +76,7 @@ static void lock_torture_cleanup(void);
*/
struct lock_torture_ops {
void (*init)(void);
+ void (*exit)(void);
int (*writelock)(void);
void (*write_delay)(struct torture_random_state *trsp);
void (*task_boost)(struct torture_random_state *trsp);
@@ -92,12 +93,13 @@ struct lock_torture_cxt {
int nrealwriters_stress;
int nrealreaders_stress;
bool debug_lock;
+ bool init_called;
atomic_t n_lock_torture_errors;
struct lock_torture_ops *cur_ops;
struct lock_stress_stats *lwsa; /* writer statistics */
struct lock_stress_stats *lrsa; /* reader statistics */
};
-static struct lock_torture_cxt cxt = { 0, 0, false,
+static struct lock_torture_cxt cxt = { 0, 0, false, false,
ATOMIC_INIT(0),
NULL, NULL};
/*
@@ -573,6 +575,11 @@ static void torture_percpu_rwsem_init(void)
BUG_ON(percpu_init_rwsem(&pcpu_rwsem));
}

+static void torture_percpu_rwsem_exit(void)
+{
+ percpu_free_rwsem(&pcpu_rwsem);
+}
+
static int torture_percpu_rwsem_down_write(void) __acquires(pcpu_rwsem)
{
percpu_down_write(&pcpu_rwsem);
@@ -597,6 +604,7 @@ static void torture_percpu_rwsem_up_read(void) __releases(pcpu_rwsem)

static struct lock_torture_ops percpu_rwsem_lock_ops = {
.init = torture_percpu_rwsem_init,
+ .exit = torture_percpu_rwsem_exit,
.writelock = torture_percpu_rwsem_down_write,
.write_delay = torture_rwsem_write_delay,
.task_boost = torture_boost_dummy,
@@ -789,9 +797,10 @@ static void lock_torture_cleanup(void)

/*
* Indicates early cleanup, meaning that the test has not run,
- * such as when passing bogus args when loading the module. As
- * such, only perform the underlying torture-specific cleanups,
- * and avoid anything related to locktorture.
+ * such as when passing bogus args when loading the module.
+ * However cxt->cur_ops.init() may have been invoked, so beside
+ * perform the underlying torture-specific cleanups, cur_ops.exit()
+ * will be invoked if needed.
*/
if (!cxt.lwsa && !cxt.lrsa)
goto end;
@@ -831,6 +840,11 @@ static void lock_torture_cleanup(void)
cxt.lrsa = NULL;

end:
+ if (cxt.init_called) {
+ if (cxt.cur_ops->exit)
+ cxt.cur_ops->exit();
+ cxt.init_called = false;
+ }
torture_cleanup_end();
}

@@ -878,8 +892,10 @@ static int __init lock_torture_init(void)
goto unwind;
}

- if (cxt.cur_ops->init)
+ if (cxt.cur_ops->init) {
cxt.cur_ops->init();
+ cxt.init_called = true;
+ }

if (nwriters_stress >= 0)
cxt.nrealwriters_stress = nwriters_stress;