Re: [PATCH v4 2/5] x86, traps: Track entry into and exit from IST context

From: Paul E. McKenney
Date: Mon Nov 24 2014 - 18:31:16 EST


On Mon, Nov 24, 2014 at 02:57:54PM -0800, Paul E. McKenney wrote:
> On Mon, Nov 24, 2014 at 02:36:18PM -0800, Andy Lutomirski wrote:
> > On Mon, Nov 24, 2014 at 2:34 PM, Paul E. McKenney
> > <paulmck@xxxxxxxxxxxxxxxxxx> wrote:
> > > On Mon, Nov 24, 2014 at 01:35:01PM -0800, Paul E. McKenney wrote:
>
> [ . . . ]
>
> > > And the following Promela model claims that your approach works.
> > > Should I trust it? ;-)
> > >
> >
> > I think so.
> >
> > Want to write a patch? If so, whose tree should it go in? I can add
> > it to my IST series, but that seems a bit odd.
>
> Working on it. ;-)

And here is a sneak preview of the patch. Thoughts?

Thanx, Paul

------------------------------------------------------------------------

rcu: Make rcu_nmi_enter() handle nesting

Andy Lutomirski is introducing ISTs into x86, which from RCU's
viewpoint are NMIs. Because ISTs and NMIs can nest, rcu_nmi_enter() and
rcu_nmi_exit() must now correctly handle nesting. This commit therefore
introduces nesting, using a clever NMI-coordination algorithm suggested
by Andy. The trick is to atomically increment ->dynticks (if needed)
before manipulating ->dynticks_nmi_nesting on entry (and, accordingly,
after on exit). In addition, ->dynticks_nmi_nesting is incremented by
one if ->dynticks was incremented and by two otherwise. This means that
when rcu_nmi_exit() sees ->dynticks_nmi_nesting equal to one, it knows
that ->dynticks must be atomically incremented.

This NMI-coordination algorithms has been validated by the following
Promela model, for whatever that might be worth:

/*
* Promela model for Andy Lutomirski's suggested change to rcu_nmi_enter()
* that allows nesting.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* Copyright IBM Corporation, 2014
*
* Author: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
*/

byte dynticks_nesting = 0;
byte dynticks_nmi_nesting = 0;
byte dynticks = 0;

/*
* Promela verision of rcu_nmi_enter().
*/
inline rcu_nmi_enter()
{
byte incby;

incby = 2;
assert(dynticks_nmi_nesting >= 0);
if
:: (dynticks & 1) == 0 ->
atomic {
dynticks = dynticks + 1;
}
assert((dynticks & 1) == 1);
incby = 1;
:: else ->
skip;
fi;
dynticks_nmi_nesting = dynticks_nmi_nesting + incby;
assert(dynticks_nmi_nesting >= 1);
}

/*
* Promela verision of rcu_nmi_exit().
*/
inline rcu_nmi_exit()
{
assert(dynticks_nmi_nesting > 0);
assert((dynticks & 1) != 0);
if
:: dynticks_nmi_nesting != 1 ->
dynticks_nmi_nesting = dynticks_nmi_nesting - 2;
:: else ->
dynticks_nmi_nesting = 0;
atomic {
dynticks = dynticks + 1;
}
assert((dynticks & 1) == 0);
fi;
}

/*
* Base-level NMI runs non-atomically. Crudely emulates process-level
* dynticks-idle entry/exit.
*/
proctype base_NMI()
{
byte busy;

busy = 0;
do
:: if
:: 1 -> atomic {
dynticks = dynticks + 1;
}
busy = 0;
:: 1 -> skip;
fi;
rcu_nmi_enter();
assert((dynticks & 1) == 1);
rcu_nmi_exit();
if
:: busy -> skip;
:: !busy ->
atomic {
dynticks = dynticks + 1;
}
busy = 1;
fi;
od;
}

/*
* Nested NMI runs atomically to emulate interrupting base_level().
*/
proctype nested_NMI()
{
do
:: atomic {
rcu_nmi_enter();
assert((dynticks & 1) == 1);
rcu_nmi_exit();
}
od;
}

init {
run base_NMI();
run nested_NMI();
}

Signed-off-by: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 8749f43f3f05..fc0236992655 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -759,39 +759,71 @@ void rcu_irq_enter(void)
/**
* rcu_nmi_enter - inform RCU of entry to NMI context
*
- * If the CPU was idle with dynamic ticks active, and there is no
- * irq handler running, this updates rdtp->dynticks_nmi to let the
- * RCU grace-period handling know that the CPU is active.
+ * If the CPU was idle from RCU's viewpoint, update rdtp->dynticks and
+ * rdtp->dynticks_nmi_nesting to let the RCU grace-period handling know
+ * that the CPU is active. This implementation permits nested NMIs, as
+ * long as the nesting level does not overflow an int. (You will probably
+ * run out of stack space first.)
*/
void rcu_nmi_enter(void)
{
struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
+ int incby = 2;

- if (rdtp->dynticks_nmi_nesting == 0 &&
- (atomic_read(&rdtp->dynticks) & 0x1))
- return;
- rdtp->dynticks_nmi_nesting++;
- smp_mb__before_atomic(); /* Force delay from prior write. */
- atomic_inc(&rdtp->dynticks);
- /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
- smp_mb__after_atomic(); /* See above. */
- WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
+ /* Complain about underflow. */
+ WARN_ON_ONCE(rdtp->dynticks_nmi_nesting < 0);
+
+ /*
+ * If idle from RCU viewpoint, atomically increment ->dynticks
+ * to mark non-idle and increment ->dynticks_nmi_nesting by one.
+ * Otherwise, increment ->dynticks_nmi_nesting by two. This means
+ * if ->dynticks_nmi_nesting is equal to one, we are guaranteed
+ * to be in the outermost NMI handler that interrupted an RCU-idle
+ * period (observation due to Andy Lutomirski).
+ */
+ if (!(atomic_read(&rdtp->dynticks) & 0x1)) {
+ smp_mb__before_atomic(); /* Force delay from prior write. */
+ atomic_inc(&rdtp->dynticks);
+ /* atomic_inc() before later RCU read-side crit sects */
+ smp_mb__after_atomic(); /* See above. */
+ WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
+ incby = 1;
+ }
+ rdtp->dynticks_nmi_nesting += incby;
+ barrier();
}

/**
* rcu_nmi_exit - inform RCU of exit from NMI context
*
- * If the CPU was idle with dynamic ticks active, and there is no
- * irq handler running, this updates rdtp->dynticks_nmi to let the
- * RCU grace-period handling know that the CPU is no longer active.
+ * If we are returning from the outermost NMI handler that interrupted an
+ * RCU-idle period, update rdtp->dynticks and rdtp->dynticks_nmi_nesting
+ * to let the RCU grace-period handling know that the CPU is back to
+ * being RCU-idle.
*/
void rcu_nmi_exit(void)
{
struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);

- if (rdtp->dynticks_nmi_nesting == 0 ||
- --rdtp->dynticks_nmi_nesting != 0)
+ /*
+ * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks.
+ * (We are exiting an NMI handler, so RCU better be paying attention
+ * to us!)
+ */
+ WARN_ON_ONCE(rdtp->dynticks_nmi_nesting <= 0);
+ WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
+
+ /*
+ * If the nesting level is not 1, the CPU wasn't RCU-idle, so
+ * leave it in non-RCU-idle state.
+ */
+ if (rdtp->dynticks_nmi_nesting != 1) {
+ rdtp->dynticks_nmi_nesting -= 2;
return;
+ }
+
+ /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
+ rdtp->dynticks_nmi_nesting = 0;
/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
smp_mb__before_atomic(); /* See above. */
atomic_inc(&rdtp->dynticks);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/