[RFC][PATCH 2/3] locking,entry: #PF vs TRACE_IRQFLAGS
From: Peter Zijlstra
Date: Fri Aug 07 2020 - 15:34:37 EST
Much of the complexity in irqentry_{enter,exit}() is due to #PF being
the sole exception that can schedule from kernel context.
One additional wrinkle with #PF is that it is non-maskable, it can
happen _anywhere_. Due to this, and the wonders of tracing, we can get
the 'normal' NMI nesting vs TRACE_IRQFLAGS:
  local_irq_disable()
    raw_local_irq_disable();
    trace_hardirqs_off();

  local_irq_enable();
    trace_hardirqs_on();
    <#PF>
      trace_hardirqs_off()
      ...
      if (!regs_irqs_disabled(regs))
        trace_hardirqs_on();
    </#PF>
    // WHOOPS -- lockdep thinks IRQs are disabled again!
    raw_local_irq_enable();

Rework irqentry_{enter,exit}() to save/restore the software state.
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
include/linux/entry-common.h | 1
kernel/entry/common.c | 52 ++++++++++++++++++++-----------------------
2 files changed, 26 insertions(+), 27 deletions(-)
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -310,6 +310,7 @@ void irqentry_exit_to_user_mode(struct p
#ifndef irqentry_state
typedef struct irqentry_state {
bool exit_rcu;
+ bool irqs_enabled;
} irqentry_state_t;
#endif
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -260,6 +260,7 @@ noinstr irqentry_state_t irqentry_enter(
{
irqentry_state_t ret = {
.exit_rcu = false,
+ .irqs_enabled = lockdep_hardirqs_enabled(),
};
if (user_mode(regs)) {
@@ -340,35 +341,32 @@ noinstr void irqentry_exit(struct pt_reg
/* Check whether this returns to user mode */
if (user_mode(regs)) {
irqentry_exit_to_user_mode(regs);
- } else if (!regs_irqs_disabled(regs)) {
- /*
- * If RCU was not watching on entry this needs to be done
- * carefully and needs the same ordering of lockdep/tracing
- * and RCU as the return to user mode path.
- */
- if (state.exit_rcu) {
- instrumentation_begin();
- /* Tell the tracer that IRET will enable interrupts */
- trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare(CALLER_ADDR0);
- instrumentation_end();
- rcu_irq_exit();
- lockdep_hardirqs_on(CALLER_ADDR0);
- return;
- }
+ return;
+ }
- instrumentation_begin();
+ instrumentation_begin();
+ /*
+ * When returning to interrupts enabled, and RCU was watching see if we
+ * need preemption.
+ */
+ if (!regs_irqs_disabled(regs) && !state.exit_rcu) {
if (IS_ENABLED(CONFIG_PREEMPTION))
irqentry_exit_cond_resched();
- /* Covers both tracing and lockdep */
- trace_hardirqs_on();
- instrumentation_end();
- } else {
- /*
- * IRQ flags state is correct already. Just tell RCU if it
- * was not watching on entry.
- */
- if (state.exit_rcu)
- rcu_irq_exit();
}
+
+ /*
+ * Return the TRACE_IRQFLAGS state to what we found on entry.
+ * Observe the correct order vs RCU.
+ */
+ if (state.irqs_enabled) {
+ trace_hardirqs_on_prepare();
+ lockdep_hardirqs_on_prepare(CALLER_ADDR0);
+ }
+ instrumentation_end();
+
+ if (state.exit_rcu)
+ rcu_irq_exit();
+
+ if (state.irqs_enabled)
+ lockdep_hardirqs_on(CALLER_ADDR0);
}