Re: [PATCH tip/core/rcu 02/18] rcu: Move rcu_report_exp_rnp() to allow consolidation

From: Paul E. McKenney
Date: Thu Oct 08 2015 - 20:10:57 EST


On Thu, Oct 08, 2015 at 07:12:03PM +0200, Peter Zijlstra wrote:
> On Thu, Oct 08, 2015 at 08:33:51AM -0700, Paul E. McKenney wrote:
>
> > > > o CPU B therefore moves up the tree, acquiring the parent
> > > > rcu_node structures' ->lock. In so doing, it forces full
> > > > ordering against all prior RCU read-side critical sections
> > > > of all CPUs corresponding to all leaf rcu_node structures
> > > > subordinate to the current (non-leaf) rcu_node structure.
> > >
> > > And here we iterate the tree and get another lock var involved, here the
> > > barrier upgrade will actually do something.
> >
> > Yep. And I am way too lazy to sort out exactly which acquisitions really
> > truly need smp_mb__after_unlock_lock() and which don't. Besides, if I
> > tried to sort it out, I would occasionally get it wrong, and this would be
> > a real pain to debug. Therefore, I simply do smp_mb__after_unlock_lock()
> > on all acquisitions of the rcu_node structures' ->lock fields. I can
> > actually validate that! ;-)
>
> This is a whole different line of reasoning once again.
>
> The point remains, that the sole purpose of the barrier upgrade is for
> the tree iteration, having some extra (pointless but harmless) instances
> does not detract from that.
>
> > Fair enough, but I will be sticking to the simple coding rule that keeps
> > RCU out of trouble!
>
> Note that there are rnp->lock acquires without the extra barrier though,
> so you seem somewhat inconsistent with your own rule.
>
> See for example:
>
> rcu_dump_cpu_stacks()
> print_other_cpu_stall()
> print_cpu_stall()
>
> (did not do an exhaustive scan, there might be more)
>
> and yes, that is 'obvious' debug code and not critical to the correct
> behaviour of the code, but it is a deviation from 'the rule'.

How about the following patch on top of yours?

Thanx, Paul

------------------------------------------------------------------------

commit 65764359aaec9513bc6aa94e79069469ec74b53e
Author: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
Date: Thu Oct 8 15:36:54 2015 -0700

rcu: Add transitivity to remaining rcu_node ->lock acquisitions

The rule is that all acquisitions of the rcu_node structure's ->lock
must provide transitivity: The lock is not acquired that frequently,
and sorting out exactly which required it and which did not would be
a maintenance nightmare. This commit therefore supplies the needed
transitivity to the remaining ->lock acquisitions.

Reported-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Signed-off-by: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index daf17e248757..81aa1cdc6bc9 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1214,7 +1214,7 @@ static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
struct rcu_node *rnp;

rcu_for_each_leaf_node(rsp, rnp) {
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rnp->qsmask != 0) {
for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
if (rnp->qsmask & (1UL << cpu))
@@ -1237,7 +1237,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)

/* Only let one CPU complain about others per time interval. */

- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
delta = jiffies - READ_ONCE(rsp->jiffies_stall);
if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) {
raw_spin_unlock_irqrestore(&rnp->lock, flags);
@@ -1256,7 +1256,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
rsp->name);
print_cpu_stall_info_begin();
rcu_for_each_leaf_node(rsp, rnp) {
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
ndetected += rcu_print_task_stall(rnp);
if (rnp->qsmask != 0) {
for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
@@ -1327,7 +1327,7 @@ static void print_cpu_stall(struct rcu_state *rsp)

rcu_dump_cpu_stacks(rsp);

- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (ULONG_CMP_GE(jiffies, READ_ONCE(rsp->jiffies_stall)))
WRITE_ONCE(rsp->jiffies_stall,
jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
@@ -2897,7 +2897,7 @@ __rcu_process_callbacks(struct rcu_state *rsp)
/* Does this CPU require a not-yet-started grace period? */
local_irq_save(flags);
if (cpu_needs_another_gp(rsp, rdp)) {
- raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */
+ raw_spin_lock_rcu_node(rcu_get_root(rsp)); /* irqs disabled. */
needwake = rcu_start_gp(rsp);
raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
if (needwake)
@@ -3718,7 +3718,7 @@ retry_ipi:
mask_ofl_ipi &= ~mask;
} else {
/* Failed, raced with offline. */
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (cpu_online(cpu) &&
(rnp->expmask & mask)) {
raw_spin_unlock_irqrestore(&rnp->lock,
@@ -3727,8 +3727,8 @@ retry_ipi:
if (cpu_online(cpu) &&
(rnp->expmask & mask))
goto retry_ipi;
- raw_spin_lock_irqsave(&rnp->lock,
- flags);
+ raw_spin_lock_irqsave_rcu_node(rnp,
+ flags);
}
if (!(rnp->expmask & mask))
mask_ofl_ipi &= ~mask;
@@ -4110,7 +4110,7 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
rnp = rnp->parent;
if (rnp == NULL)
return;
- raw_spin_lock(&rnp->lock); /* Interrupts already disabled. */
+ raw_spin_lock_rcu_node(rnp); /* Interrupts already disabled. */
rnp->qsmaskinit |= mask;
raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
}
@@ -4127,7 +4127,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
struct rcu_node *rnp = rcu_get_root(rsp);

/* Set up local state, ensuring consistent view of global state. */
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
@@ -4154,7 +4154,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
struct rcu_node *rnp = rcu_get_root(rsp);

/* Set up local state, ensuring consistent view of global state. */
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
rdp->qlen_last_fqs_check = 0;
rdp->n_force_qs_snap = rsp->n_force_qs;
rdp->blimit = blimit;
@@ -4301,7 +4301,7 @@ static int __init rcu_spawn_gp_kthread(void)
t = kthread_create(rcu_gp_kthread, rsp, "%s", rsp->name);
BUG_ON(IS_ERR(t));
rnp = rcu_get_root(rsp);
- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
rsp->gp_kthread = t;
if (kthread_prio) {
sp.sched_priority = kthread_prio;
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index fa0e3b96a9ed..57ba873d2f18 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -525,7 +525,7 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
unsigned long flags;
struct task_struct *t;

- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (!rcu_preempt_blocked_readers_cgp(rnp)) {
raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index ef7093cc9b5c..8efaba870d96 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -319,7 +319,7 @@ static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp)
unsigned long gpmax;
struct rcu_node *rnp = &rsp->node[0];

- raw_spin_lock_irqsave(&rnp->lock, flags);
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
completed = READ_ONCE(rsp->completed);
gpnum = READ_ONCE(rsp->gpnum);
if (completed == gpnum)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/