Re: [PATCH 3/6] hotplug: Optimize cpu_hotplug_{begin,done}() using rcu_sync

From: Paul E. McKenney
Date: Tue Oct 08 2013 - 12:28:29 EST


On Tue, Oct 08, 2013 at 12:25:08PM +0200, Peter Zijlstra wrote:
> Use the fancy new rcu_sync bits from Oleg to optimize the fancy new
> hotplug lock implementation.
>
> Reviewed-by: Oleg Nesterov <oleg@xxxxxxxxxx>
> Signed-off-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>

Reviewed-by: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>

> ---
> include/linux/cpu.h | 7 +++---
> kernel/cpu.c | 54 +++++++++++++++++++++++-----------------------------
> 2 files changed, 28 insertions(+), 33 deletions(-)
>
> --- a/include/linux/cpu.h
> +++ b/include/linux/cpu.h
> @@ -18,6 +18,7 @@
> #include <linux/cpumask.h>
> #include <linux/percpu.h>
> #include <linux/sched.h>
> +#include <linux/rcusync.h>
>
> struct device;
>
> @@ -180,7 +181,7 @@ extern void cpu_hotplug_init_task(struct
> extern void cpu_hotplug_begin(void);
> extern void cpu_hotplug_done(void);
>
> -extern int __cpuhp_state;
> +extern struct rcu_sync_struct __cpuhp_rss;
> DECLARE_PER_CPU(unsigned int, __cpuhp_refcount);
>
> extern void __get_online_cpus(void);
> @@ -204,7 +205,7 @@ static inline void get_online_cpus(void)
> * writer will see anything we did within this RCU-sched read-side
> * critical section.
> */
> - if (likely(!__cpuhp_state))
> + if (likely(rcu_sync_is_idle(&__cpuhp_rss)))
> __this_cpu_inc(__cpuhp_refcount);
> else
> __get_online_cpus(); /* Unconditional memory barrier. */
> @@ -231,7 +232,7 @@ static inline void put_online_cpus(void)
> /*
> * Same as in get_online_cpus().
> */
> - if (likely(!__cpuhp_state))
> + if (likely(rcu_sync_is_idle(&__cpuhp_rss)))
> __this_cpu_dec(__cpuhp_refcount);
> else
> __put_online_cpus(); /* Unconditional memory barrier. */
> --- a/kernel/cpu.c
> +++ b/kernel/cpu.c
> @@ -49,14 +49,15 @@ static int cpu_hotplug_disabled;
>
> #ifdef CONFIG_HOTPLUG_CPU
>
> -enum { readers_fast = 0, readers_slow, readers_block };
> +enum { readers_slow, readers_block };
>
> -int __cpuhp_state;
> -EXPORT_SYMBOL_GPL(__cpuhp_state);
> +DEFINE_RCU_SCHED_SYNC(__cpuhp_rss);
> +EXPORT_SYMBOL_GPL(__cpuhp_rss);
>
> DEFINE_PER_CPU(unsigned int, __cpuhp_refcount);
> EXPORT_PER_CPU_SYMBOL_GPL(__cpuhp_refcount);
>
> +static int cpuhp_state = readers_slow;
> static atomic_t cpuhp_waitcount;
> static DECLARE_WAIT_QUEUE_HEAD(cpuhp_readers);
> static DECLARE_WAIT_QUEUE_HEAD(cpuhp_writer);
> @@ -68,7 +69,6 @@ void cpu_hotplug_init_task(struct task_s
>
> void __get_online_cpus(void)
> {
> -again:
> __this_cpu_inc(__cpuhp_refcount);
>
> /*
> @@ -77,7 +77,7 @@ void __get_online_cpus(void)
> * increment-on-one-CPU-and-decrement-on-another problem.
> *
> * And yes, if the reader misses the writer's assignment of
> - * readers_block to __cpuhp_state, then the writer is
> + * readers_block to cpuhp_state, then the writer is
> * guaranteed to see the reader's increment. Conversely, any
> * readers that increment their __cpuhp_refcount after the
> * writer looks are guaranteed to see the readers_block value,
> @@ -88,7 +88,7 @@ void __get_online_cpus(void)
>
> smp_mb(); /* A matches D */
>
> - if (likely(__cpuhp_state != readers_block))
> + if (likely(cpuhp_state != readers_block))
> return;
>
> /*
> @@ -108,19 +108,19 @@ void __get_online_cpus(void)
> * and reschedule on the preempt_enable() in get_online_cpus().
> */
> preempt_enable_no_resched();
> - __wait_event(cpuhp_readers, __cpuhp_state != readers_block);
> + __wait_event(cpuhp_readers, cpuhp_state != readers_block);
> preempt_disable();
>
> + __this_cpu_inc(__cpuhp_refcount);
> +
> /*
> - * Given we've still got preempt_disabled and new cpu_hotplug_begin()
> - * must do a synchronize_sched() we're guaranteed a successfull
> - * acquisition this time -- even if we wake the current
> - * cpu_hotplug_end() now.
> + * cpu_hotplug_done() waits until all pending readers are gone;
> + * this means that a new cpu_hotplug_begin() must observe our
> + * refcount increment and wait for it to go away.
> */
> - if (atomic_dec_and_test(&cpuhp_waitcount))
> - wake_up(&cpuhp_writer);
>
> - goto again;
> + if (atomic_dec_and_test(&cpuhp_waitcount)) /* A */
> + wake_up(&cpuhp_writer);
> }
> EXPORT_SYMBOL_GPL(__get_online_cpus);
>
> @@ -186,21 +186,18 @@ void cpu_hotplug_begin(void)
> current->cpuhp_ref++;
>
> /* Notify readers to take the slow path. */
> - __cpuhp_state = readers_slow;
> -
> - /* See percpu_down_write(); guarantees all readers take the slow path */
> - synchronize_sched();
> + rcu_sync_enter(&__cpuhp_rss);
>
> /*
> * Notify new readers to block; up until now, and thus throughout the
> - * longish synchronize_sched() above, new readers could still come in.
> + * longish rcu_sync_enter() above, new readers could still come in.
> */
> - __cpuhp_state = readers_block;
> + cpuhp_state = readers_block;
>
> smp_mb(); /* D matches A */
>
> /*
> - * If they don't see our writer of readers_block to __cpuhp_state,
> + * If they don't see our write of readers_block to cpuhp_state,
> * then we are guaranteed to see their __cpuhp_refcount increment, and
> * therefore will wait for them.
> */
> @@ -218,26 +215,23 @@ void cpu_hotplug_done(void)
> * that new readers might fail to see the results of this writer's
> * critical section.
> */
> - __cpuhp_state = readers_slow;
> + cpuhp_state = readers_slow;
> wake_up_all(&cpuhp_readers);
>
> /*
> * The wait_event()/wake_up_all() prevents the race where the readers
> - * are delayed between fetching __cpuhp_state and blocking.
> + * are delayed between fetching cpuhp_state and blocking.
> */
>
> - /* See percpu_up_write(); readers will no longer attempt to block. */
> - synchronize_sched();
> -
> - /* Let 'em rip */
> - __cpuhp_state = readers_fast;
> current->cpuhp_ref--;
>
> /*
> - * Wait for any pending readers to be running. This ensures readers
> - * after writer and avoids writers starving readers.
> + * Wait for any pending readers to be running. This avoids writers
> + * starving readers.
> */
> wait_event(cpuhp_writer, !atomic_read(&cpuhp_waitcount));
> +
> + rcu_sync_exit(&__cpuhp_rss);
> }
>
> /*
>
>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/