[PATCH] kernel/rcustate.c: state machine based rcu implementation

From: Manfred Spraul
Date: Sun Oct 12 2008 - 10:25:44 EST

Next message: Jörn Engel: "Re: Filesystem for block devices using flash storage?"
Previous message: Josef Bacik: "Re: [2.6 patch] provide generic_block_fiemap() only with BLOCK=y"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

I've updated the state machine based rcu code.
The main new point is the rewritten rcu_irq_exit() code; it should now
scale (no more write accesses to global memory).

Main points:
- As previously a state machine with system wide states: Either
accumulate further call_rcu() callbacks, or collect the
callbacks for the next grace period, or wait for a quiescent
state.
Rationale:
The rules for the state transitions are different for each state,
thus a system wide state allows simpler transfer.
e.g.: nohz cpus never have pending call_rcu() callbacks. Thus they
can be skipped entirely for the "collect" stage.
Right now there is no global state, thus every transition must be
treated as a grace period.
- Improved latency: There is only one for_each_cpu() loop per grace
period, and even that loop is from schedule_work() with enabled
local interrupts.
Rationale:
for_each_cpu() loops with disabled local interrupts will cause
latency problems.
- Experimental: it boots, nohz seems to work, cpu offline works.

What do you think?

The patch is against cdbb92b31d3c465aa96bd09f2d42c39b87b32bee plus the
CPU_STARTING patch I posted recently.

Signed-Off-By: Manfred Spraul <manfred@xxxxxxxxxxxxxxxx>
---
include/linux/hardirq.h | 27 +-
include/linux/rcuclassic.h | 2 -
include/linux/rcucpumask.h | 150 ++++++
include/linux/rcupdate.h | 19 +-
include/linux/rcupreempt.h | 14 -
include/linux/rcustate.h | 284 +++++++++++
init/Kconfig | 12 +-
kernel/Makefile | 1 +
kernel/cpu.c | 5 +-
kernel/rcuclassic.c | 18 +
kernel/rcucpumask.c | 93 ++++
kernel/rcupreempt.c | 6 +-
kernel/rcustate.c | 1136 ++++++++++++++++++++++++++++++++++++++++++++
kernel/softirq.c | 2 +-
14 files changed, 1733 insertions(+), 36 deletions(-)
create mode 100644 include/linux/rcucpumask.h
create mode 100644 include/linux/rcustate.h
create mode 100644 kernel/rcucpumask.c
create mode 100644 kernel/rcustate.c

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 181006c..4c064a3 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -118,13 +118,13 @@ static inline void account_system_vtime(struct task_struct *tsk)
}
#endif

-#if defined(CONFIG_PREEMPT_RCU) && defined(CONFIG_NO_HZ)
-extern void rcu_irq_enter(void);
-extern void rcu_irq_exit(void);
+#ifdef CONFIG_NO_HZ
+extern void rcu_irq_enter(int in_nmi);
+extern void rcu_irq_exit(int in_nmi);
#else
-# define rcu_irq_enter() do { } while (0)
-# define rcu_irq_exit() do { } while (0)
-#endif /* CONFIG_PREEMPT_RCU */
+# define rcu_irq_enter(in_nmi) do { } while (0)
+# define rcu_irq_exit(in_nmi) do { } while (0)
+#endif /* CONFIG_NO_HZ */

/*
* It is safe to do non-atomic ops on ->hardirq_context,
@@ -132,14 +132,17 @@ extern void rcu_irq_exit(void);
* always balanced, so the interrupted value of ->hardirq_context
* will always be restored.
*/
-#define __irq_enter() \
+#define ____irq_enter(in_nmi) \
do { \
- rcu_irq_enter(); \
+ rcu_irq_enter(in_nmi); \
account_system_vtime(current); \
add_preempt_count(HARDIRQ_OFFSET); \
trace_hardirq_enter(); \
} while (0)

+#define __irq_enter() ____irq_enter(0)
+#define __irq_exit() ____irq_exit(0)
+
/*
* Enter irq context (on NO_HZ, update jiffies):
*/
@@ -148,12 +151,12 @@ extern void irq_enter(void);
/*
* Exit irq context without processing softirqs:
*/
-#define __irq_exit() \
+#define ____irq_exit(in_nmi) \
do { \
trace_hardirq_exit(); \
account_system_vtime(current); \
sub_preempt_count(HARDIRQ_OFFSET); \
- rcu_irq_exit(); \
+ rcu_irq_exit(in_nmi); \
} while (0)

/*
@@ -161,7 +164,7 @@ extern void irq_enter(void);
*/
extern void irq_exit(void);

-#define nmi_enter() do { lockdep_off(); __irq_enter(); } while (0)
-#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0)
+#define nmi_enter() do { lockdep_off(); ____irq_enter(1); } while (0)
+#define nmi_exit() do { ____irq_exit(1); lockdep_on(); } while (0)

#endif /* LINUX_HARDIRQ_H */
diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
index 5f89b62..9178f17 100644
--- a/include/linux/rcuclassic.h
+++ b/include/linux/rcuclassic.h
@@ -168,8 +168,6 @@ extern struct lockdep_map rcu_lock_map;

#define __synchronize_sched() synchronize_rcu()

-#define call_rcu_sched(head, func) call_rcu(head, func)
-
extern void __rcu_init(void);
#define rcu_init_sched() do { } while (0)
extern void rcu_check_callbacks(int cpu, int user);
diff --git a/include/linux/rcucpumask.h b/include/linux/rcucpumask.h
new file mode 100644
index 0000000..43cacd4
--- /dev/null
+++ b/include/linux/rcucpumask.h
@@ -0,0 +1,150 @@
+/*
+ * cpu mask with integrated locking, intended for rcu
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * (C) Manfred Spraul <manfred@xxxxxxxxxxxxxxxx>, 2008
+ *
+ */
+
+#ifndef __LINUX_RCUCPUMASK_H
+#define __LINUX_RCUCPUMASK_H
+
+#include <linux/spinlock.h>
+#include <linux/cpumask.h>
+
+#define RCUCPUMASK_CPULIMIT 512
+
+#if (NR_CPUS > RCUCPUMASK_CPULIMIT)
+
+#error "rcucpumask: NR_CPUS > RCUCPUMASK_CPULIMIT needs the hierarchical cpumask, which is not implemented yet"
+
+#elif (NR_CPUS > 1)
+
+/*
+ * cpu bitmask:
+ * "normal" implementation, single spinlock.
+ */
+
+#define RCUCPUMASK_FLAT 1
+
+struct rcu_cpumask {
+ spinlock_t lock;
+
+ /* number of cpus that are tracked by rcu */
+ int cpus_total;
+
+ /* number of cpus that are still unresolved */
+ atomic_t cpus_open;
+
+ int state ____cacheline_internodealigned_in_smp;
+} ____cacheline_internodealigned_in_smp;
+
+#define __RCU_CPUMASK_INIT(ptr) { .lock = __SPIN_LOCK_UNLOCKED(&(ptr)->lock) }
+
+/**
+ * rcu_cpumask_init(rcm, newstate, setupcpus) - initialize cpu mask with all live cpus.
+ * @rcm: rcu cpumask pointer.
+ * @newstate: new global state of the state machine
+ *
+ * This function sets the cpu bits for all cpus that might read pointers
+ * to rcu protected structures if @setupcpus is nonzero, and none otherwise.
+ */
+extern void rcu_cpumask_init(struct rcu_cpumask *rcm, int newstate, int setupcpus);
+
+/**
+ * rcu_cpumask_clear_and_test(rcm, cpu) - remove one cpu from cpumask
+ * @rcm: rcu cpumask pointer.
+ * @cpu: cpu to remove
+ *
+ * This function clears the bit for the given @cpu from the cpu mask.
+ * If no other bits are set, then the function returns 1, otherwise 0.
+ */
+extern int rcu_cpumask_clear_and_test(struct rcu_cpumask *rcm, int cpu);
+
+/**
+ * rcu_cpumask_addcpu(rcm, cpu) - list a cpu as important for rcu
+ * @rcm: rcu cpumask pointer.
+ * @cpu: cpu to add
+ *
+ * This function adds the given cpu to the list of cpus that might access
+ * rcu related structures.
+ * The function returns the current state, i.e. the state for which the cpu
+ * doesn't need to do anything.
+ */
+extern int rcu_cpumask_addcpu(struct rcu_cpumask *rcm, int cpu);
+
+/**
+ * rcu_cpumask_removecpu(rcm, cpu) - remove a cpu from cpu list.
+ * @rcm: rcu cpumask pointer.
+ * @cpu: cpu to remove
+ *
+ * The function removes the given @cpu from the list of rcu related cpus.
+ * A cpu that is not listed must neither call call_rcu() nor access any
+ * rcu protected structures.
+ *
+ * The function returns the state for which the cpu is still listed,
+ * i.e. the cpu must do the work for that state.
+ */
+extern int rcu_cpumask_removecpu(struct rcu_cpumask *rcm, int cpu);
+
+#else /* NR_CPUS == 1 */
+
+/*
+ * cpu bitmask: uniprocessor optimized.
+ * - there is just one cpu, it's always online.
+ * - clear_and_test always clears the only bit that could be set,
+ * thus it always returns 1.
+ * Conclusion: No datastorage at all needed.
+ */
+
+struct rcu_cpumask {
+ int state;
+};
+
+#define __RCU_CPUMASK_INIT(ptr) { .state = 0 }
+
+static inline void rcu_cpumask_init(struct rcu_cpumask *rcm, int newstate, int setupcpus)
+{
+ rcm->state = newstate;
+}
+static inline int rcu_cpumask_clear_and_test(struct rcu_cpumask *rcm, int cpu)
+{
+ return 1;
+}
+static inline int rcu_cpumask_addcpu(struct rcu_cpumask *rcm, int cpu)
+{
+ return rcm->state;
+}
+
+static inline int rcu_cpumask_removecpu(struct rcu_cpumask *rcm, int cpu)
+{
+ return rcm->state;
+}
+
+#endif /* NR_CPUS == 1 */
+
+/**
+ * rcu_cpumask_getstate(rcm) - retrieve the current state
+ * @rcm: rcu cpumask pointer.
+ *
+ * This function returns the current state from the cpu mask.
+ */
+static inline int rcu_cpumask_getstate(struct rcu_cpumask *rcm)
+{
+ return rcm->state;
+}
+
+#endif /* __LINUX_RCUCPUMASK_H */
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 86f1f5e..69c81e2 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -52,7 +52,9 @@ struct rcu_head {
void (*func)(struct rcu_head *head);
};

-#ifdef CONFIG_CLASSIC_RCU
+#ifdef CONFIG_STATE_RCU
+#include <linux/rcustate.h>
+#elif defined(CONFIG_CLASSIC_RCU)
#include <linux/rcuclassic.h>
#else /* #ifdef CONFIG_CLASSIC_RCU */
#include <linux/rcupreempt.h>
@@ -263,6 +265,21 @@ extern void call_rcu(struct rcu_head *head,
extern void call_rcu_bh(struct rcu_head *head,
void (*func)(struct rcu_head *head));

+/**
+ * call_rcu_sched - Queue RCU callback for invocation after sched grace period.
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual update function to be invoked after the grace period
+ *
+ * The update function will be invoked some time after a full
+ * synchronize_sched()-style grace period elapses, in other words after
+ * all currently executing preempt-disabled sections of code (including
+ * hardirq handlers, NMI handlers, and local_irq_save() blocks) have
+ * completed.
+ */
+extern void call_rcu_sched(struct rcu_head *head,
+ void (*func)(struct rcu_head *head));
+
+
/* Exported common interfaces */
extern void synchronize_rcu(void);
extern void rcu_barrier(void);
diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h
index 3e05c09..bef8562 100644
--- a/include/linux/rcupreempt.h
+++ b/include/linux/rcupreempt.h
@@ -65,20 +65,6 @@ static inline void rcu_qsctr_inc(int cpu)
*/
#define call_rcu_bh call_rcu

-/**
- * call_rcu_sched - Queue RCU callback for invocation after sched grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual update function to be invoked after the grace period
- *
- * The update function will be invoked some time after a full
- * synchronize_sched()-style grace period elapses, in other words after
- * all currently executing preempt-disabled sections of code (including
- * hardirq handlers, NMI handlers, and local_irq_save() blocks) have
- * completed.
- */
-extern void call_rcu_sched(struct rcu_head *head,
- void (*func)(struct rcu_head *head));
-
extern void __rcu_read_lock(void) __acquires(RCU);
extern void __rcu_read_unlock(void) __releases(RCU);
extern int rcu_pending(int cpu);
diff --git a/include/linux/rcustate.h b/include/linux/rcustate.h
new file mode 100644
index 0000000..c8c4657
--- /dev/null
+++ b/include/linux/rcustate.h
@@ -0,0 +1,284 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion (state machine version)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2001
+ *
+ * Author: Dipankar Sarma <dipankar@xxxxxxxxxx>
+ *
+ * Based on the original work by Paul McKenney <paulmck@xxxxxxxxxx>
+ * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
+ * Papers:
+ * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
+ * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * Documentation/RCU
+ *
+ * Rewrite based on a global state machine
+ * (C) Manfred Spraul <manfred@xxxxxxxxxxxxxxxx>, 2008
+ */
+
+#ifndef __LINUX_RCUSTATE_H
+#define __LINUX_RCUSTATE_H
+
+#include <linux/cache.h>
+#include <linux/spinlock.h>
+#include <linux/threads.h>
+#include <linux/percpu.h>
+#include <linux/cpumask.h>
+#include <linux/seqlock.h>
+#include <linux/rcucpumask.h>
+
+/*
+ * global state machine:
+ * - each cpu regularly checks the global state and compares it with its own local state.
+ * - if the two states do not match, then the cpus do the required work and afterwards
+ * - update their local state
+ * - clear their bit in the cpu bitmask.
+ * The state machine is protected by the protocol:
+ * The state can only change when all cpus have completed the current stage, thus
+ * random changes cannot happen.
+ * The only exception is the change from RCU_STATE_DESTROY to RCU_STATE_DESTROY_AND_COLLECT,
+ * but this change doesn't matter, because RCU_STATE_DESTROY is a subset of
+ * RCU_STATE_DESTROY_AND_COLLECT.
+ *
+ */
+
+#define RCU_STATE_INVALID 0
+
+/* RCU_STATE_DESTROY:
+ * call callbacks that were registered by call_rcu for the objects in rcu_cpu_state.old
+ */
+#define RCU_STATE_DESTROY 1
+/* RCU_STATE_DESTROY_AND_COLLECT:
+ * - call callbacks that were registered by call_rcu for the objects in rcu_cpu_state.old
+ * - move the objects from rcu_cpu_state.new to rcu_cpu_state.old
+ */
+#define RCU_STATE_DESTROY_AND_COLLECT 2
+/* RCU_STATE_GRACE
+ * - wait for a quiescent state
+ */
+#define RCU_STATE_GRACE 3
+
+#define RCU_STATE_SHIFT 2
+
+struct rcu_global_state {
+ spinlock_t lock;
+ int start_immediately;
+ long completed;
+ struct rcu_cpumask cpus;
+
+ atomic_t poller_cpus;
+} ____cacheline_internodealigned_in_smp;
+
+/*
+ * Global state handling:
+ * - The global state is stored in rgs->cpus.state. This allows
+ * an atomic update of the state and the outstanding cpus.
+ * - Only the low 2 bits of 'state' are the actual state, the upper bits are a
+ * counter.
+ * - If the local state (rcs->state) is not equal to the global state, then
+ * something needs to be done.
+ * - When in nohz mode, rcs->state contains the whole global state, including
+ * the counter.
+ * - When in delayed mode, rcs->state contains only the low two bits.
+ * - When switching to nohz mode, rcs->state is initialized to
+ * RCU_STATE_INVALID.
+ * - When switching to delayed mode, rcs->state is initialized by reading
+ * from rgs->cpus.
+ */
+static inline int rcu_buildstate(int state, int count)
+{
+ return (count << RCU_STATE_SHIFT) + state;
+}
+
+static inline int rcu_getstate(int state)
+{
+ return ((1 << RCU_STATE_SHIFT)-1) & state;
+}
+
+static inline int rcu_getglobalstate(struct rcu_global_state *rgs)
+{
+ return rcu_getstate(rcu_cpumask_getstate(&rgs->cpus));
+}
+
+struct rcu_cpu_state {
+ int state;
+
+#ifdef CONFIG_NO_HZ
+ int kick_poller;
+#endif
+
+ /* new objects, directly from call_rcu().
+ * The lists are length-based, not NULL-terminated.
+ */
+ struct rcu_head *new; /* new objects */
+ struct rcu_head **newtail;
+ long newqlen; /* # of queued callbacks */
+
+ unsigned long timeout;
+
+ /* objects that are in rcu grace processing. The actual
+ * state depends on rcu_cpumask_getstate(&rgs->cpus);
+ */
+ struct rcu_head *old;
+ struct rcu_head **oldtail;
+ long oldqlen;
+
+ /*
+ * quiescent state looking:
+ * When the cpu sees RCU_STATE_DESTROY_AND_COLLECT, it clears looking.
+ * When the cpu sees RCU_STATE_GRACE, it sets looking and clears
+ * quiet.
+ * If looking and quiet are both set, then there was a grace period,
+ * even if the state machine is called from non-idle context.
+ */
+ int quiet;
+ int looking;
+};
+
+/* Note: only one structure for _bh and _normal. */
+struct rcu_cpu_dead {
+ /*
+ * objects that are scheduled for immediate call of
+ * ->func().
+ */
+ struct rcu_head *dead;
+ struct rcu_head **deadtail;
+ long deadqlen;
+
+ long batchcount;
+};
+
+/*
+ * rcu_cpumode:
+ * RCU_CPUMODE_DELAYED:
+ * "normal" rcu behavior: the scheduler and the timer interrupt
+ * check for grace periods, read side critical sections are permitted
+ * everywhere.
+ *
+ * RCU_CPUMODE_NOHZ:
+ * This cpu is sitting in the idle thread, with disabled hz timer.
+ * These cpus are polled. NOHZ cpus must:
+ * - add themselves to the rcu_nohz_mask on irq and nmi entry.
+ * rcu_nohz_mask is read in each interrupt on a nohz cpu, thus test and
+ * set must be used.
+ * - increase total_count on {irq,nmi} entry. The poller uses that information
+ * to decide if a cpu is so offline that it can be removed from
+ * rcu_nohz_mask. (Positive effect: The cpu will be skipped when checking
+ * for grace periods - possibly for a long time. Negative effect:
+ * The next irq will trash the cache-line of rcu_nohz_mask)
+ * - increase in_{irq,nmi}_count on {irq,nmi} entry, decrease it on {irq,nmi}
+ * exit
+ * - if both in_{nmi,irq}_count are 0 on {irq,nmi} {entry,exit}, then do for
+ * _normal and_bh:
+ * - set the per-cpu state to the global state.
+ * - only for irq exit:
+ * - if kick_poller is set, then kick the poll task.
+ * - decrementing in_irq_count and to kick_poller are protected by poller_lock.
+ * - cpu_mode is only updated by the current cpu
+ */
+
+#define RCU_CPUMODE_INVALID 0
+#define RCU_CPUMODE_DELAYED 1
+#define RCU_CPUMODE_NOHZ 2
+
+struct rcu_percpu_data {
+ int cpu_mode;
+
+#ifdef CONFIG_NO_HZ
+ atomic_t total_count;
+
+ int in_nmi_count;
+ int in_irq_count;
+ spinlock_t poller_lock;
+#endif
+
+ struct rcu_cpu_state state_normal;
+ struct rcu_cpu_state state_bh;
+ struct rcu_cpu_dead data_dead;
+};
+
+DECLARE_PER_CPU(struct rcu_percpu_data, rcu_percpu);
+
+extern long rcu_batches_completed(void);
+extern long rcu_batches_completed_bh(void);
+
+extern int rcu_pending(int cpu);
+extern int rcu_needs_cpu(int cpu);
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+extern struct lockdep_map rcu_lock_map;
+# define rcu_read_acquire() \
+ lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_)
+# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_)
+#else
+# define rcu_read_acquire() do { } while (0)
+# define rcu_read_release() do { } while (0)
+#endif
+
+#define __rcu_read_lock() \
+ do { \
+ preempt_disable(); \
+ __acquire(RCU); \
+ rcu_read_acquire(); \
+ } while (0)
+#define __rcu_read_unlock() \
+ do { \
+ rcu_read_release(); \
+ __release(RCU); \
+ preempt_enable(); \
+ } while (0)
+#define __rcu_read_lock_bh() \
+ do { \
+ local_bh_disable(); \
+ __acquire(RCU_BH); \
+ rcu_read_acquire(); \
+ } while (0)
+#define __rcu_read_unlock_bh() \
+ do { \
+ rcu_read_release(); \
+ __release(RCU_BH); \
+ local_bh_enable(); \
+ } while (0)
+
+extern void __rcu_init(void);
+#define rcu_init_sched() do { } while (0)
+
+extern void __synchronize_sched(void);
+extern void rcu_check_callbacks(int cpu, int user);
+
+#ifdef CONFIG_NO_HZ
+extern void rcu_enter_nohz(void);
+extern void rcu_exit_nohz(void);
+#else /* CONFIG_NO_HZ */
+#define rcu_enter_nohz() do { } while (0)
+#define rcu_exit_nohz() do { } while (0)
+#endif /* CONFIG_NO_HZ */
+
+static inline void rcu_qsctr_inc(int cpu)
+{
+ per_cpu(rcu_percpu, cpu).state_normal.quiet = 1;
+ per_cpu(rcu_percpu, cpu).state_bh.quiet = 1;
+}
+
+static inline void rcu_bh_qsctr_inc(int cpu)
+{
+ per_cpu(rcu_percpu, cpu).state_bh.quiet = 1;
+}
+
+#endif /* __LINUX_RCUCLASSIC_H */
diff --git a/init/Kconfig b/init/Kconfig
index c11da38..88286ba 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -903,10 +903,20 @@ source "block/Kconfig"
config PREEMPT_NOTIFIERS
bool

+config STATE_RCU
+ bool
+ default y
+ help
+ This option selects a state machine based RCU implementation.
+ It's a replacement for the "classic" rcu implementation that
+ aims for simpler code and better scalability.
+ If unsure, say N.
+
config CLASSIC_RCU
- def_bool !PREEMPT_RCU
+ def_bool !PREEMPT_RCU && !STATE_RCU
help
This option selects the classic RCU implementation that is
designed for best read-side performance on non-realtime
systems. Classic RCU is the default. Note that the
PREEMPT_RCU symbol is used to select/deselect this option.
+
diff --git a/kernel/Makefile b/kernel/Makefile
index 4e1d7df..6bc9503 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -74,6 +74,7 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_SECCOMP) += seccomp.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
obj-$(CONFIG_CLASSIC_RCU) += rcuclassic.o
+obj-$(CONFIG_STATE_RCU) += rcustate.o rcucpumask.o
obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o
ifeq ($(CONFIG_PREEMPT_RCU),y)
obj-$(CONFIG_RCU_TRACE) += rcupreempt_trace.o
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 46a8bbd..2c6bc29 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -199,13 +199,14 @@ static int __ref take_cpu_down(void *_param)
struct take_cpu_down_param *param = _param;
int err;

- raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
- param->hcpu);
/* Ensure this CPU doesn't handle any more interrupts. */
err = __cpu_disable();
if (err < 0)
return err;

+ raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
+ param->hcpu);
+
/* Force idle task to run as soon as we yield: it should
immediately notice cpu is offline and die quickly. */
sched_idle_next();
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index 37f72e5..e14e6b2 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -296,6 +296,13 @@ void call_rcu_bh(struct rcu_head *head,
}
EXPORT_SYMBOL_GPL(call_rcu_bh);

+void call_rcu_sched(struct rcu_head *head,
+ void (*func)(struct rcu_head *rcu))
+{
+ call_rcu(head, func);
+}
+EXPORT_SYMBOL_GPL(call_rcu_sched);
+
/*
* Return the number of RCU batches processed thus far. Useful
* for debug and statistics.
@@ -764,6 +771,17 @@ static struct notifier_block __cpuinitdata rcu_nb = {
.notifier_call = rcu_cpu_notify,
};

+#ifdef CONFIG_NO_HZ
+
+void rcu_irq_enter(int in_nmi)
+{
+}
+
+void rcu_irq_exit(int in_nmi)
+{
+}
+#endif
+
/*
* Initializes rcu mechanism. Assumed to be called early.
* That is before local timer(SMP) or jiffie timer (uniproc) is setup.
diff --git a/kernel/rcucpumask.c b/kernel/rcucpumask.c
new file mode 100644
index 0000000..436862c
--- /dev/null
+++ b/kernel/rcucpumask.c
@@ -0,0 +1,93 @@
+/*
+ * Scalable cpu mask for rcu.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * (C) Manfred Spraul <manfred@xxxxxxxxxxxxxxxx>, 2008
+ *
+ */
+#include <linux/rcucpumask.h>
+#include <linux/bug.h>
+
+#ifdef RCUCPUMASK_FLAT
+
+void rcu_cpumask_init(struct rcu_cpumask *rcm, int newstate, int setupcpus)
+{
+ BUG_ON(!irqs_disabled());
+
+ spin_lock(&rcm->lock);
+
+ rcm->state = newstate;
+ atomic_set(&rcm->cpus_open, setupcpus ? rcm->cpus_total : 0);
+
+ spin_unlock(&rcm->lock);
+}
+
+int rcu_cpumask_clear_and_test(struct rcu_cpumask *rcm, int cpu)
+{
+ int ret;
+
+ BUG_ON(atomic_read(&rcm->cpus_open) <= 0);
+ /*
+ * atomic_dec_and_test() implies a memory barrier, thus no mb()
+ * required.
+ * ret 1: value now 0
+ */
+ ret = atomic_dec_and_test(&rcm->cpus_open);
+
+ return ret;
+}
+
+int rcu_cpumask_addcpu(struct rcu_cpumask *rcm, int cpu)
+{
+ int ret;
+ unsigned long flags;
+
+ /*
+ * This function is called both during early bootup (irqs disabled)
+ * and during "normal" CPU_UP notifiers (irqs enabled).
+ */
+ spin_lock_irqsave(&rcm->lock, flags);
+
+ rcm->cpus_total++;
+ ret = rcm->state;
+
+ spin_unlock_irqrestore(&rcm->lock, flags);
+
+ return ret;
+}
+
+int rcu_cpumask_removecpu(struct rcu_cpumask *rcm, int cpu)
+{
+ int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rcm->lock, flags);
+
+ rcm->cpus_total--;
+ ret = rcm->state;
+
+ spin_unlock_irqrestore(&rcm->lock, flags);
+
+ return ret;
+}
+
+#endif /* RCUCPUMASK_FLAT */
+
+#ifdef RCUCPUMASK_HIERARCHICAL
+
+bla
+
+#endif /* RCUCPUMASK_HIERARCHICAL */
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index ca4bbbe..ab18347 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -434,13 +434,13 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched) = {
static DEFINE_PER_CPU(int, rcu_update_flag);

/**
- * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI.
+ * __rcu_irq_enter - Called from Hard irq handlers and NMI/SMI.
*
* If the CPU was idle with dynamic ticks active, this updates the
* rcu_dyntick_sched.dynticks to let the RCU handling know that the
* CPU is active.
*/
-void rcu_irq_enter(void)
+void __rcu_irq_enter(int in_nmi)
{
int cpu = smp_processor_id();
struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
@@ -510,7 +510,7 @@ void rcu_irq_enter(void)
* rcu_dyntick_sched.dynticks to put let the RCU handling be
* aware that the CPU is going back to idle with no ticks.
*/
-void rcu_irq_exit(void)
+void __rcu_irq_exit(int in_nmi)
{
int cpu = smp_processor_id();
struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
diff --git a/kernel/rcustate.c b/kernel/rcustate.c
new file mode 100644
index 0000000..deb1d1e
--- /dev/null
+++ b/kernel/rcustate.c
@@ -0,0 +1,1136 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2001
+ *
+ * Authors: Dipankar Sarma <dipankar@xxxxxxxxxx>
+ * Manfred Spraul <manfred@xxxxxxxxxxxxxxxx>
+ *
+ * Based on the original work by Paul McKenney <paulmck@xxxxxxxxxx>
+ * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
+ * Papers:
+ * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
+ * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * Documentation/RCU
+ *
+ * Rewrite based on a global state machine
+ * (C) Manfred Spraul <manfred@xxxxxxxxxxxxxxxx>, 2008
+ *
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+#include <linux/rcupdate.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <asm/atomic.h>
+#include <linux/bitops.h>
+#include <linux/module.h>
+#include <linux/completion.h>
+#include <linux/moduleparam.h>
+#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/mutex.h>
+#include <linux/time.h>
+
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static struct lock_class_key rcu_lock_key;
+struct lockdep_map rcu_lock_map =
+ STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
+EXPORT_SYMBOL_GPL(rcu_lock_map);
+#endif
+
+/*
+ * Definition for rcupdate control block.
+ * One global state exists per rcu flavour ("normal" and "bh"); both start
+ * unlocked and with start_immediately cleared.
+ */
+static struct rcu_global_state rcu_global_state_normal = {
+ .lock = __SPIN_LOCK_UNLOCKED(&rcu_global_state_normal.lock),
+ .start_immediately = 0,
+ .cpus = __RCU_CPUMASK_INIT(&rcu_global_state_normal.cpus)
+};
+
+static struct rcu_global_state rcu_global_state_bh = {
+ .lock = __SPIN_LOCK_UNLOCKED(&rcu_global_state_bh.lock),
+ .start_immediately = 0,
+ .cpus = __RCU_CPUMASK_INIT(&rcu_global_state_bh.cpus)
+};
+
+/* Per-cpu data: holds the state of both flavours and the dead list. */
+DEFINE_PER_CPU(struct rcu_percpu_data, rcu_percpu);
+
+/*
+ * Cpus in nohz mode that rcu_do_poll() has to look at. Bits are set by
+ * rcu_enter_nohz() and rcu_irq_enter(), and cleared by rcu_exit_nohz(),
+ * rcu_update_irqstate() and rcu_offline_cpu().
+ */
+cpumask_t rcu_nohz_mask;
+
+/*
+ * Queue length threshold (module parameter): when the "new" list of a cpu
+ * crosses this value, rcu_checkqlen() asks for a new cycle.
+ */
+int qlowmark = 100;
+
+/*
+ * Tuning of the per-cpu total_count used for nohz cpus:
+ * RCU_IRQ_INIT is the value set by rcu_enter_nohz(), RCU_IRQ_MAX is the
+ * value the counter is clamped to and RCU_IRQ_DOWN the divisor applied
+ * by rcu_update_irqstate().
+ */
+#define RCU_IRQ_INIT 8
+#define RCU_IRQ_MAX 128
+#define RCU_IRQ_DOWN 2
+
+/* Flavour selectors understood by rcu_get_rcs() and rcu_get_rgs(). */
+#define RCU_STRUCT_NORMAL 1
+#define RCU_STRUCT_BH 2
+
+/*
+ * rcu_get_rcs - look up the per-cpu rcu state of one flavour.
+ * @rcu_struct: RCU_STRUCT_NORMAL or RCU_STRUCT_BH
+ * @cpu: cpu whose state is requested
+ *
+ * Returns a pointer into the rcu_percpu data of @cpu. Any other
+ * @rcu_struct value ends in BUG().
+ */
+static inline struct rcu_cpu_state *rcu_get_rcs(int rcu_struct, int cpu)
+{
+	if (rcu_struct == RCU_STRUCT_NORMAL)
+		return &per_cpu(rcu_percpu, cpu).state_normal;
+	if (rcu_struct == RCU_STRUCT_BH)
+		return &per_cpu(rcu_percpu, cpu).state_bh;
+
+	BUG();
+}
+
+/*
+ * rcu_get_rgs - look up the global rcu state of one flavour.
+ * @rcu_struct: RCU_STRUCT_NORMAL or RCU_STRUCT_BH
+ *
+ * Returns the matching global state. Any other @rcu_struct value ends
+ * in BUG().
+ */
+static inline struct rcu_global_state *rcu_get_rgs(int rcu_struct)
+{
+	if (rcu_struct == RCU_STRUCT_NORMAL)
+		return &rcu_global_state_normal;
+	if (rcu_struct == RCU_STRUCT_BH)
+		return &rcu_global_state_bh;
+
+	BUG();
+}
+
+
+/* Number of cycles the "normal" rcu flavour has completed so far. */
+long rcu_batches_completed(void)
+{
+	const struct rcu_global_state *rgs = &rcu_global_state_normal;
+
+	return rgs->completed;
+}
+
+/*
+ * Number of cycles the "bh" rcu flavour has completed so far.
+ * The value must come from the bh global state; the normal flavour has
+ * its own counter and its own accessor.
+ */
+long rcu_batches_completed_bh(void)
+{
+	return rcu_global_state_bh.completed;
+}
+
+/*
+ * rcu_state_init - switch the global state machine to @state.
+ * @rgs: global rcu state to modify
+ * @state: new RCU_STATE_* value
+ *
+ * Re-initialises rgs->cpus with the combination of @state and
+ * rgs->completed. The last argument handed to rcu_cpumask_init() is 0 for
+ * RCU_STATE_DESTROY and 1 for every other state.
+ */
+static void rcu_state_init(struct rcu_global_state *rgs, int state)
+{
+	int init_cpus = (state == RCU_STATE_DESTROY) ? 0 : 1;
+
+	rcu_cpumask_init(&rgs->cpus, rcu_buildstate(state, rgs->completed),
+			 init_cpus);
+}
+
+/**
+ * rcu_state_startcycle - request the start of the next rcu cycle
+ * @rgs: global rcu state
+ *
+ * If the state machine is idle (RCU_STATE_DESTROY), the next cycle is
+ * started right away by switching to RCU_STATE_DESTROY_AND_COLLECT.
+ * If a cycle is already running, rgs->start_immediately is set so that
+ * rcu_advance_state() begins a new cycle when the running one completes.
+ *
+ * Local interrupts are disabled, the current cpu is tracked
+ * (either due to RCU_CPUMODE_DELAYED or because it's listed in
+ * rcu_nohz_mask or because it's listed in poller_cpus).
+ * Thus it's impossible that start_immediately goes to 0 and
+ * the entries listed in rcs->new are not included in the
+ * grace period.
+ */
+static void rcu_state_startcycle(struct rcu_global_state *rgs)
+{
+	BUG_ON(!irqs_disabled());
+
+	/* a restart was already requested: nothing to add */
+	if (rgs->start_immediately != 0)
+		return;
+
+	spin_lock(&rgs->lock);
+	switch (rcu_getglobalstate(rgs)) {
+	case RCU_STATE_DESTROY_AND_COLLECT:
+	case RCU_STATE_GRACE:
+		/* a cycle is in flight, chain the next one behind it */
+		rgs->start_immediately = 1;
+		break;
+	case RCU_STATE_DESTROY:
+		/* idle: open the collect phase now */
+		rcu_state_init(rgs, RCU_STATE_DESTROY_AND_COLLECT);
+		BUG_ON(rgs->start_immediately);
+		break;
+	default:
+		BUG();
+	}
+	spin_unlock(&rgs->lock);
+}
+
+/*
+ * Upper bound, in jiffies, for the time callbacks may wait in the "new"
+ * list before a cycle is forced. This is the delay that can occur for
+ * synchronize_rcu() callers.
+ */
+#define RCU_MAX_DELAY (HZ/30+1)
+
+/*
+ * rcu_checkqlen - account for @inc callbacks added to the "new" list.
+ * @rgs: global state of the flavour
+ * @rcs: per-cpu state whose queue grew
+ * @inc: number of callbacks that were appended
+ *
+ * Must be called with local interrupts disabled.
+ */
+static void rcu_checkqlen(struct rcu_global_state *rgs, struct rcu_cpu_state *rcs, int inc)
+{
+	BUG_ON(!irqs_disabled());
+
+	/* first entry of a fresh batch: arm the forced-start deadline */
+	if (unlikely(rcs->newqlen == 0))
+		rcs->timeout = jiffies + RCU_MAX_DELAY;
+
+	/* request a cycle exactly when this addition crosses qlowmark */
+	if (rcs->newqlen < qlowmark && rcs->newqlen + inc >= qlowmark)
+		rcu_state_startcycle(rgs);
+
+	rcs->newqlen += inc;
+
+	/*
+	 * This is not really a bug, it might happen when interrupt calls
+	 * call_rcu() while the cpu is in nohz mode. see rcu_irq_exit
+	 */
+	WARN_ON(rcs->newqlen >= qlowmark &&
+		rcu_getglobalstate(rgs) == RCU_STATE_DESTROY);
+}
+
+
+/*
+ * __call_rcu - append one callback to the "new" list of a cpu.
+ * @head: callback to queue; head->func must already be set by the caller
+ * @rgs: global state of the flavour
+ * @rcs: per-cpu state that receives the callback
+ *
+ * Callers in this file invoke it with local interrupts disabled
+ * (rcu_checkqlen() asserts that).
+ */
+static void __call_rcu(struct rcu_head *head, struct rcu_global_state *rgs,
+		struct rcu_cpu_state *rcs)
+{
+	/*
+	 * Terminate the list. The consumers walk by element count, but
+	 * rcu_do_batch() still loads and prefetches ->next of the last
+	 * element, so it must not be left uninitialised.
+	 */
+	head->next = NULL;
+
+	if (rcs->new == NULL)
+		rcs->new = head;
+	else
+		(*rcs->newtail) = head;
+	rcs->newtail = &head->next;
+
+	rcu_checkqlen(rgs, rcs, 1);
+}
+
+/*
+ * call_rcu_sched - queue a callback for the sched flavour.
+ * @head: rcu head embedded in the object to be processed
+ * @func: callback invoked with @head
+ *
+ * In this implementation it simply forwards to call_rcu().
+ */
+void call_rcu_sched(struct rcu_head *head,
+ void (*func)(struct rcu_head *rcu))
+{
+ call_rcu(head, func);
+}
+
+EXPORT_SYMBOL_GPL(call_rcu_sched);
+
+/*
+ * Wait until all currently running preempt_disable() code segments
+ * (including hardware-irq-disable segments) complete. Note that
+ * in -rt this does -not- necessarily result in all currently executing
+ * interrupt -handlers- having completed.
+ */
+synchronize_rcu_xxx(__synchronize_sched, call_rcu_sched)
+EXPORT_SYMBOL_GPL(__synchronize_sched);
+
+
+/*
+ * call_rcu - queue a callback on the "normal" flavour of this cpu.
+ * @head: rcu head embedded in the object to be processed
+ * @func: callback invoked with @head
+ *
+ * May be called with interrupts enabled or disabled; the queue
+ * manipulation itself runs with local interrupts off.
+ */
+void call_rcu(struct rcu_head *head,
+		void (*func)(struct rcu_head *rcu))
+{
+	struct rcu_cpu_state *rcs;
+	unsigned long flags;
+
+	head->func = func;
+
+	local_irq_save(flags);
+	rcs = &__get_cpu_var(rcu_percpu).state_normal;
+	__call_rcu(head, &rcu_global_state_normal, rcs);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(call_rcu);
+
+/*
+ * call_rcu_bh - queue a callback on the "bh" flavour of this cpu.
+ * @head: rcu head embedded in the object to be processed
+ * @func: callback invoked with @head
+ *
+ * May be called with interrupts enabled or disabled; the queue
+ * manipulation itself runs with local interrupts off.
+ */
+void call_rcu_bh(struct rcu_head *head,
+		void (*func)(struct rcu_head *rcu))
+{
+	struct rcu_cpu_state *rcs;
+	unsigned long flags;
+
+	head->func = func;
+
+	local_irq_save(flags);
+	rcs = &__get_cpu_var(rcu_percpu).state_bh;
+	__call_rcu(head, &rcu_global_state_bh, rcs);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(call_rcu_bh);
+
+/*
+ * Limits for rcd->batchcount, the number of callbacks rcu_do_batch()
+ * invokes per run: lower bound, growth factor and shrink divisor.
+ */
+#define RCU_BATCH_MIN 100
+#define RCU_BATCH_INCFACTOR 2
+#define RCU_BATCH_DECFACTOR 4
+
+/*
+ * rcu_move_and_raise - move the "old" list of @rcs to the dead list.
+ * @rcs: per-cpu state whose old list is moved
+ * @do_raise: if nonzero, raise RCU_SOFTIRQ afterwards
+ *
+ * The target is always the dead list of the cpu that executes this
+ * function. Must be called with local interrupts disabled.
+ */
+static void rcu_move_and_raise(struct rcu_cpu_state *rcs, int do_raise)
+{
+ struct rcu_cpu_dead *rcd;
+
+ BUG_ON(!irqs_disabled());
+ rcd = &__get_cpu_var(rcu_percpu).data_dead;
+
+ /* update batch limit:
+ * - if there are still old entries when new entries are added:
+ * double the batch count.
+ * - if there are no old entries: reduce it by 25%, but never below 100.
+ */
+ if (rcd->deadqlen)
+ rcd->batchcount = rcd->batchcount*RCU_BATCH_INCFACTOR;
+ else
+ rcd->batchcount = rcd->batchcount-rcd->batchcount/RCU_BATCH_DECFACTOR;
+ if (rcd->batchcount < RCU_BATCH_MIN)
+ rcd->batchcount = RCU_BATCH_MIN;
+
+ /* splice the old list behind whatever is already on the dead list */
+ if (rcs->old != NULL) {
+ if (rcd->dead == NULL) {
+ rcd->dead = rcs->old;
+ } else {
+ (*rcd->deadtail) = rcs->old;
+ }
+ rcd->deadtail = rcs->oldtail;
+ rcd->deadqlen += rcs->oldqlen;
+ }
+
+ rcs->old = NULL;
+ rcs->oldtail = NULL;
+ rcs->oldqlen = 0;
+
+ /* the softirq is raised even if nothing was moved */
+ if (do_raise)
+ raise_softirq(RCU_SOFTIRQ);
+}
+
+/*
+ * rcu_advance_state - move the global state machine one step forward.
+ * @rgs: global rcu state to advance
+ *
+ * Takes rgs->lock itself. Must be called with local interrupts disabled.
+ * Calling it while the state is RCU_STATE_DESTROY is a BUG().
+ */
+static void rcu_advance_state(struct rcu_global_state *rgs)
+{
+ BUG_ON(!irqs_disabled());
+ spin_lock(&rgs->lock);
+
+ /*
+ * advance the state machine:
+ * - from COLLECT to GRACE
+ * - from GRACE to DESTROY/COLLECT
+ */
+ switch(rcu_getglobalstate(rgs)) {
+ case RCU_STATE_DESTROY_AND_COLLECT:
+ rcu_state_init(rgs, RCU_STATE_GRACE);
+ break;
+ case RCU_STATE_GRACE:
+ /* one more cycle is done */
+ rgs->completed++;
+ /* restart at once if rcu_state_startcycle() asked for it */
+ if (rgs->start_immediately) {
+ rcu_state_init(rgs, RCU_STATE_DESTROY_AND_COLLECT);
+ } else {
+ rcu_state_init(rgs, RCU_STATE_DESTROY);
+ }
+ rgs->start_immediately = 0;
+ break;
+ default:
+ BUG();
+ }
+ spin_unlock(&rgs->lock);
+}
+
+/*
+ * __rcu_kick_poller - acknowledge a pending poll request for one flavour.
+ * @rps: per-cpu data of the cpu that was asked to kick
+ * @rgs: global state of the flavour (normal or bh)
+ *
+ * rcu_do_poll() sets kick_poller in the per-cpu state of the flavour it
+ * polls and takes a reference on rgs->poller_cpus of that flavour. The
+ * flag that is tested and cleared here must therefore belong to the same
+ * flavour as @rgs, otherwise a bh request is never acknowledged and the
+ * bh poller count never drops back to zero.
+ */
+static void __rcu_kick_poller(struct rcu_percpu_data *rps, struct rcu_global_state *rgs)
+{
+	struct rcu_cpu_state *rcs;
+
+	if (rgs == &rcu_global_state_bh)
+		rcs = &rps->state_bh;
+	else
+		rcs = &rps->state_normal;
+
+	if (rcs->kick_poller) {
+		rcs->kick_poller = 0;
+		/* last outstanding cpu: the poll round is complete */
+		if (atomic_dec_and_test(&rgs->poller_cpus))
+			rcu_advance_state(rgs);
+	}
+}
+/*
+ * rcu_kick_poller - answer pending poll requests of both flavours.
+ * @rps: per-cpu data of the cpu
+ *
+ * Calls __rcu_kick_poller() once for the normal and once for the bh
+ * global state. The caller must hold rps->poller_lock and must have
+ * local interrupts disabled.
+ */
+static void rcu_kick_poller(struct rcu_percpu_data *rps)
+{
+ BUG_ON(!irqs_disabled());
+ BUG_ON(!spin_is_locked(&rps->poller_lock));
+
+ __rcu_kick_poller(rps, &rcu_global_state_normal);
+ __rcu_kick_poller(rps, &rcu_global_state_bh);
+}
+
+
+/**
+ * rcu_update_irqstate(cpu)
+ * @cpu: cpu to update
+ *
+ * cpu is a nohz cpu. This function decides if the cpu should be polled
+ * or if it should be removed entirely from the grace period handling.
+ * Cpus that are removed entirely must add themselves back into
+ * rcu_nohz_mask on irq/nmi entry (see rcu_irq_enter()).
+ *
+ * The caller must hold the poller_lock of @cpu.
+ */
+static void rcu_update_irqstate(int cpu)
+{
+ int rem;
+ struct rcu_percpu_data *rps;
+
+ rps = &per_cpu(rcu_percpu, cpu);
+
+ BUG_ON(!spin_is_locked(&rps->poller_lock));
+ BUG_ON(rps->cpu_mode != RCU_CPUMODE_NOHZ);
+
+ /*
+ * Age the counter: values above RCU_IRQ_MAX are cut back to
+ * RCU_IRQ_MAX, everything else loses total/RCU_IRQ_DOWN (rounded up).
+ */
+ rem = atomic_read(&rps->total_count);
+ if (rem > RCU_IRQ_MAX)
+ rem = rem - RCU_IRQ_MAX;
+ else
+ rem = (rem + RCU_IRQ_DOWN - 1) / RCU_IRQ_DOWN;
+ atomic_sub(rem, &rps->total_count);
+
+ /* the counter ran out: stop polling this cpu */
+ if (atomic_read(&rps->total_count) == 0) {
+ cpu_clear(cpu, rcu_nohz_mask);
+ }
+}
+
+static void rcu_do_poll(struct work_struct *reason);
+
+/* One work item per flavour; rcu_do_poll() tells them apart by address. */
+static DECLARE_WORK(rcu_work_normal, rcu_do_poll);
+static DECLARE_WORK(rcu_work_bh, rcu_do_poll);
+
+/*
+ * rcu_do_poll - scan the nohz cpus on behalf of the grace period.
+ * @reason: &rcu_work_normal or &rcu_work_bh, selects the flavour
+ *
+ * Scheduled through schedule_work() by rcu_state_delayedcpus_done() and
+ * run with local interrupts enabled. Every cpu in rcu_nohz_mask that has
+ * not yet reached the current global state is either marked as done or,
+ * if it is inside an irq, asked to report on irq exit (kick_poller).
+ * rgs->poller_cpus counts the outstanding reports plus one reference held
+ * by this function; whoever drops it to zero advances the state machine.
+ */
+static void rcu_do_poll(struct work_struct *reason)
+{
+ struct rcu_global_state *rgs;
+ int rcu_struct, cpu, global_state;
+
+ if (reason == &rcu_work_normal) {
+ rcu_struct = RCU_STRUCT_NORMAL;
+ } else if (reason == &rcu_work_bh) {
+ rcu_struct = RCU_STRUCT_BH;
+ } else {
+ BUG();
+ }
+ rgs = rcu_get_rgs(rcu_struct);
+
+ /* our own reference, dropped after the loop */
+ atomic_set(&rgs->poller_cpus, 1);
+ global_state = rcu_cpumask_getstate(&rgs->cpus);
+
+ for_each_cpu_mask(cpu, rcu_nohz_mask) {
+ struct rcu_percpu_data *rps;
+ struct rcu_cpu_state *rcs;
+
+ rps = &per_cpu(rcu_percpu, cpu);
+ rcs = rcu_get_rcs(rcu_struct, cpu);
+
+ /* unlocked fast path, rechecked below under the lock */
+ if (rcs->state == global_state)
+ continue;
+
+ BUG_ON(irqs_disabled());
+ spin_lock_irq(&rps->poller_lock);
+ if (rps->cpu_mode != RCU_CPUMODE_NOHZ)
+ goto continue_unlock;
+ if (rcs->state == global_state)
+ goto continue_unlock;
+ if (rps->in_irq_count) {
+ /*
+ * Ok, we have lost:
+ * - The cpu is in nohz mode
+ * - The cpu did not complete a single irq since the
+ * global state was modified to RCU_STATE_GRACE.
+ * - The cpu is inside an irq.
+ * That means the cpu could be inside a rcu read side
+ * critical section. Request that the cpu should kick
+ * the rcu subsystem on irq exit and continue.
+ */
+ atomic_inc(&rgs->poller_cpus);
+ rcs->kick_poller = 1;
+ } else {
+ /* The cpu is not inside an irq, but it could be inside
+ * an NMI.
+ * NMIs can't kick the rcu subsystem, thus we must
+ * wait until the NMI exits. Note that this is
+ * exceptionally rare, it can only happen if an NMI
+ * doesn't exit for multiple jiffies.
+ * Once no NMI is running, the cpu is recorded as having
+ * reached the global state.
+ */
+ while (rps->in_nmi_count) {
+ cpu_relax();
+ }
+ rcs->state = global_state;
+ }
+ rcu_update_irqstate(cpu);
+continue_unlock:
+ spin_unlock_irq(&rps->poller_lock);
+ }
+ /* drop our reference; advance if no cpu was asked to kick */
+ if (atomic_dec_and_test(&rgs->poller_cpus)) {
+ local_irq_disable();
+ rcu_advance_state(rgs);
+ local_irq_enable();
+ }
+}
+
+/**
+ * rcu_state_delayedcpus_done(rgs, rcu_struct)
+ * @rgs: rcu global state
+ * @rcu_struct: RCU_STRUCT_NORMAL or RCU_STRUCT_BH, must match @rgs
+ *
+ * 2nd part of the rcu grace period processing: all RCU_CPUMODE_DELAYED
+ * cpus completed. For RCU_STATE_GRACE (and only for this state), the
+ * RCU_CPUMODE_NOHZ cpus must be scanned as well; that scan is handed to
+ * rcu_do_poll() through schedule_work(). In every other state the state
+ * machine is advanced directly.
+ * No need for any locking: the last RCU_CPUMODE_DELAYED cpu calls this
+ * function. "Last" is ensured by atomic_dec_and_test(), thus concurrent
+ * calls are impossible.
+ */
+static void rcu_state_delayedcpus_done(struct rcu_global_state *rgs, int rcu_struct)
+{
+	if (rcu_getglobalstate(rgs) == RCU_STATE_GRACE) {
+		if (rcu_struct == RCU_STRUCT_NORMAL)
+			schedule_work(&rcu_work_normal);
+		else if (rcu_struct == RCU_STRUCT_BH)
+			schedule_work(&rcu_work_bh);
+		else
+			BUG();
+		return;
+	}
+
+	rcu_advance_state(rgs);
+}
+
+/*
+ * __rcu_state_machine - let one cpu catch up with the global rcu state.
+ * @rcu_struct: RCU_STRUCT_NORMAL or RCU_STRUCT_BH
+ * @global_state: global state as sampled by the caller
+ * @is_quiet: nonzero if the caller knows that @cpu is in a quiescent state
+ * @do_raise: nonzero if RCU_SOFTIRQ may be raised and an overdue cycle
+ * may be started from here
+ * @cpu: cpu whose per-cpu state is updated
+ *
+ * Must be called with local interrupts disabled.
+ */
+static void __rcu_state_machine(int rcu_struct, int global_state, int is_quiet, int do_raise, int cpu)
+{
+ int inc_state;
+ struct rcu_global_state *rgs;
+ struct rcu_cpu_state *rcs;
+
+ BUG_ON(!irqs_disabled());
+
+ rgs = rcu_get_rgs(rcu_struct);
+ rcs = rcu_get_rcs(rcu_struct, cpu);
+ /*
+ * Theoretically, this code should run under spin_lock(&rgs->lock),
+ * But: important changes (i.e. from COLLECT to GRACE,
+ * from GRACE to DESTROY) only happen when all cpus have completed
+ * their work. If rcu_getglobalstate(rgs) != rcs->state, then we haven't completed
+ * our work yet. Thus such a change cannot happen.
+ * The only change that might happen is a change from RCU_STATE_DESTROY
+ * to RCU_STATE_DESTROY_AND_COLLECT. We'll notice that in the next
+ * round.
+ * no need for an mb() either - it simply doesn't matter.
+ * Actually: when rcu_state_startcycle() is called, then it's guaranteed
+ * that global_state and rcu_getglobalstate(rgs) do not match...
+ */
+ /* idle machine, but callbacks waited longer than RCU_MAX_DELAY */
+ if (global_state == RCU_STATE_DESTROY && rcs->newqlen > 0 &&
+ time_after(jiffies, rcs->timeout) && do_raise) {
+ rcu_state_startcycle(rgs);
+ }
+
+ /* this cpu already did its part for the sampled state */
+ if (global_state == rcs->state)
+ return;
+
+ inc_state = 0;
+ switch(global_state) {
+ case RCU_STATE_DESTROY:
+ /* enforce the state machine:
+ * DESTROY is only possible after GRACE
+ */
+ BUG_ON(rcs->state != RCU_STATE_GRACE);
+ rcs->state = RCU_STATE_DESTROY;
+ rcu_move_and_raise(rcs, do_raise);
+ break;
+ case RCU_STATE_DESTROY_AND_COLLECT:
+ BUG_ON( (rcs->state != RCU_STATE_DESTROY) && (rcs->state != RCU_STATE_GRACE) );
+ rcs->state = RCU_STATE_DESTROY_AND_COLLECT;
+ /* retire the old list first, then "new" becomes "old" */
+ rcu_move_and_raise(rcs, do_raise);
+ rcs->old = rcs->new;
+ rcs->oldtail = rcs->newtail;
+ rcs->oldqlen = rcs->newqlen;
+ rcs->new = NULL;
+ rcs->newtail = NULL;
+ rcs->newqlen = 0;
+ rcs->looking = 0;
+ if (rcu_cpumask_clear_and_test(&rgs->cpus, cpu))
+ inc_state = 1;
+ break;
+ case RCU_STATE_GRACE:
+ BUG_ON(rcs->state != RCU_STATE_DESTROY_AND_COLLECT);
+ /* report only when a quiescent state is known */
+ if (is_quiet || (rcs->quiet && rcs->looking)) {
+ rcs->state = RCU_STATE_GRACE;
+ if (rcu_cpumask_clear_and_test(&rgs->cpus, cpu))
+ inc_state = 1;
+ }
+ rcs->quiet = 0;
+ rcs->looking = 1;
+ break;
+ default:
+ BUG();
+ }
+ /* rcu_cpumask_clear_and_test() reported that this was the last cpu */
+ if (unlikely(inc_state)) {
+ BUG_ON(rcu_getglobalstate(rgs) != rcs->state);
+ BUG_ON(rcu_getglobalstate(rgs) != global_state);
+
+ rcu_state_delayedcpus_done(rgs, rcu_struct);
+ }
+}
+
+/*
+ * rcu_state_machine - run one state machine step for @cpu.
+ * @rcu_struct: RCU_STRUCT_NORMAL or RCU_STRUCT_BH
+ * @is_quiet: nonzero if @cpu is known to be in a quiescent state
+ * @cpu: cpu to process
+ *
+ * Samples the global state once with interrupts disabled and passes that
+ * snapshot on, with do_raise enabled.
+ */
+static void rcu_state_machine(int rcu_struct, int is_quiet, int cpu)
+{
+	unsigned long flags;
+	int snapshot;
+
+	local_irq_save(flags);
+
+	snapshot = rcu_getglobalstate(rcu_get_rgs(rcu_struct));
+	/* gcc should not optimize away the local snapshot... */
+	barrier();
+	__rcu_state_machine(rcu_struct, snapshot, is_quiet, 1, cpu);
+
+	local_irq_restore(flags);
+}
+
+#if defined(CONFIG_HOTPLUG_CPU) || defined (CONFIG_NO_HZ)
+
+/*
+ * __rcu_remove_cpu - take @cpu out of the grace period accounting.
+ * @rcu_struct: RCU_STRUCT_NORMAL or RCU_STRUCT_BH
+ * @cpu: cpu to remove
+ *
+ * Removes the cpu from rgs->cpus, performs the state machine step the cpu
+ * still owes (treated as quiescent, without raising the softirq) and
+ * marks its per-cpu state as RCU_STATE_INVALID.
+ * Must be called with local interrupts disabled.
+ */
+static void __rcu_remove_cpu(int rcu_struct, int cpu)
+{
+ int global_state;
+ struct rcu_global_state *rgs;
+
+ BUG_ON(!irqs_disabled());
+
+ rgs = rcu_get_rgs(rcu_struct);
+
+ /*
+ * Figure out what this cpu is still supposed to do.
+ * We rely on the lock inside the rcu_cpumask, that guarantees that
+ * we neither do too much nor too little.
+ * But do not raise the softirq, the caller is responsible handling
+ * the entries still in the queues.
+ */
+ global_state = rcu_cpumask_removecpu(&rgs->cpus, cpu);
+ global_state = rcu_getstate(global_state);
+
+ /*
+ * ensure that we are not in the middle of updating
+ * rcu_getglobalstate(&rgs->cpus): otherwise __rcu_state_machine()
+ * would return with "nothing to do", although
+ * the cpu must do something.
+ */
+ spin_unlock_wait(&rgs->lock);
+
+ /* is_quiet = 1, do_raise = 0 */
+ __rcu_state_machine(rcu_struct, global_state, 1, 0, cpu);
+ rcu_get_rcs(rcu_struct, cpu)->state = RCU_STATE_INVALID;
+}
+
+#endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+/**
+ * rcu_bulk_add - bulk add new rcu objects.
+ * @rgs: global rcu state
+ * @rcs: cpu state
+ * @h: linked list of rcu objects.
+ * @htail: address of the next pointer of the last object in @h
+ * @len: number of objects in @h; nothing is done if it is not positive
+ *
+ * Appends the whole list to the "new" list of @rcs.
+ * Must be called with disabled local interrupts.
+ */
+static void rcu_bulk_add(struct rcu_global_state *rgs, struct rcu_cpu_state *rcs, struct rcu_head *h, struct rcu_head **htail, int len)
+{
+
+ BUG_ON(!irqs_disabled());
+
+ if (len > 0) {
+ if (rcs->new == NULL) {
+ rcs->new = h;
+ } else {
+ (*rcs->newtail) = h;
+ }
+ rcs->newtail = htail;
+
+ rcu_checkqlen(rgs, rcs, len);
+ }
+}
+
+/*
+ * __rcu_offline_cpu - hand over one flavour of the dying cpu.
+ * @rcu_struct: RCU_STRUCT_NORMAL or RCU_STRUCT_BH
+ * @target_rcs: per-cpu state of the surviving cpu that inherits the work
+ *
+ * Runs on the dying cpu (the source state is looked up through
+ * smp_processor_id()). Must be called with local interrupts disabled.
+ */
+static void __rcu_offline_cpu(int rcu_struct, struct rcu_cpu_state *target_rcs)
+{
+ int cpu = smp_processor_id();
+ struct rcu_global_state *rgs;
+ struct rcu_cpu_state *dying_rcs;
+
+ rgs = rcu_get_rgs(rcu_struct);
+ dying_rcs = rcu_get_rcs(rcu_struct, cpu);
+
+ /*
+ * task 1: Do the work that the dying cpu is still supposed to do.
+ * offlining a nohz cpu is special, then nothing needs to be done:
+ * everything was done by the last irq_exit().
+ */
+ BUG_ON(!irqs_disabled());
+ if (per_cpu(rcu_percpu, cpu).cpu_mode == RCU_CPUMODE_DELAYED) {
+ __rcu_remove_cpu(rcu_struct, cpu);
+ }
+
+ /* task 2: move all entries from the dying cpu into the lists of the target cpu.
+ * locking: The other cpu is in stop_machine, thus no locks are required.
+ * Thus it's more or less a bulk call_rcu().
+ * For the sake of simplicity, all objects are treated as "new", even the objects
+ * that are already in old.
+ */
+ rcu_bulk_add(rgs, target_rcs, dying_rcs->new, dying_rcs->newtail, dying_rcs->newqlen);
+ dying_rcs->new = NULL;
+ dying_rcs->newtail = NULL;
+ dying_rcs->newqlen = 0;
+ rcu_bulk_add(rgs, target_rcs, dying_rcs->old, dying_rcs->oldtail, dying_rcs->oldqlen);
+ dying_rcs->old = NULL;
+ dying_rcs->oldtail = NULL;
+ dying_rcs->oldqlen = 0;
+}
+
+/**
+ * rcu_offline_cpu(cpu): Offline a cpu
+ * @cpu: cpu to offline, must be the cpu that executes this function.
+ *
+ * The function does all work required to offline @cpu. It's called from
+ * stop_machine() with local interrupts disabled, and interrupts stay
+ * disabled throughout. It moves the work that is still pending to a cpu
+ * that is online.
+ */
+static void rcu_offline_cpu(int cpu)
+{
+	int surviving_cpu;
+	struct rcu_percpu_data *surviving_rps;
+	struct rcu_cpu_dead *dying_rcd;
+
+	BUG_ON(!irqs_disabled());
+	BUG_ON(cpu != smp_processor_id());
+
+	/*
+	 * step 1: find a victim cpu that will inherit the outstanding
+	 * work. The candidate is wrapped to 0 *before* it is tested, so
+	 * cpu_online() never sees an id outside 0..NR_CPUS-1, not even
+	 * when @cpu is the highest possible id.
+	 */
+	surviving_cpu = cpu;
+	do {
+		surviving_cpu++;
+		if (surviving_cpu >= NR_CPUS)
+			surviving_cpu = 0;
+		/* a full round trip means no other cpu is online */
+		BUG_ON(surviving_cpu == cpu);
+	} while (!cpu_online(surviving_cpu));
+	surviving_rps = &per_cpu(rcu_percpu, surviving_cpu);
+
+	/* step 2: move new & old lists, clear cpu bitmask */
+
+	__rcu_offline_cpu(RCU_STRUCT_NORMAL, &surviving_rps->state_normal);
+	__rcu_offline_cpu(RCU_STRUCT_BH, &surviving_rps->state_bh);
+
+	/*
+	 * step 3: move dead list.
+	 * Interrupts must stay disabled here: this runs inside
+	 * stop_machine() and touches the per-cpu data of another cpu
+	 * without taking any lock.
+	 */
+	dying_rcd = &__get_cpu_var(rcu_percpu).data_dead;
+	if (dying_rcd->dead != NULL) {
+		if (surviving_rps->data_dead.dead == NULL)
+			surviving_rps->data_dead.dead = dying_rcd->dead;
+		else
+			(*surviving_rps->data_dead.deadtail) = dying_rcd->dead;
+		surviving_rps->data_dead.deadtail = dying_rcd->deadtail;
+		surviving_rps->data_dead.deadqlen += dying_rcd->deadqlen;
+		dying_rcd->dead = NULL;
+		dying_rcd->deadtail = NULL;
+		dying_rcd->deadqlen = 0;
+	}
+
+	/* step 4: mark the cpu as invalid */
+	__get_cpu_var(rcu_percpu).cpu_mode = RCU_CPUMODE_INVALID;
+	cpu_clear(cpu, rcu_nohz_mask);
+
+	/* everything must have been handed over by now */
+	BUG_ON(rcu_needs_cpu(cpu));
+}
+
+#else
+
+/* Without CONFIG_HOTPLUG_CPU there is nothing to hand over: empty stub. */
+static void rcu_offline_cpu(int cpu)
+{
+}
+
+#endif
+
+/*
+ * __rcu_pending - does one flavour have immediate work for this cpu?
+ * @rgs: global state of the flavour
+ * @rcs: per-cpu state of the flavour
+ *
+ * Returns 1 if there is work, 0 otherwise.
+ */
+static int __rcu_pending(struct rcu_global_state *rgs, struct rcu_cpu_state *rcs)
+{
+	/* the cpu still has to catch up with the global state machine */
+	if (rcs->state != rcu_getglobalstate(rgs))
+		return 1;
+
+	/* the state machine is stopped, but callbacks are queued here */
+	return (rcs->state == RCU_STATE_DESTROY && rcs->newqlen) ? 1 : 0;
+}
+
+/**
+ * int rcu_pending(int cpu) - check for pending rcu related work.
+ * @cpu: cpu to check.
+ *
+ * Check to see if there is any immediate RCU-related work to be done
+ * for @cpu, returning 1 if so and 0 otherwise. This function is part of
+ * the RCU implementation; it is -not- an exported member of the RCU API.
+ *
+ * This function is inherently racy: the answer describes the moment of
+ * the check only. After a 0 has been returned, new work may show up
+ * before the caller even looks at the result.
+ */
+int rcu_pending(int cpu)
+{
+	struct rcu_percpu_data *rps = &per_cpu(rcu_percpu, cpu);
+
+	if (__rcu_pending(&rcu_global_state_normal, &rps->state_normal))
+		return 1;
+
+	return __rcu_pending(&rcu_global_state_bh, &rps->state_bh);
+}
+
+/*
+ * __rcu_needs_cpu - are callbacks queued in the new or old list?
+ * @rgs: global state of the flavour (not referenced)
+ * @rcs: per-cpu state to inspect
+ *
+ * Returns 1 if either list is non-empty, 0 otherwise.
+ */
+static int __rcu_needs_cpu(struct rcu_global_state *rgs, struct rcu_cpu_state *rcs)
+{
+	return (rcs->new != NULL || rcs->old != NULL);
+}
+
+/**
+ * int rcu_needs_cpu(cpu) - check for outstanding rcu work.
+ * @cpu: cpu to check.
+ *
+ * Check to see if any future RCU-related work will need to be done
+ * by @cpu, even if none need be done immediately, returning
+ * 1 if so. This function is part of the RCU implementation; it is -not-
+ * an exported member of the RCU API.
+ *
+ * Locking only works properly if the function is called for the current
+ * cpu and with disabled local interrupts. It's a prerequisite for
+ * rcu_enter_nohz() that rcu_needs_cpu() returns 0. Local interrupts must
+ * not be enabled in between, otherwise a softirq could call call_rcu().
+ *
+ * Note: rcu_needs_cpu() can be 0 (cpu not needed) even though rcu_pending()
+ * returns 1. This means that the outstanding work can be completed by either
+ * the CPU_DEAD callback or rcu_enter_nohz().
+ */
+int rcu_needs_cpu(int cpu)
+{
+	struct rcu_percpu_data *rps = &per_cpu(rcu_percpu, cpu);
+
+	if (__rcu_needs_cpu(&rcu_global_state_normal, &rps->state_normal))
+		return 1;
+	if (__rcu_needs_cpu(&rcu_global_state_bh, &rps->state_bh))
+		return 1;
+
+	/* callbacks that only wait to be invoked count as well */
+	return rps->data_dead.deadqlen > 0;
+}
+
+/**
+ * rcu_check_callbacks(cpu, user) - external entry point for grace checking
+ * @cpu: cpu id.
+ * @user: user space was interrupted.
+ *
+ * Top-level function driving RCU grace-period detection, normally
+ * invoked from the scheduler-clock interrupt. It classifies the
+ * interrupted context and then runs one state machine step for both rcu
+ * flavours.
+ *
+ * This function can run with disabled local interrupts, thus all
+ * callees must use local_irq_save()
+ */
+void rcu_check_callbacks(int cpu, int user)
+{
+	int normal_quiet;
+	int bh_quiet;
+
+	if (user ||
+	    (idle_cpu(cpu) && !in_softirq() &&
+	     hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
+		/*
+		 * Get here if this CPU took its interrupt from user
+		 * mode or from the idle loop, and if this is not a
+		 * nested interrupt. In this case, the CPU is in
+		 * a quiescent state for both flavours, so count it.
+		 */
+		normal_quiet = 1;
+		bh_quiet = 1;
+	} else if (!in_softirq()) {
+		/*
+		 * Get here if this CPU did not take its interrupt from
+		 * softirq, in other words, if it is not interrupting
+		 * a rcu_bh read-side critical section. This is a _bh
+		 * quiescent state, so count it.
+		 */
+		normal_quiet = 0;
+		bh_quiet = 1;
+	} else {
+		/*
+		 * We are interrupting something. Nevertheless - check if
+		 * we should collect rcu objects. This can be done from
+		 * arbitrary context.
+		 */
+		normal_quiet = 0;
+		bh_quiet = 0;
+	}
+
+	rcu_state_machine(RCU_STRUCT_NORMAL, normal_quiet, cpu);
+	rcu_state_machine(RCU_STRUCT_BH, bh_quiet, cpu);
+}
+
+/*
+ * Invoke the completed RCU callbacks.
+ * @rcd: dead list of the current cpu
+ *
+ * At most rcd->batchcount callbacks are invoked per call; if entries
+ * remain, RCU_SOFTIRQ is raised again. Must be entered with local
+ * interrupts enabled.
+ */
+static void rcu_do_batch(struct rcu_cpu_dead *rcd)
+{
+ struct rcu_head *list;
+ int i, count;
+
+ if (!rcd->deadqlen)
+ return;
+
+ /* step 1: pull up to rcd->batchcount objects */
+ BUG_ON(irqs_disabled());
+ local_irq_disable();
+
+ if (rcd->deadqlen > rcd->batchcount) {
+ struct rcu_head *walk;
+
+ list = rcd->dead;
+ count = rcd->batchcount;
+
+ /* skip the first count entries, the rest stays queued */
+ walk = rcd->dead;
+ for (i=0;i<count;i++)
+ walk = walk->next;
+ rcd->dead = walk;
+
+ } else {
+ /* take the whole list */
+ list = rcd->dead;
+ count = rcd->deadqlen;
+
+ rcd->dead = NULL;
+ rcd->deadtail = NULL;
+ }
+ rcd->deadqlen -= count;
+ BUG_ON(rcd->deadqlen < 0);
+
+ local_irq_enable();
+
+ /* step 2: call the rcu callbacks */
+
+ /* the detached part is not terminated: stop after count entries */
+ for (i=0;i<count;i++) {
+ struct rcu_head *next;
+
+ /* read ->next first, the callback may free the object */
+ next = list->next;
+ prefetch(next);
+ list->func(list);
+ list = next;
+ }
+
+ /* step 3: if still entries left, raise the softirq again */
+ if (rcd->deadqlen)
+ raise_softirq(RCU_SOFTIRQ);
+}
+
+/*
+ * RCU_SOFTIRQ handler: invoke a batch of callbacks from the dead list of
+ * the current cpu.
+ */
+static void rcu_process_callbacks(struct softirq_action *unused)
+{
+	struct rcu_cpu_dead *rcd = &get_cpu_var(rcu_percpu).data_dead;
+
+	rcu_do_batch(rcd);
+	put_cpu_var(rcu_percpu);
+}
+
+/*
+ * __rcu_add_cpu - register @cpu with one flavour.
+ * @rgs: global state of the flavour
+ * @rcs: per-cpu state of @cpu for that flavour
+ * @cpu: cpu that is added
+ *
+ * The per-cpu state starts out as the state reported by
+ * rcu_cpumask_addcpu().
+ */
+static void __rcu_add_cpu(struct rcu_global_state *rgs, struct rcu_cpu_state *rcs, int cpu)
+{
+	int mask_state = rcu_cpumask_addcpu(&rgs->cpus, cpu);
+
+	rcs->state = rcu_getstate(mask_state);
+}
+
+#ifdef CONFIG_NO_HZ
+
+/*
+ * rcu_enter_nohz - switch the current cpu from RCU_CPUMODE_DELAYED to
+ * RCU_CPUMODE_NOHZ.
+ *
+ * Must be called with local interrupts disabled and without queued
+ * callbacks (rcu_needs_cpu() must be 0 afterwards).
+ */
+void rcu_enter_nohz(void)
+{
+ struct rcu_percpu_data *rps;
+ int cpu = smp_processor_id();
+
+ /*
+ * call_rcu() between rcu_needs_cpu and rcu_enter_nohz() are
+ * not permitted.
+ * Thus both must be called with disabled local interrupts,
+ * without enabling the interrupts in between.
+ *
+ * Note: disabling interrupts only prevents call_rcu().
+ * it can obviously happen that another cpu forwards
+ * the state machine. That doesn't hurt: __rcu_remove_cpu()
+ * does the work that we need to do.
+ */
+ BUG_ON(!irqs_disabled());
+
+ rps = &__get_cpu_var(rcu_percpu);
+
+ __rcu_remove_cpu(RCU_STRUCT_NORMAL, cpu);
+ __rcu_remove_cpu(RCU_STRUCT_BH, cpu);
+ BUG_ON(rcu_needs_cpu(cpu));
+
+ BUG_ON(rps->cpu_mode != RCU_CPUMODE_DELAYED);
+ rps->cpu_mode = RCU_CPUMODE_NOHZ;
+
+ /* start value of the counter that rcu_update_irqstate() ages */
+ atomic_set(&rps->total_count, RCU_IRQ_INIT);
+
+ /* from now on rcu_do_poll() looks at this cpu */
+ cpu_set(cpu, rcu_nohz_mask);
+}
+
+/*
+ * rcu_exit_nohz - switch the current cpu from RCU_CPUMODE_NOHZ back to
+ * RCU_CPUMODE_DELAYED.
+ *
+ * Must be called with local interrupts disabled and outside of any
+ * tracked irq or nmi.
+ */
+void rcu_exit_nohz(void)
+{
+ struct rcu_percpu_data *rps;
+ int cpu = smp_processor_id();
+
+ rps = &__get_cpu_var(rcu_percpu);
+
+ BUG_ON(!irqs_disabled());
+ BUG_ON(rps->in_irq_count != 0);
+ BUG_ON(rps->in_nmi_count != 0);
+ BUG_ON(rps->cpu_mode != RCU_CPUMODE_NOHZ);
+
+ /* the mode change is done under poller_lock, as seen by rcu_do_poll() */
+ spin_lock(&rps->poller_lock);
+ rcu_kick_poller(rps);
+ cpu_clear(cpu, rcu_nohz_mask);
+ rps->cpu_mode = RCU_CPUMODE_DELAYED;
+ spin_unlock(&rps->poller_lock);
+
+ /* rejoin the global cpumask of both flavours */
+ __rcu_add_cpu(&rcu_global_state_normal, &rps->state_normal, cpu);
+ __rcu_add_cpu(&rcu_global_state_bh, &rps->state_bh, cpu);
+}
+
+/*
+ * rcu_irq_enter - note that the current cpu enters an irq or nmi.
+ * @in_nmi: nonzero for an NMI, zero for a normal interrupt
+ *
+ * Only does something while the cpu is in RCU_CPUMODE_NOHZ.
+ * Must be called with local interrupts disabled.
+ */
+void rcu_irq_enter(int in_nmi)
+{
+ struct rcu_percpu_data *rps;
+ int cpu = smp_processor_id();
+
+ rps = &__get_cpu_var(rcu_percpu);
+
+ BUG_ON(!irqs_disabled());
+
+ if (unlikely(rps->cpu_mode == RCU_CPUMODE_NOHZ)) {
+ /* rcu_update_irqstate() may have dropped us from the mask */
+ if (unlikely(!cpu_isset(cpu, rcu_nohz_mask))) {
+ cpu_set(cpu, rcu_nohz_mask);
+ }
+ atomic_inc(&rps->total_count);
+
+ /* outermost entry: start from the current global states */
+ if (rps->in_irq_count == 0 && rps->in_nmi_count == 0) {
+ BUG_ON(rps->state_normal.kick_poller);
+ BUG_ON(rps->state_bh.kick_poller);
+
+ rps->state_normal.state = rcu_cpumask_getstate(&rcu_global_state_normal.cpus);
+ rps->state_bh.state = rcu_cpumask_getstate(&rcu_global_state_bh.cpus);
+ }
+ if (in_nmi) {
+ rps->in_nmi_count++;
+ } else {
+ rps->in_irq_count++;
+ }
+ /*
+ * Here an explicit mb() is required:
+ * All other memory ordering is enforced by the spinlock in rgs->cpus.
+ * For interrupt in nohz mode, this is not the case: The counters
+ * incs must be visible before any accesses to rcu protected memory,
+ * the counter dec after all accesses.
+ */
+ smp_mb();
+ }
+}
+
+/*
+ * rcu_irq_exit - note that the current cpu leaves an irq or nmi.
+ * @in_nmi: nonzero for an NMI, zero for a normal interrupt
+ *
+ * Only does something while the cpu is in RCU_CPUMODE_NOHZ.
+ * Must be called with local interrupts disabled.
+ */
+void rcu_irq_exit(int in_nmi)
+{
+ struct rcu_percpu_data *rps;
+ rps = &__get_cpu_var(rcu_percpu);
+
+ BUG_ON(!irqs_disabled());
+
+
+ if (unlikely(rps->cpu_mode == RCU_CPUMODE_NOHZ)) {
+ smp_mb(); /* see rcu_irq_enter() */
+
+ if (in_nmi) {
+ /* no lock on this path: rcu_do_poll() only spins on the counter */
+ rps->in_nmi_count--;
+ /*
+ * Someone did call_rcu() from nmi context. Don't do this (tm).
+ */
+ BUG_ON((rps->in_irq_count == 0) && rcu_needs_cpu(smp_processor_id()));
+ } else {
+ spin_lock(&rps->poller_lock);
+ rps->in_irq_count--;
+ /* outermost irq exit: resync and answer a pending poll */
+ if (rps->in_irq_count == 0) {
+ rps->state_normal.state = rcu_cpumask_getstate(&rcu_global_state_normal.cpus);
+ rps->state_bh.state = rcu_cpumask_getstate(&rcu_global_state_bh.cpus);
+
+ rcu_kick_poller(rps);
+ }
+ spin_unlock(&rps->poller_lock);
+ if (rcu_needs_cpu(smp_processor_id())) {
+ /*
+ * task 2: Someone did a call_rcu() in the interrupt.
+ * Duh, we've lost. Force a reschedule, that leaves nohz mode.
+ *
+ * Note: This can race: our call_rcu() might have set
+ * start_immediately. But: that start might happen before
+ * we readd ourself to the global cpu mask. Then we would
+ * not take part in the global cycle - and we would not set
+ * start_immediately again, either, because our newqlen is
+ * already above qlowmark. The timeout would
+ * ensure forward progress, thus it's not that bad.
+ *
+ * FIXME: double check that this really works.
+ */
+ /* NOTE(review): looks like a leftover debug message - confirm */
+printk(KERN_ERR" irq exit %d - need resched .\n", smp_processor_id());
+ set_need_resched();
+ }
+ }
+ }
+}
+
+#endif /* CONFIG_NO_HZ */
+
+/*
+ * rcu_init_percpu_data - set up one flavour for a cpu that comes online.
+ * @rgs: global state of the flavour
+ * @rcs: per-cpu state to initialise
+ * @cpu: cpu that comes online
+ *
+ * Registers the cpu with the global cpumask and empties both callback
+ * lists.
+ */
+static void rcu_init_percpu_data(struct rcu_global_state *rgs, struct rcu_cpu_state *rcs, int cpu)
+{
+	__rcu_add_cpu(rgs, rcs, cpu);
+
+	rcs->old = NULL;
+	rcs->new = NULL;
+	rcs->oldqlen = 0;
+	rcs->newqlen = 0;
+}
+
+/*
+ * rcu_online_cpu - prepare the rcu per-cpu data of a cpu that starts up.
+ * @cpu: cpu that comes online; must not be listed in rcu_nohz_mask
+ *
+ * Called for CPU_STARTING from rcu_cpu_notify().
+ */
+static void __cpuinit rcu_online_cpu(int cpu)
+{
+ struct rcu_percpu_data *rps;
+
+ BUG_ON(cpu_isset(cpu, rcu_nohz_mask));
+
+ rps = &per_cpu(rcu_percpu, cpu);
+
+ rcu_init_percpu_data(&rcu_global_state_normal, &rps->state_normal, cpu);
+ rcu_init_percpu_data(&rcu_global_state_bh, &rps->state_bh, cpu);
+
+ rps->cpu_mode = RCU_CPUMODE_DELAYED;
+
+ rps->data_dead.dead = NULL;
+ rps->data_dead.deadqlen = 0;
+ rps->data_dead.batchcount = RCU_BATCH_MIN;
+
+ /* NOTE(review): runs again for every cpu that starts - confirm this is intended */
+ open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
+}
+
+/*
+ * rcu_cpu_notify - cpu hotplug notifier of the rcu state machine.
+ * @self: notifier block (not referenced)
+ * @action: hotplug event
+ * @hcpu: cpu number, passed as a pointer-sized integer
+ *
+ * CPU_STARTING sets the cpu up, CPU_DYING hands its work to another cpu;
+ * all other events are ignored. Always returns NOTIFY_OK.
+ */
+static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ long cpu = (long)hcpu;
+
+ /* NOTE(review): the two printk() calls look like debug output - confirm */
+printk(KERN_ERR "rcu_cpu_notify: %ld cpu %ld on cpu %d start.\n", action, cpu, smp_processor_id());
+ switch (action) {
+ case CPU_STARTING:
+ case CPU_STARTING_FROZEN:
+ rcu_online_cpu(cpu);
+ break;
+ case CPU_DYING:
+ case CPU_DYING_FROZEN:
+ rcu_offline_cpu(cpu);
+ break;
+ default:
+ break;
+ }
+printk(KERN_ERR "rcu_cpu_notify: %ld cpu %ld on cpu %d done.\n", action, cpu, smp_processor_id());
+ return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata rcu_nb = {
+ .notifier_call = rcu_cpu_notify,
+};
+
+/*
+ * Initializes rcu mechanism. Assumed to be called early.
+ * That is before local timer(SMP) or jiffie timer (uniproc) is setup.
+ * Both global state machines start in RCU_STATE_DESTROY. The boot cpu
+ * gets its CPU_STARTING notification by hand, because the notifier is
+ * only registered afterwards.
+ */
+void __init __rcu_init(void)
+{
+ rcu_state_init(&rcu_global_state_normal, RCU_STATE_DESTROY);
+ rcu_state_init(&rcu_global_state_bh, RCU_STATE_DESTROY);
+ rcu_cpu_notify(&rcu_nb, CPU_STARTING,
+ (void *)(long)smp_processor_id());
+ /* Register notifier for non-boot CPUs */
+ register_cpu_notifier(&rcu_nb);
+}
+
+module_param(qlowmark, int, 0);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index c506f26..cca5a83 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -284,10 +284,10 @@ void irq_exit(void)
invoke_softirq();

#ifdef CONFIG_NO_HZ
+ rcu_irq_exit(0);
/* Make sure that timer wheel updates are propagated */
if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
tick_nohz_stop_sched_tick(0);
- rcu_irq_exit();
#endif
preempt_enable_no_resched();
}
--
1.5.5.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Next message: Jörn Engel: "Re: Filesystem for block devices using flash storage?"
Previous message: Josef Bacik: "Re: [2.6 patch] provide generic_block_fiemap() only with BLOCK=y"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]