[PATCH RESEND] sched/preempt: fix cond_resched_lock() and cond_resched_softirq()

From: Konstantin Khlebnikov
Date: Thu May 14 2015 - 12:23:38 EST


These functions check should_resched() before unlocking spinlock/bh-enable:
preempt_count always non-zero => should_resched() always returns false.
cond_resched_lock() works iff spin_needbreak is set.

This patch adds argument "preempt_offset" to should_resched() add
rearranges preempt_count offset constants for that:

PREEMPT_OFFSET - offset after preempt_disable() (0 if CONFIG_PREEMPT_COUNT=n)
PREEMPT_LOCK_OFFSET - offset after spin_lock() (alias for PREEMPT_OFFSET)
SOFTIRQ_DISABLE_OFFSET - offset after local_bh_distable()
SOFTIRQ_LOCK_OFFSET - offset after spin_lock_bh()

Signed-off-by: Konstantin Khlebnikov <khlebnikov@xxxxxxxxxxxxxx>

---
arch/powerpc/kvm/book3s_hv.c | 2 +-
arch/x86/include/asm/preempt.h | 4 ++--
drivers/xen/preempt.c | 2 +-
include/asm-generic/preempt.h | 4 ++--
include/linux/preempt.h | 5 +++--
include/linux/preempt_mask.h | 21 ++++++++++++++-------
include/linux/sched.h | 6 ------
kernel/sched/core.c | 6 +++---
8 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 48d3c5d2ecc9..6e73b74c0c60 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2177,7 +2177,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
vc->runner = vcpu;
if (n_ceded == vc->n_runnable) {
kvmppc_vcore_blocked(vc);
- } else if (should_resched()) {
+ } else if (should_resched(PREEMPT_LOCK_OFFSET)) {
vc->vcore_state = VCORE_PREEMPT;
/* Let something else run */
cond_resched_lock(&vc->lock);
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 8f3271842533..67b6cd00a44f 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -90,9 +90,9 @@ static __always_inline bool __preempt_count_dec_and_test(void)
/*
* Returns true when we need to resched and can (barring IRQ state).
*/
-static __always_inline bool should_resched(void)
+static __always_inline bool should_resched(int preempt_offset)
{
- return unlikely(!raw_cpu_read_4(__preempt_count));
+ return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
}

#ifdef CONFIG_PREEMPT
diff --git a/drivers/xen/preempt.c b/drivers/xen/preempt.c
index a1800c150839..46188cf41d08 100644
--- a/drivers/xen/preempt.c
+++ b/drivers/xen/preempt.c
@@ -31,7 +31,7 @@ EXPORT_SYMBOL_GPL(xen_in_preemptible_hcall);
asmlinkage __visible void xen_maybe_preempt_hcall(void)
{
if (unlikely(__this_cpu_read(xen_in_preemptible_hcall)
- && should_resched())) {
+ && should_resched(0))) {
/*
* Clear flag as we may be rescheduled on a different
* cpu.
diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h
index eb6f9e6c3075..e91fb799a6da 100644
--- a/include/asm-generic/preempt.h
+++ b/include/asm-generic/preempt.h
@@ -71,9 +71,9 @@ static __always_inline bool __preempt_count_dec_and_test(void)
/*
* Returns true when we need to resched and can (barring IRQ state).
*/
-static __always_inline bool should_resched(void)
+static __always_inline bool should_resched(int offset)
{
- return unlikely(!preempt_count() && tif_need_resched());
+ return unlikely(preempt_count() == offset && tif_need_resched());
}

#ifdef CONFIG_PREEMPT
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index de83b4eb1642..8cd6725c5758 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -20,7 +20,8 @@
#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
extern void preempt_count_add(int val);
extern void preempt_count_sub(int val);
-#define preempt_count_dec_and_test() ({ preempt_count_sub(1); should_resched(); })
+#define preempt_count_dec_and_test() \
+ ({ preempt_count_sub(1); should_resched(0); })
#else
#define preempt_count_add(val) __preempt_count_add(val)
#define preempt_count_sub(val) __preempt_count_sub(val)
@@ -59,7 +60,7 @@ do { \

#define preempt_check_resched() \
do { \
- if (should_resched()) \
+ if (should_resched(0)) \
__preempt_schedule(); \
} while (0)

diff --git a/include/linux/preempt_mask.h b/include/linux/preempt_mask.h
index dbeec4d4a3be..a832c243da0b 100644
--- a/include/linux/preempt_mask.h
+++ b/include/linux/preempt_mask.h
@@ -39,7 +39,15 @@
#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT)

+/*
+ * The preempt_count offset after preempt_disable();
+ */
+#ifdef CONFIG_PREEMPT_COUNT
#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT)
+#else
+#define PREEMPT_OFFSET 0
+#endif
+
#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT)
#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
#define NMI_OFFSET (1UL << NMI_SHIFT)
@@ -71,11 +79,10 @@
*/
#define in_nmi() (preempt_count() & NMI_MASK)

-#if defined(CONFIG_PREEMPT_COUNT)
-# define PREEMPT_CHECK_OFFSET 1
-#else
-# define PREEMPT_CHECK_OFFSET 0
-#endif
+/*
+ * The preempt_count offset after spin_lock()
+ */
+#define PREEMPT_LOCK_OFFSET PREEMPT_OFFSET

/*
* The preempt_count offset needed for things like:
@@ -90,7 +97,7 @@
*
* Work as expected.
*/
-#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_CHECK_OFFSET)
+#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_LOCK_OFFSET)

/*
* Are we running in atomic context? WARNING: this macro cannot
@@ -106,7 +113,7 @@
* (used by the scheduler, *after* releasing the kernel lock)
*/
#define in_atomic_preempt_off() \
- ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
+ ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_OFFSET)

#ifdef CONFIG_PREEMPT_COUNT
# define preemptible() (preempt_count() == 0 && !irqs_disabled())
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 26a2e6122734..61f4f2d5c882 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2834,12 +2834,6 @@ extern int _cond_resched(void);

extern int __cond_resched_lock(spinlock_t *lock);

-#ifdef CONFIG_PREEMPT_COUNT
-#define PREEMPT_LOCK_OFFSET PREEMPT_OFFSET
-#else
-#define PREEMPT_LOCK_OFFSET 0
-#endif
-
#define cond_resched_lock(lock) ({ \
___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\
__cond_resched_lock(lock); \
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fe22f7510bce..087bf36ecd46 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4223,7 +4223,7 @@ SYSCALL_DEFINE0(sched_yield)

int __sched _cond_resched(void)
{
- if (should_resched()) {
+ if (should_resched(0)) {
preempt_schedule_common();
return 1;
}
@@ -4241,7 +4241,7 @@ EXPORT_SYMBOL(_cond_resched);
*/
int __cond_resched_lock(spinlock_t *lock)
{
- int resched = should_resched();
+ int resched = should_resched(PREEMPT_LOCK_OFFSET);
int ret = 0;

lockdep_assert_held(lock);
@@ -4263,7 +4263,7 @@ int __sched __cond_resched_softirq(void)
{
BUG_ON(!in_softirq());

- if (should_resched()) {
+ if (should_resched(SOFTIRQ_DISABLE_OFFSET)) {
local_bh_enable();
preempt_schedule_common();
local_bh_disable();

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/