[PATCH v8 19/19] locking/rwsem: Disable preemption in down_read*() if owner in count

From: Waiman Long
Date: Mon May 20 2019 - 17:02:52 EST


It is very unlikely that successive preemptions in the middle of
down_read's inc-check-dec sequence will cause the reader count to
overflow. For absolute correctness, however, we still need to prevent
that possibility from happening. So preemption will be disabled during
the down_read*() call.

For PREEMPT=n kernels, there isn't much overhead in doing that.
For PREEMPT=y kernels, there will be some additional cost. RT kernels
have their own rwsem code, so it will not be a problem for them.

If MERGE_OWNER_INTO_COUNT isn't defined, we don't need to worry about
reader count overflow and so we don't need to disable preemption.

Signed-off-by: Waiman Long <longman@xxxxxxxxxx>
---
kernel/locking/rwsem.c | 38 ++++++++++++++++++++++++++++++++++----
1 file changed, 34 insertions(+), 4 deletions(-)

diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 29f0e0e5b62e..cede2f99220b 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -356,6 +356,24 @@ static inline void rwsem_set_nonspinnable(struct rw_semaphore *sem)
}

#ifdef MERGE_OWNER_INTO_COUNT
+/*
+ * It is very unlikely that successive preemptions in the middle of
+ * down_read's inc-check-dec sequence will cause the reader count to
+ * overflow. For absolute correctness, we still need to prevent
+ * that possibility from happening. So preemption will be disabled
+ * during the down_read*() call.
+ *
+ * For PREEMPT=n kernels, there isn't much overhead in doing that.
+ * For PREEMPT=y kernels, there will be some additional cost.
+ *
+ * If MERGE_OWNER_INTO_COUNT isn't defined, we don't need to worry
+ * about reader count overflow and so we don't need to disable
+ * preemption.
+ */
+#define rwsem_preempt_disable() preempt_disable()
+#define rwsem_preempt_enable() preempt_enable()
+#define rwsem_schedule_preempt_disabled() schedule_preempt_disabled()
+
/*
* Get the owner value from count to have early access to the task structure.
*/
@@ -420,6 +438,10 @@ late_initcall(rwsem_show_count_status);

#else /* !MERGE_OWNER_INTO_COUNT */

+#define rwsem_preempt_disable()
+#define rwsem_preempt_enable()
+#define rwsem_schedule_preempt_disabled() schedule()
+
/*
* Return just the real task structure pointer of the owner
*/
@@ -1247,7 +1269,7 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, int state, long adjustment)
raw_spin_unlock_irq(&sem->wait_lock);
break;
}
- schedule();
+ rwsem_schedule_preempt_disabled();
lockevent_inc(rwsem_sleep_reader);
}

@@ -1472,28 +1494,36 @@ static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
*/
inline void __down_read(struct rw_semaphore *sem)
{
- long tmp, adjustment = rwsem_read_trylock(sem, &tmp);
+ long tmp, adjustment;

+ rwsem_preempt_disable();
+ adjustment = rwsem_read_trylock(sem, &tmp);
if (unlikely(tmp & RWSEM_READ_FAILED_MASK)) {
rwsem_down_read_slowpath(sem, TASK_UNINTERRUPTIBLE, adjustment);
DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
} else {
rwsem_set_reader_owned(sem);
}
+ rwsem_preempt_enable();
}

static inline int __down_read_killable(struct rw_semaphore *sem)
{
- long tmp, adjustment = rwsem_read_trylock(sem, &tmp);
+ long tmp, adjustment;

+ rwsem_preempt_disable();
+ adjustment = rwsem_read_trylock(sem, &tmp);
if (unlikely(tmp & RWSEM_READ_FAILED_MASK)) {
if (IS_ERR(rwsem_down_read_slowpath(sem, TASK_KILLABLE,
- adjustment)))
+ adjustment))) {
+ rwsem_preempt_enable();
return -EINTR;
+ }
DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
} else {
rwsem_set_reader_owned(sem);
}
+ rwsem_preempt_enable();
return 0;
}

--
2.18.1