Re: [GIT PULL] killable rwsems for v4.7

From: Peter Zijlstra
Date: Tue May 17 2016 - 07:02:52 EST


On Mon, May 16, 2016 at 01:32:28PM -0700, Linus Torvalds wrote:
> On Mon, May 16, 2016 at 7:55 AM, Ingo Molnar <mingo@xxxxxxxxxx> wrote:
> >
> > This tree, by Michal Hocko, implements down_write_killable(). The main use case
> > will be to update mm_sem usage sites to use this new API,
>
> Hmm. Is somebody (Michal?) looking at down_read_killable() too?
>
> The VFS layer will want it with the pending parallel lookup code - the
> inode semaphore is being converted to a rwsem, and there's a couple of
> "killable" users.. The first step actually just wants the exclusive
> case (ie the write case that this adds), but I think the readdir code
> could really use a reading version too..

Something roughly like so.

Completely untested etc..

---
kernel/locking/rwsem-spinlock.c | 28 +++++++++++++++++++++++-----
kernel/locking/rwsem-xadd.c | 28 +++++++++++++++++++++++++---
2 files changed, 48 insertions(+), 8 deletions(-)

diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c
index 1591f6b3539f..d66325970248 100644
--- a/kernel/locking/rwsem-spinlock.c
+++ b/kernel/locking/rwsem-spinlock.c
@@ -125,7 +125,7 @@ __rwsem_wake_one_writer(struct rw_semaphore *sem)
/*
* get a read lock on the semaphore
*/
-void __sched __down_read(struct rw_semaphore *sem)
+int __sched __down_read_common(struct rw_semaphore *sem, int state)
{
struct rwsem_waiter waiter;
struct task_struct *tsk;
@@ -137,11 +137,11 @@ void __sched __down_read(struct rw_semaphore *sem)
/* granted */
sem->count++;
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
- goto out;
+ return 0;
}

tsk = current;
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ set_task_state(tsk, state);

/* set up my own style of waitqueue */
waiter.task = tsk;
@@ -155,6 +155,8 @@ void __sched __down_read(struct rw_semaphore *sem)

/* wait to be given the lock */
for (;;) {
+ if (signal_pending_state(state, current))
+ goto out_nolock;
if (!waiter.task)
break;
schedule();
@@ -162,8 +164,24 @@ void __sched __down_read(struct rw_semaphore *sem)
}

__set_task_state(tsk, TASK_RUNNING);
- out:
- ;
+ return 0;
+
+out_nolock:
+ raw_spin_lock_irqsave(&sem->wait_lock, flags);
+ if (waiter.task)
+ list_del(&waiter.list);
+ else
+ sem->count--;
+ if (!list_empty(&sem->wait_list))
+ __rwsem_do_wake(sem, 1);
+ raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+ return -EINTR;
+}
+
+void __sched __down_read(struct rw_semaphore *sem)
+{
+ __down_read_common(sem, TASK_UNINTERRUPTIBLE);
}

/*
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 09e30c6225e5..4f7b1fb1f127 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -210,8 +210,8 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
/*
* Wait for the read lock to be granted
*/
-__visible
-struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
+static inline struct rw_semaphore *
+rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
{
long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
struct rwsem_waiter waiter;
@@ -244,7 +244,9 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)

/* wait to be given the lock */
while (true) {
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ set_task_state(tsk, state);
+ if (signal_pending_state(state, current))
+ goto out_nolock;
if (!waiter.task)
break;
schedule();
@@ -252,6 +254,26 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)

__set_task_state(tsk, TASK_RUNNING);
return sem;
+
+out_nolock:
+ raw_spin_lock_irq(&sem->wait_lock);
+ if (waiter.task != NULL)
+ list_del(&waiter.list);
+ else
+ rwsem_atomic_update(-RWSEM_ACTIVE_READ_BIAS, sem);
+ if (list_empty(&sem->wait_list))
+ rwsem_atomic_update(-RWSEM_WAITING_BIAS, sem);
+ else
+ __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
+ raw_spin_unlock_irq(&sem->wait_lock);
+
+ return ERR_PTR(-EINTR);
+}
+
+__visible struct rw_semaphore * __sched
+rwsem_down_read_failed(struct rw_semaphore *sem)
+{
+ return rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(rwsem_down_read_failed);