Re: [PATCH -v2 5/7] locking/percpu-rwsem: Remove the embedded rwsem

From: Kirill Tkhai
Date: Mon Feb 03 2020 - 09:33:29 EST


On 03.02.2020 16:44, Peter Zijlstra wrote:
> Hi Kirill,
>
> On Mon, Feb 03, 2020 at 02:45:16PM +0300, Kirill Tkhai wrote:
>
>> Maybe, this is not a subject of this patchset. But since this is a newborn function,
>> can we introduce it to save one unneeded wake_up of writer? This is a situation,
>> when writer becomes woken up just to write itself into sem->writer.task.
>>
>> Something like below:
>>
>> diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
>> index a136677543b4..e4f88bfd43ed 100644
>> --- a/kernel/locking/percpu-rwsem.c
>> +++ b/kernel/locking/percpu-rwsem.c
>> @@ -9,6 +9,8 @@
>> #include <linux/sched/task.h>
>> #include <linux/errno.h>
>>
>> +static bool readers_active_check(struct percpu_rw_semaphore *sem);
>> +
>> int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
>> const char *name, struct lock_class_key *key)
>> {
>> @@ -101,6 +103,16 @@ static bool __percpu_rwsem_trylock(struct percpu_rw_semaphore *sem, bool reader)
>> return __percpu_down_write_trylock(sem);
>> }
>>
>> +static void queue_sem_writer(struct percpu_rw_semaphore *sem, struct task_struct *p)
>> +{
>> + rcu_assign_pointer(sem->writer.task, p);
>> + smp_mb();
>> + if (readers_active_check(sem)) {
>> + WRITE_ONCE(sem->writer.task, NULL);
>> + wake_up_process(p);
>> + }
>> +}
>> +
>> /*
>> * The return value of wait_queue_entry::func means:
>> *
>> @@ -129,7 +141,11 @@ static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
>> list_del_init(&wq_entry->entry);
>> smp_store_release(&wq_entry->private, NULL);
>>
>> - wake_up_process(p);
>> + if (reader || readers_active_check(sem))
>> + wake_up_process(p);
>> + else
>> + queue_sem_writer(sem, p);
>> +
>> put_task_struct(p);
>>
>> return !reader; /* wake (readers until) 1 writer */
>> @@ -247,8 +263,11 @@ void percpu_down_write(struct percpu_rw_semaphore *sem)
>> * them.
>> */
>>
>> - /* Wait for all active readers to complete. */
>> - rcuwait_wait_event(&sem->writer, readers_active_check(sem));
>> + if (rcu_access_pointer(sem->writer.task))
>> + WRITE_ONCE(sem->writer.task, NULL);
>> + else
>> + /* Wait for all active readers to complete. */
>> + rcuwait_wait_event(&sem->writer, readers_active_check(sem));
>> }
>> EXPORT_SYMBOL_GPL(percpu_down_write);
>>
>> Just an idea, completely untested.
>
> Hurm,.. I think I see what you're proposing. I also think your immediate
> patch is racy, consider for example what happens if your
> queue_sem_writer() finds !readers_active_check(), such that we do in
> fact need to wait. Then your percpu_down_write() will find
> sem->writer.task and clear it -- no waiting.

You mean that down_read() wakes up waiters unconditionally, so the
optimization in percpu_down_write() will skip the readers_active_check() check.

You are right. Then we have to modify this a little bit and remove
the optimization from percpu_down_write():

diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index a136677543b4..90647ab28804 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -9,6 +9,8 @@
#include <linux/sched/task.h>
#include <linux/errno.h>

+static bool readers_active_check(struct percpu_rw_semaphore *sem);
+
int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
const char *name, struct lock_class_key *key)
{
@@ -101,6 +103,16 @@ static bool __percpu_rwsem_trylock(struct percpu_rw_semaphore *sem, bool reader)
return __percpu_down_write_trylock(sem);
}

+static void queue_sem_writer(struct percpu_rw_semaphore *sem, struct task_struct *p)
+{
+ rcu_assign_pointer(sem->writer.task, p);
+ smp_mb();
+ if (readers_active_check(sem)) {
+ WRITE_ONCE(sem->writer.task, NULL);
+ wake_up_process(p);
+ }
+}
+
/*
* The return value of wait_queue_entry::func means:
*
@@ -129,7 +141,11 @@ static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
list_del_init(&wq_entry->entry);
smp_store_release(&wq_entry->private, NULL);

- wake_up_process(p);
+ if (reader || readers_active_check(sem))
+ wake_up_process(p);
+ else
+ queue_sem_writer(sem, p);
+
put_task_struct(p);

return !reader; /* wake (readers until) 1 writer */
@@ -248,6 +264,7 @@ void percpu_down_write(struct percpu_rw_semaphore *sem)
*/

/* Wait for all active readers to complete. */
+ /* sem->writer is NULL or points to current */
rcuwait_wait_event(&sem->writer, readers_active_check(sem));
}
EXPORT_SYMBOL_GPL(percpu_down_write);

> Also, I'm not going to hold up these patches for this, we can always do
> this on top.
>
> Still, let me consider this a little more.

No problem, this is just an idea.