Re: [RFC][PATCH 00/13] percpu rwsem -v2

From: Peter Zijlstra
Date: Tue Jun 23 2015 - 13:50:31 EST


On Tue, Jun 23, 2015 at 04:56:39PM +0200, Daniel Wagner wrote:
> flock02
> mean variance sigma max min
> tip-1 11.8994 0.5874 0.7664 13.2022 8.6324
> tip-2 11.7394 0.5252 0.7247 13.2540 9.7513
> tip-3 11.8155 0.5288 0.7272 13.2700 9.9480
> tip+percpu-rswem-1 15.3601 0.8981 0.9477 16.8116 12.6910
> tip+percpu-rswem-2 15.2558 0.8442 0.9188 17.0199 12.9586
> tip+percpu-rswem-3 15.5297 0.6386 0.7991 17.4392 12.7992

I did indeed manage to get flock02 down to a usable level and found:

3.20 : ffffffff811ecbdf: incl %gs:0x7ee1de72(%rip) # aa58 <__preempt_count>
0.27 : ffffffff811ecbe6: mov 0xa98553(%rip),%rax # ffffffff81c85140 <file_rwsem>
10.78 : ffffffff811ecbed: incl %gs:(%rax)
0.19 : ffffffff811ecbf0: mov 0xa9855a(%rip),%edx # ffffffff81c85150 <file_rwsem+0x10>
0.00 : ffffffff811ecbf6: test %edx,%edx
0.00 : ffffffff811ecbf8: jne ffffffff811ecdd1 <flock_lock_file+0x261>
3.47 : ffffffff811ecbfe: decl %gs:0x7ee1de53(%rip) # aa58 <__preempt_count>
0.00 : ffffffff811ecc05: je ffffffff811eccec <flock_lock_file+0x17c>

Which is percpu_down_read(). Now aside from the fact that I run a
PREEMPT=y kernel, it looks like that sem->refcount increment stalls
because of the dependent load.

Manually hoisting the load very slightly improves things:

0.24 : ffffffff811ecbdf: mov 0xa9855a(%rip),%rax # ffffffff81c85140 <file_rwsem>
5.88 : ffffffff811ecbe6: incl %gs:0x7ee1de6b(%rip) # aa58 <__preempt_count>
7.94 : ffffffff811ecbed: incl %gs:(%rax)
0.30 : ffffffff811ecbf0: mov 0xa9855a(%rip),%edx # ffffffff81c85150 <file_rwsem+0x10>
0.00 : ffffffff811ecbf6: test %edx,%edx
0.00 : ffffffff811ecbf8: jne ffffffff811ecdd1 <flock_lock_file+0x261>
3.70 : ffffffff811ecbfe: decl %gs:0x7ee1de53(%rip) # aa58 <__preempt_count>
0.00 : ffffffff811ecc05: je ffffffff811eccec <flock_lock_file+0x17c>

But its not much :/

Using DEFINE_STATIC_PERCPU_RWSEM(file_rwsem) would allow GCC to omit the
sem->refcount load entirely, but its not smart enough to see that it can
(tested 4.9 and 5.1).

---
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -35,6 +35,8 @@ extern void __percpu_up_read(struct perc

static inline void _percpu_down_read(struct percpu_rw_semaphore *sem)
{
+ unsigned int __percpu *refcount = sem->refcount;
+
might_sleep();

preempt_disable();
@@ -47,7 +49,7 @@ static inline void _percpu_down_read(str
* writer will see anything we did within this RCU-sched read-side
* critical section.
*/
- __this_cpu_inc(*sem->refcount);
+ __this_cpu_inc(*refcount);
if (unlikely(!rcu_sync_is_idle(&sem->rss)))
__percpu_down_read(sem); /* Unconditional memory barrier. */
preempt_enable();
@@ -81,6 +83,8 @@ static inline bool percpu_down_read_tryl

static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
{
+ unsigned int __percpu *refcount = sem->refcount;
+
/*
* The barrier() in preempt_disable() prevents the compiler from
* bleeding the critical section out.
@@ -90,7 +94,7 @@ static inline void percpu_up_read(struct
* Same as in percpu_down_read().
*/
if (likely(rcu_sync_is_idle(&sem->rss)))
- __this_cpu_dec(*sem->refcount);
+ __this_cpu_dec(*refcount);
else
__percpu_up_read(sem); /* Unconditional memory barrier. */
preempt_enable();
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/