[PATCH 2/2] locking/percpu-rwsem: Introduce bias knob

From: Peter Zijlstra
Date: Thu Jul 14 2016 - 14:31:37 EST


The current percpu-rwsem read side is entirely free of serializing
instructions at the cost of having a synchronize_sched() in the write
path.

The latency of the synchronize_sched() is too high for some users
(cgroups), so provide a __percpu_init_rwsem(.bias) argument to forgo
this synchronize_sched() at the cost of forcing all readers into the
slow path, which has serializing instructions.

Cc: Tejun Heo <tj@xxxxxxxxxx>
Cc: Oleg Nesterov <oleg@xxxxxxxxxx>
Cc: Paul McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
Reported-by: John Stultz <john.stultz@xxxxxxxxxx>
Reported-by: Dmitry Shmidt <dimitrysh@xxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
fs/super.c | 3 ++-
include/linux/percpu-rwsem.h | 15 +++++++++++++--
kernel/cgroup.c | 2 +-
kernel/locking/percpu-rwsem.c | 10 +++++++++-
4 files changed, 25 insertions(+), 5 deletions(-)

--- a/fs/super.c
+++ b/fs/super.c
@@ -195,7 +195,8 @@ static struct super_block *alloc_super(s
for (i = 0; i < SB_FREEZE_LEVELS; i++) {
if (__percpu_init_rwsem(&s->s_writers.rw_sem[i],
sb_writers_name[i],
- &type->s_writers_key[i]))
+ &type->s_writers_key[i],
+ PERCPU_RWSEM_READER))
goto fail;
}
init_waitqueue_head(&s->s_writers.wait_unfrozen);
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -90,15 +90,26 @@ static inline void percpu_up_read(struct
extern void percpu_down_write(struct percpu_rw_semaphore *);
extern void percpu_up_write(struct percpu_rw_semaphore *);

+enum percpu_rwsem_bias { PERCPU_RWSEM_READER, PERCPU_RWSEM_WRITER };
+
extern int __percpu_init_rwsem(struct percpu_rw_semaphore *,
- const char *, struct lock_class_key *);
+ const char *, struct lock_class_key *,
+ enum percpu_rwsem_bias bias);

extern void percpu_free_rwsem(struct percpu_rw_semaphore *);

#define percpu_init_rwsem(sem) \
({ \
static struct lock_class_key rwsem_key; \
- __percpu_init_rwsem(sem, #sem, &rwsem_key); \
+ __percpu_init_rwsem(sem, #sem, &rwsem_key, \
+ PERCPU_RWSEM_READER); \
+})
+
+#define percpu_init_rwsem_writer(sem) \
+({ \
+ static struct lock_class_key rwsem_key; \
+ __percpu_init_rwsem(sem, #sem, &rwsem_key, \
+ PERCPU_RWSEM_WRITER); \
})

#define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem)
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -5605,7 +5605,7 @@ int __init cgroup_init(void)
int ssid;

BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16);
- BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem));
+ BUG_ON(percpu_init_rwsem_writer(&cgroup_threadgroup_rwsem));
BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files));
BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files));

--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -11,7 +11,8 @@
enum { readers_slow, readers_block };

int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
- const char *name, struct lock_class_key *rwsem_key)
+ const char *name, struct lock_class_key *rwsem_key,
+ enum percpu_rwsem_bias bias)
{
sem->read_count = alloc_percpu(int);
if (unlikely(!sem->read_count))
@@ -19,6 +20,13 @@ int __percpu_init_rwsem(struct percpu_rw

/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
rcu_sync_init(&sem->rss, RCU_SCHED_SYNC);
+ if (bias == PERCPU_RWSEM_WRITER) {
+ /*
+ * Disable rcu_sync() and force slow path.
+ */
+ sem->rss.gp_count++;
+ sem->rss.gp_state = !0;
+ }
__init_rwsem(&sem->rw_sem, name, rwsem_key);
init_waitqueue_head(&sem->writer);
sem->state = readers_slow;