[PATCH] locking/rwsem: don't optimistically spin when the handoff bit is set

From: Zou Cao
Date: Tue Jul 30 2024 - 05:56:48 EST


Optimistic spinning is futile once the handoff bit is set: a spinner
can never take the rwsem until the first waiter has been woken, so
continuing to spin only exacerbates the latency of osq_lock. Bailing
out of the spin in that case improves performance by about 5% when
running UnixBench.
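
In sketch form (a simplified, non-compilable illustration of the
change in the diff below, with the rest of the spin loop elided):

	/*
	 * rwsem_optimistic_spin(), simplified: stop spinning as soon
	 * as the unqueued trylock reports that the HANDOFF bit is set,
	 * because the lock is now reserved for the first waiter and a
	 * spinner can no longer steal it.
	 */
	for (;;) {
		enum optimistic_stat taken;

		taken = rwsem_try_write_lock_unqueued(sem);
		if (taken == OPTIMISTIC_SUCCESS || taken == OPTIMISTIC_HANDOFF)
			break;
		/* ... owner tracking and cpu_relax() as before ... */
	}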

This is easy to reproduce by running UnixBench shell8 on an xfs
filesystem with an AMD EPYC 9654 96-Core Processor:
./Run shell8 -C 56 -i 1

The total machine load reaches 92%, with 90% sys and 1.7% user.
Running perf top shows nearly every CPU busy in osq_lock:

90.07% [kernel] [k] osq_lock
0.66% [kernel] [k] unmap_page_range
0.52% [kernel] [k] page_add_file_rmap
0.51% [kernel] [k] release_pages
0.30% [kernel] [k] rwsem_spin_on_owner
0.23% [kernel] [k] native_queued_spin_lock_slowpath

After this patch:

perf top:
25.59% [kernel] [k] osq_lock
4.69% [kernel] [k] unmap_page_range
3.61% [kernel] [k] native_queued_spin_lock_slowpath
3.05% [kernel] [k] release_pages
2.55% [kernel] [k] filemap_map_pages

The total machine load drops to 10%, with 9.4% sys and 1.4% user.

Signed-off-by: Zou Cao <zoucao@xxxxxxxxxxxx>
Signed-off-by: fuwenxin <fuwenxin@xxxxxxxxxxxx>
---
kernel/locking/rwsem.c | 31 ++++++++++++++++++++++++-------
1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 33cac79..7f345bb 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -684,10 +684,23 @@ enum owner_state {
};

#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+
+/*
+ * OPTIMISTIC_FAILED : optimistic spin failed
+ * OPTIMISTIC_SUCCESS : optimistic spin succeeded
+ * OPTIMISTIC_HANDOFF : optimistic spin failed because the HANDOFF bit is set
+ */
+
+enum optimistic_stat {
+ OPTIMISTIC_FAILED,
+ OPTIMISTIC_SUCCESS,
+ OPTIMISTIC_HANDOFF,
+};
+
/*
* Try to acquire write lock before the writer has been put on wait queue.
*/
-static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
+static inline enum optimistic_stat rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
{
long count = atomic_long_read(&sem->count);

@@ -696,10 +709,14 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
count | RWSEM_WRITER_LOCKED)) {
rwsem_set_owner(sem);
lockevent_inc(rwsem_opt_lock);
- return true;
+ return OPTIMISTIC_SUCCESS;
}
}
- return false;
+
+ if (count & RWSEM_FLAG_HANDOFF)
+ return OPTIMISTIC_HANDOFF;
+
+ return OPTIMISTIC_FAILED;
}

static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
@@ -818,7 +835,7 @@ static inline u64 rwsem_rspin_threshold(struct rw_semaphore *sem)

static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
- bool taken = false;
+ enum optimistic_stat taken = OPTIMISTIC_FAILED;
int prev_owner_state = OWNER_NULL;
int loop = 0;
u64 rspin_threshold = 0;
@@ -845,7 +862,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
*/
taken = rwsem_try_write_lock_unqueued(sem);

- if (taken)
+ if (taken == OPTIMISTIC_SUCCESS || taken == OPTIMISTIC_HANDOFF)
break;

/*
@@ -930,8 +947,8 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
}
osq_unlock(&sem->osq);
done:
- lockevent_cond_inc(rwsem_opt_fail, !taken);
- return taken;
+ lockevent_cond_inc(rwsem_opt_fail, taken != OPTIMISTIC_SUCCESS);
+ return taken == OPTIMISTIC_SUCCESS;
}

/*
--
1.8.3.1