Re: [RFC 1/1] rwsem: Shrink rwsem by one pointer

From: Matthew Wilcox

Date: Wed Feb 18 2026 - 16:02:46 EST


On Tue, Feb 17, 2026 at 12:27:29PM -0800, Linus Torvalds wrote:
> I like this, but I have to say that I dislike how rwsem_add_waiter()
> in particular ends up looking.
>
> Not because it's horrible on its own, but when you look at the
> call-sites, that function ends up being entirely pointless.

I confess, I didn't look at the callers. Good catch; I've integrated
your suggestion and it looks better.

I was most concerned with just how ugly __rwsem_del_waiter() looked.
I had a good think about it and have an improved version.

> Somewhat similarly, I also reacted to this part:
>
> -#define rwsem_first_waiter(sem) \
> - list_first_entry(&sem->wait_list, struct rwsem_waiter, list)
> +#define rwsem_first_waiter(sem) sem->first_waiter
>
> that rwsem_first_waiter() macro used to make sense as a syntactic
> helper function. But now it really doesn't. It is literally more
> typing and *less* legible than just accessing that new
> "sem->first_waiter" field.

Yep, I did notice that too, just decided not to fix it. Also taken
care of in the next version.

Here's all the changes I made, and I'll post a rolled-up version
next.

diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 4226eb0ec5da..16f3db35652a 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -341,7 +341,6 @@ struct rwsem_waiter {
unsigned long timeout;
bool handoff_set;
};
-#define rwsem_first_waiter(sem) sem->first_waiter

enum rwsem_wake_type {
RWSEM_WAKE_ANY, /* Wake whatever's at head of wait list */
@@ -364,36 +363,19 @@ enum rwsem_wake_type {
*/
#define MAX_READERS_WAKEUP 0x100

-static inline void
-rwsem_add_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
-{
- struct rwsem_waiter *first = sem->first_waiter;
- lockdep_assert_held(&sem->wait_lock);
- if (first) {
- list_add_tail(&waiter->list, &first->list);
- } else {
- INIT_LIST_HEAD(&waiter->list);
- sem->first_waiter = waiter;
- }
- /* caller will set RWSEM_FLAG_WAITERS */
-}
-
static inline
bool __rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
{
- if (sem->first_waiter == waiter) {
- if (list_empty(&waiter->list)) {
- sem->first_waiter = NULL;
- return true;
- } else {
- sem->first_waiter = list_first_entry(&waiter->list,
- struct rwsem_waiter, list);
- list_del(&waiter->list);
- }
- } else {
- list_del(&waiter->list);
+ if (list_empty(&waiter->list)) {
+ sem->first_waiter = NULL;
+ return true;
}

+ if (sem->first_waiter == waiter)
+ sem->first_waiter = list_first_entry(&waiter->list,
+ struct rwsem_waiter, list);
+ list_del(&waiter->list);
+
return false;
}

@@ -453,7 +435,7 @@ static void rwsem_mark_wake(struct rw_semaphore *sem,
* Take a peek at the queue head waiter such that we can determine
* the wakeup(s) to perform.
*/
- waiter = rwsem_first_waiter(sem);
+ waiter = sem->first_waiter;

if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
if (wake_type == RWSEM_WAKE_ANY) {
@@ -612,8 +594,6 @@ rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter,
struct wake_q_head *wake_q)
__releases(&sem->wait_lock)
{
- bool first = rwsem_first_waiter(sem) == waiter;
-
wake_q_init(wake_q);

/*
@@ -621,7 +601,7 @@ rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter,
* the first waiter, we wake up the remaining waiters as they may
* be eligible to acquire or spin on the lock.
*/
- if (rwsem_del_waiter(sem, waiter) && first)
+ if (rwsem_del_waiter(sem, waiter) && sem->first_waiter == waiter)
rwsem_mark_wake(sem, RWSEM_WAKE_ANY, wake_q);
raw_spin_unlock_irq(&sem->wait_lock);
if (!wake_q_empty(wake_q))
@@ -638,7 +618,7 @@ rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter,
static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
struct rwsem_waiter *waiter)
{
- struct rwsem_waiter *first = rwsem_first_waiter(sem);
+ struct rwsem_waiter *first = sem->first_waiter;
long count, new;

lockdep_assert_held(&sem->wait_lock);
@@ -1030,7 +1010,7 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat
{
long adjustment = -RWSEM_READER_BIAS;
long rcnt = (count >> RWSEM_READER_SHIFT);
- struct rwsem_waiter waiter;
+ struct rwsem_waiter waiter, *first;
DEFINE_WAKE_Q(wake_q);

/*
@@ -1071,7 +1051,8 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat
waiter.handoff_set = false;

raw_spin_lock_irq(&sem->wait_lock);
- if (!sem->first_waiter) {
+ first = sem->first_waiter;
+ if (!first) {
/*
* In case the wait queue is empty and the lock isn't owned
* by a writer, this reader can exit the slowpath and return
@@ -1087,8 +1068,11 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat
return sem;
}
adjustment += RWSEM_FLAG_WAITERS;
+ INIT_LIST_HEAD(&waiter.list);
+ sem->first_waiter = &waiter;
+ } else {
+ list_add_tail(&waiter.list, &first->list);
}
- rwsem_add_waiter(sem, &waiter);

/* we're now waiting on the lock, but no longer actively locking */
count = atomic_long_add_return(adjustment, &sem->count);
@@ -1146,7 +1130,7 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat
static struct rw_semaphore __sched *
rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
{
- struct rwsem_waiter waiter;
+ struct rwsem_waiter waiter, *first;
DEFINE_WAKE_Q(wake_q);

/* do optimistic spinning and steal lock if possible */
@@ -1165,10 +1149,10 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
waiter.handoff_set = false;

raw_spin_lock_irq(&sem->wait_lock);
- rwsem_add_waiter(sem, &waiter);

- /* we're now waiting on the lock */
- if (rwsem_first_waiter(sem) != &waiter) {
+ first = sem->first_waiter;
+ if (first) {
+ list_add_tail(&waiter.list, &first->list);
rwsem_cond_wake_waiter(sem, atomic_long_read(&sem->count),
&wake_q);
if (!wake_q_empty(&wake_q)) {
@@ -1181,6 +1165,8 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
raw_spin_lock_irq(&sem->wait_lock);
}
} else {
+ INIT_LIST_HEAD(&waiter.list);
+ sem->first_waiter = &waiter;
atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count);
}