[PATCH v10 14/21] futex: Resize local futex hash table based on number of threads.

From: Sebastian Andrzej Siewior
Date: Wed Mar 12 2025 - 11:18:31 EST


Automatically size the local hash based on the number of threads, where
the thread count taken into account is capped at the number of online
CPUs. The number of buckets is 4 * number-of-threads, rounded up to a
power of two and clamped to the range between 16 and futex_hashsize (the
size of the system wide hash).

On CONFIG_BASE_SMALL configs, the additional members for private hash
resize have been removed in order to save memory on mm_struct and avoid
any additional memory consumption. To achieve this, a new
CONFIG_FUTEX_PRIVATE_HASH option has been introduced which depends on
!BASE_SMALL and can be extended later.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
---
include/linux/futex.h | 20 ++++++--------
include/linux/mm_types.h | 2 +-
init/Kconfig | 5 ++++
kernel/fork.c | 4 +--
kernel/futex/core.c | 57 ++++++++++++++++++++++++++++++++++++----
kernel/futex/futex.h | 8 ++++++
6 files changed, 75 insertions(+), 21 deletions(-)

diff --git a/include/linux/futex.h b/include/linux/futex.h
index bfb38764bac7a..7e14d2e9162d2 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -78,6 +78,8 @@ void futex_exec_release(struct task_struct *tsk);
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
u32 __user *uaddr2, u32 val2, u32 val3);
int futex_hash_prctl(unsigned long arg2, unsigned long arg3);
+
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
int futex_hash_allocate_default(void);
void futex_hash_free(struct mm_struct *mm);

@@ -87,14 +89,13 @@ static inline void futex_mm_init(struct mm_struct *mm)
mutex_init(&mm->futex_hash_lock);
}

-static inline bool futex_hash_requires_allocation(void)
-{
- if (current->mm->futex_phash)
- return false;
- return true;
-}
+#else /* !CONFIG_FUTEX_PRIVATE_HASH */
+static inline int futex_hash_allocate_default(void) { return 0; }
+static inline void futex_hash_free(struct mm_struct *mm) { }
+static inline void futex_mm_init(struct mm_struct *mm) { }
+#endif /* CONFIG_FUTEX_PRIVATE_HASH */

-#else
+#else /* !CONFIG_FUTEX */
static inline void futex_init_task(struct task_struct *tsk) { }
static inline void futex_exit_recursive(struct task_struct *tsk) { }
static inline void futex_exit_release(struct task_struct *tsk) { }
@@ -116,11 +117,6 @@ static inline int futex_hash_allocate_default(void)
static inline void futex_hash_free(struct mm_struct *mm) { }
static inline void futex_mm_init(struct mm_struct *mm) { }

-static inline bool futex_hash_requires_allocation(void)
-{
- return false;
-}
-
#endif

#endif
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 46abaf1ce1c0a..e0e8adbe66bdd 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -938,7 +938,7 @@ struct mm_struct {
*/
seqcount_t mm_lock_seq;
#endif
-#ifdef CONFIG_FUTEX
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
struct mutex futex_hash_lock;
struct futex_private_hash __rcu *futex_phash;
struct futex_private_hash *futex_phash_new;
diff --git a/init/Kconfig b/init/Kconfig
index a0ea04c177842..bb209c12a2bda 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1683,6 +1683,11 @@ config FUTEX_PI
depends on FUTEX && RT_MUTEXES
default y

+config FUTEX_PRIVATE_HASH
+ bool
+ depends on FUTEX && !BASE_SMALL
+ default y
+
config EPOLL
bool "Enable eventpoll support" if EXPERT
default y
diff --git a/kernel/fork.c b/kernel/fork.c
index 440c5808f70a2..69f98d7e85054 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2141,9 +2141,7 @@ static bool need_futex_hash_allocate_default(u64 clone_flags)
{
if ((clone_flags & (CLONE_THREAD | CLONE_VM)) != (CLONE_THREAD | CLONE_VM))
return false;
- if (!thread_group_empty(current))
- return false;
- return futex_hash_requires_allocation();
+ return true;
}

/*
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index c5a9db946b421..229009279ee7d 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -138,6 +138,7 @@ static struct futex_hash_bucket *futex_hash_private(union futex_key *key,
return &fhb[hash & hash_mask];
}

+#ifdef CONFIG_FUTEX_PRIVATE_HASH
static void futex_rehash_current_users(struct futex_private_hash *old,
struct futex_private_hash *new)
{
@@ -256,6 +257,14 @@ static struct futex_private_hash *futex_get_private_hb(union futex_key *key)
return futex_get_private_hash();
}

+#else
+
+static struct futex_private_hash *futex_get_private_hb(union futex_key *key)
+{
+ return NULL;
+}
+#endif
+
/**
* futex_hash - Return the hash bucket in the global hash
* @key: Pointer to the futex key for which the hash is calculated
@@ -279,6 +288,7 @@ struct futex_hash_bucket *__futex_hash(union futex_key *key)
return &futex_queues[hash & futex_hashmask];
}

+#ifdef CONFIG_FUTEX_PRIVATE_HASH
bool futex_put_private_hash(struct futex_private_hash *hb_p)
{
bool released;
@@ -315,6 +325,7 @@ void futex_hash_put(struct futex_hash_bucket *hb)
return;
futex_put_private_hash(hb_p);
}
+#endif

/**
* futex_setup_timer - set up the sleeping hrtimer.
@@ -1351,12 +1362,15 @@ void futex_exit_release(struct task_struct *tsk)
static void futex_hash_bucket_init(struct futex_hash_bucket *fhb,
struct futex_private_hash *hb_p)
{
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
fhb->hb_p = hb_p;
+#endif
atomic_set(&fhb->waiters, 0);
plist_head_init(&fhb->chain);
spin_lock_init(&fhb->lock);
}

+#ifdef CONFIG_FUTEX_PRIVATE_HASH
void futex_hash_free(struct mm_struct *mm)
{
struct futex_private_hash *hb_p;
@@ -1389,10 +1403,7 @@ static int futex_hash_allocate(unsigned int hash_slots)

if (hash_slots == 0)
hash_slots = 16;
- if (hash_slots < 2)
- hash_slots = 2;
- if (hash_slots > 131072)
- hash_slots = 131072;
+ hash_slots = clamp(hash_slots, 2, futex_hashmask + 1);
if (!is_power_of_2(hash_slots))
hash_slots = rounddown_pow_of_two(hash_slots);

@@ -1434,7 +1445,30 @@ static int futex_hash_allocate(unsigned int hash_slots)

int futex_hash_allocate_default(void)
{
- return futex_hash_allocate(0);
+ unsigned int threads, buckets, current_buckets = 0;
+ struct futex_private_hash *hb_p;
+
+ if (!current->mm)
+ return 0;
+
+ scoped_guard(rcu) {
+ threads = min_t(unsigned int, get_nr_threads(current), num_online_cpus());
+ hb_p = rcu_dereference(current->mm->futex_phash);
+ if (hb_p)
+ current_buckets = hb_p->hash_mask + 1;
+ }
+
+ /*
+ * The default allocation will remain within
+ * 16 <= threads * 4 <= global hash size
+ */
+ buckets = roundup_pow_of_two(4 * threads);
+ buckets = clamp(buckets, 16, futex_hashmask + 1);
+
+ if (current_buckets >= buckets)
+ return 0;
+
+ return futex_hash_allocate(buckets);
}

static int futex_hash_get_slots(void)
@@ -1448,6 +1482,19 @@ static int futex_hash_get_slots(void)
return 0;
}

+#else
+
+static int futex_hash_allocate(unsigned int hash_slots)
+{
+ return -EINVAL;
+}
+
+static int futex_hash_get_slots(void)
+{
+ return 0;
+}
+#endif
+
int futex_hash_prctl(unsigned long arg2, unsigned long arg3)
{
int ret;
diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h
index 973efcca2e01b..782021feffe2e 100644
--- a/kernel/futex/futex.h
+++ b/kernel/futex/futex.h
@@ -205,11 +205,19 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
int flags, u64 range_ns);

extern struct futex_hash_bucket *__futex_hash(union futex_key *key);
+#ifdef CONFIG_FUTEX_PRIVATE_HASH
extern void futex_hash_get(struct futex_hash_bucket *hb);
extern void futex_hash_put(struct futex_hash_bucket *hb);
extern struct futex_private_hash *futex_get_private_hash(void);
extern bool futex_put_private_hash(struct futex_private_hash *hb_p);

+#else /* !CONFIG_FUTEX_PRIVATE_HASH */
+static inline void futex_hash_get(struct futex_hash_bucket *hb) { }
+static inline void futex_hash_put(struct futex_hash_bucket *hb) { }
+static inline struct futex_private_hash *futex_get_private_hash(void) { return NULL; }
+static inline bool futex_put_private_hash(struct futex_private_hash *hb_p) { return false; }
+#endif
+
DEFINE_CLASS(hb, struct futex_hash_bucket *,
if (_T) futex_hash_put(_T),
__futex_hash(key), union futex_key *key);
--
2.47.2