[PATCH 3/4] mm: synchronize saved_auxv access with arg_lock

From: Andrei Vagin

Date: Tue Feb 17 2026 - 13:10:44 EST


The mm->saved_auxv array stores the auxiliary vector, which can be
modified via prctl(PR_SET_MM_AUXV) or prctl(PR_SET_MM_MAP). Previously,
accesses to saved_auxv were not synchronized. This was an intentional
trade-off, as the vector was only used to provide information to
userspace via /proc/PID/auxv or prctl(PR_GET_AUXV), and consistency
between the auxv values was left to userspace.

With the introduction of hardware capability (HWCAP) inheritance during
execve, the kernel now relies on the contents of saved_auxv to configure
the execution environment of new processes. An unsynchronized read
during execve could result in a new process inheriting an inconsistent
set of capabilities if the parent process updates its auxiliary vector
concurrently.

While it is still not strictly required to guarantee the consistency of
auxv values on the kernel side, doing so is relatively straightforward.
This change implements synchronization using arg_lock.

Reviewed-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@xxxxxxxxxxxxxx>
Reviewed-by: Cyrill Gorcunov <gorcunov@xxxxxxxxx>
Reviewed-by: Michal Koutný <mkoutny@xxxxxxxx>
Signed-off-by: Andrei Vagin <avagin@xxxxxxxxxx>
---
fs/exec.c | 2 ++
fs/proc/base.c | 12 +++++++++---
include/linux/mm_types.h | 1 -
kernel/fork.c | 7 ++++++-
kernel/sys.c | 29 ++++++++++++++---------------
5 files changed, 31 insertions(+), 20 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 9c70776fca9e..8f5fba06aff8 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1801,6 +1801,7 @@ static void inherit_hwcap(struct linux_binprm *bprm)
n = 1;
#endif

+ spin_lock(&mm->arg_lock);
for (i = 0; n && i < AT_VECTOR_SIZE; i += 2) {
unsigned long type = mm->saved_auxv[i];
unsigned long val = mm->saved_auxv[i + 1];
@@ -1832,6 +1833,7 @@ static void inherit_hwcap(struct linux_binprm *bprm)
n--;
}
done:
+ spin_unlock(&mm->arg_lock);
mm_flags_set(MMF_USER_HWCAP, bprm->mm);
}

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 4eec684baca9..09d887741268 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1083,14 +1083,20 @@ static ssize_t auxv_read(struct file *file, char __user *buf,
{
struct mm_struct *mm = file->private_data;
unsigned int nwords = 0;
+ unsigned long saved_auxv[AT_VECTOR_SIZE];

if (!mm)
return 0;
+
+ spin_lock(&mm->arg_lock);
+ memcpy(saved_auxv, mm->saved_auxv, sizeof(saved_auxv));
+ spin_unlock(&mm->arg_lock);
+
do {
nwords += 2;
- } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
- return simple_read_from_buffer(buf, count, ppos, mm->saved_auxv,
- nwords * sizeof(mm->saved_auxv[0]));
+ } while (saved_auxv[nwords - 2] != 0); /* AT_NULL */
+ return simple_read_from_buffer(buf, count, ppos, saved_auxv,
+ nwords * sizeof(saved_auxv[0]));
}

static const struct file_operations proc_auxv_operations = {
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 2f3c6ad48c0a..d1a95b90e448 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1254,7 +1254,6 @@ struct mm_struct {
unsigned long start_code, end_code, start_data, end_data;
unsigned long start_brk, brk, start_stack;
unsigned long arg_start, arg_end, env_start, env_end;
-
unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */

#ifdef CONFIG_ARCH_HAS_ELF_CORE_EFLAGS
diff --git a/kernel/fork.c b/kernel/fork.c
index 4c92a2bc3cbb..e17e57e29b6a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1105,8 +1105,13 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
__mm_flags_overwrite_word(mm, mmf_init_legacy_flags(flags));
mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK;

- if (mm_flags_test(MMF_USER_HWCAP, current->mm))
+ if (mm_flags_test(MMF_USER_HWCAP, current->mm)) {
+ spin_lock(&current->mm->arg_lock);
mm_flags_set(MMF_USER_HWCAP, mm);
+ memcpy(mm->saved_auxv, current->mm->saved_auxv,
+ sizeof(mm->saved_auxv));
+ spin_unlock(&current->mm->arg_lock);
+ }
} else {
__mm_flags_overwrite_word(mm, default_dump_filter);
mm->def_flags = 0;
diff --git a/kernel/sys.c b/kernel/sys.c
index e4b0fa2f6845..c679b5797e73 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2147,20 +2147,11 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
mm->arg_end = prctl_map.arg_end;
mm->env_start = prctl_map.env_start;
mm->env_end = prctl_map.env_end;
- spin_unlock(&mm->arg_lock);
-
- /*
- * Note this update of @saved_auxv is lockless thus
- * if someone reads this member in procfs while we're
- * updating -- it may get partly updated results. It's
- * known and acceptable trade off: we leave it as is to
- * not introduce additional locks here making the kernel
- * more complex.
- */
if (prctl_map.auxv_size) {
- memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv));
mm_flags_set(MMF_USER_HWCAP, mm);
+ memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv));
}
+ spin_unlock(&mm->arg_lock);

mmap_read_unlock(mm);
return 0;
@@ -2190,10 +2181,10 @@ static int prctl_set_auxv(struct mm_struct *mm, unsigned long addr,

BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));

- task_lock(current);
- memcpy(mm->saved_auxv, user_auxv, len);
+ spin_lock(&mm->arg_lock);
mm_flags_set(MMF_USER_HWCAP, mm);
- task_unlock(current);
+ memcpy(mm->saved_auxv, user_auxv, len);
+ spin_unlock(&mm->arg_lock);

return 0;
}
@@ -2481,9 +2472,17 @@ static inline int prctl_get_mdwe(unsigned long arg2, unsigned long arg3,
static int prctl_get_auxv(void __user *addr, unsigned long len)
{
struct mm_struct *mm = current->mm;
+ unsigned long auxv[AT_VECTOR_SIZE];
unsigned long size = min_t(unsigned long, sizeof(mm->saved_auxv), len);

- if (size && copy_to_user(addr, mm->saved_auxv, size))
+ if (!size)
+ return sizeof(mm->saved_auxv);
+
+ spin_lock(&mm->arg_lock);
+ memcpy(auxv, mm->saved_auxv, size);
+ spin_unlock(&mm->arg_lock);
+
+ if (copy_to_user(addr, auxv, size))
return -EFAULT;
return sizeof(mm->saved_auxv);
}
--
2.53.0.310.g728cabbaf7-goog