[PATCH] ideas

From: Lorenzo Stoakes

Date: Tue Jun 09 2026 - 05:39:39 EST


---
fs/proc/task_mmu.c | 89 ++++++++++++++++++++++------------------------
1 file changed, 42 insertions(+), 47 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index c2bd9f5bbbcd..16bf3cd8c7c7 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -233,16 +233,6 @@ static inline void reacquire_rcu(struct proc_maps_private *priv)
vma_iter_set(&priv->iter, priv->lock_ctx.locked_vma->vm_end);
}

-static inline bool is_mmap_lock_contended(struct proc_maps_private *priv)
-{
- struct proc_maps_locking_ctx *lock_ctx = &priv->lock_ctx;
-
- if (!lock_ctx->mmap_locked)
- return false;
-
- return !!mmap_lock_is_contended(lock_ctx->mm);
-}
-
#else /* CONFIG_PER_VMA_LOCK */

static inline int lock_ctx_mm(struct proc_maps_locking_ctx *lock_ctx)
@@ -278,11 +268,6 @@ static inline bool fallback_to_mmap_lock(struct proc_maps_private *priv,
return false;
}

-static inline bool is_mmap_lock_contended(struct proc_maps_private *priv)
-{
- return !!mmap_lock_is_contended(priv->lock_ctx.mm);
-}
-
static inline void drop_rcu(struct proc_maps_private *priv) {}
static inline void reacquire_rcu(struct proc_maps_private *priv) {}

@@ -1375,17 +1360,24 @@ get_smaps_shmem_walk_ops(struct proc_maps_private *priv)

#endif /* CONFIG_PER_VMA_LOCK */

-/*
- * Gather mem stats from @vma with the indicated beginning
- * address @start, and keep them in @mss.
+/**
+ * smap_gather_stats() - Gather mem stats from @vma.
+ * @priv: proc maps private state.
+ * @vma: The VMA whose stats we wish to gather.
+ * @mss: The accumulated stats.
+ * @start: The address from which to start.
*
- * Use vm_start of @vma as the beginning address if @start is 0.
+ * This gathers stats for the whole of the VMA unless the mmap lock was dropped
+ * and we raced a VMA merge, in which case we only gather stats for the
+ * remainder of the merged range.
*/
static void smap_gather_stats(struct proc_maps_private *priv,
struct vm_area_struct *vma,
- struct mem_size_stats *mss, unsigned long start)
+ struct mem_size_stats *mss,
+ unsigned long start)
{
const struct mm_walk_ops *ops = get_smaps_walk_ops(priv);
+ const bool is_partial = start > vma->vm_start;

/* Invalid start */
if (start >= vma->vm_end)
@@ -1408,20 +1400,20 @@ static void smap_gather_stats(struct proc_maps_private *priv,
* Unless we know that the shmem object (or the part mapped by
* our VMA) has no swapped out pages at all.
*/
- unsigned long shmem_swapped = shmem_swap_usage(vma);
+ const unsigned long shmem_swapped = shmem_swap_usage(vma);
+ const bool shared_or_ro = vma_test(vma, VMA_SHARED_BIT) ||
+ !vma_test(vma, VMA_WRITE_BIT);

- if (!start && (!shmem_swapped || (vma->vm_flags & VM_SHARED) ||
- !(vma->vm_flags & VM_WRITE))) {
+ if (!is_partial && (!shmem_swapped || shared_or_ro))
mss->swap += shmem_swapped;
- } else {
+ else
ops = get_smaps_shmem_walk_ops(priv);
- }
}

- if (!start)
- walk_page_vma(vma, ops, mss);
- else
+ if (is_partial)
walk_page_range(vma->vm_mm, start, vma->vm_end, ops, mss);
+ else
+ walk_page_vma(vma, ops, mss);

reacquire_rcu(priv);
}
@@ -1476,7 +1468,7 @@ static int show_smap(struct seq_file *m, void *v)
struct vm_area_struct *vma = v;
struct mem_size_stats mss = {};

- smap_gather_stats(priv, vma, &mss, 0);
+ smap_gather_stats(priv, vma, &mss, vma->vm_start);

show_map_vma(m, vma);

@@ -1510,7 +1502,6 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
loff_t pos = 0;
int ret = 0;

-
priv->task = get_proc_task(priv->inode);
if (!priv->task)
return -ESRCH;
@@ -1527,15 +1518,10 @@ static int show_smaps_rollup(struct seq_file *m, void *v)

vma_iter_init(&priv->iter, mm, 0);
vma = proc_get_vma(m, &pos);
- if (unlikely(!vma) || vma == get_gate_vma(priv->lock_ctx.mm))
+ if (unlikely(!vma))
goto empty_set;

- if (IS_ERR(vma)) {
- ret = PTR_ERR(vma);
- goto out_unlock;
- }
-
- vma_start = vma->vm_start;
+ vma_start = IS_ERR(vma) ? 0 : vma->vm_start;
while (vma) {
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
@@ -1545,20 +1531,29 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
if (vma == get_gate_vma(priv->lock_ctx.mm))
break;

- /*
- * If after retaking mmap_lock, already reported VMA grew or
- * merged with the next one, then iterate from last_vma_end.
- */
- smap_gather_stats(priv, vma, &mss,
- vma->vm_start < last_vma_end ? last_vma_end : 0);
+ /* Also handles a VMA that merged while the mmap lock was dropped. */
+ smap_gather_stats(priv, vma, &mss, last_vma_end);
last_vma_end = vma->vm_end;

/*
- * Release mmap_lock temporarily if someone wants to
- * take it for write request.
+ * If the VMA lock is not taken, we hold the often contended
+ * mmap lock. This can be because the arch doesn't support VMA
+ * locks, or we had to fall back to the mmap lock.
+ *
+ * To relieve pressure, check if it is indeed contended, then
+ * temporarily release it.
*/
- if (is_mmap_lock_contended(priv)) {
- unlock_vma_range(&priv->lock_ctx);
+ if (lock_ctx->mmap_locked && mmap_lock_is_contended(mm)) {
+ unlock_ctx_mm(lock_ctx);
+
+ /*
+ * If we are using VMA locks but fell back to an mmap
+ * lock, we may be able to VMA lock the next VMA, so
+ * reset the lock and try again.
+ *
+ * Otherwise, if the arch doesn't support VMA locks,
+ * this simply retakes the mmap lock.
+ */
ret = lock_vma_range(m, lock_ctx);
if (ret)
goto out_put_mm;
--
2.54.0