[PATCH 8/9] mm/madvise: batch tlb flushes for [process_]madvise(MADV_{DONTNEED[_LOCKED],FREE})

From: SeongJae Park
Date: Mon Mar 10 2025 - 13:25:22 EST


MADV_DONTNEED[_LOCKED] and MADV_FREE internal logics for
[process_]madvise() can be invoked with batched tlb flushes. Update
vector_madvise() and do_madvise(), which are called for the two system
calls respectively, to use those in the efficient way. Initialize an
mmu_gather object before starting the internal works, and flush the
gathered tlb entries at once after all the internal works are done.

Signed-off-by: SeongJae Park <sj@xxxxxxxxxx>
---
mm/madvise.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 47 insertions(+), 4 deletions(-)

diff --git a/mm/madvise.c b/mm/madvise.c
index d7ea71c6422c..d5f4ce3041a4 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -905,6 +905,7 @@ static bool madvise_dontneed_free_valid_vma(struct vm_area_struct *vma,

struct madvise_behavior {
int behavior;
+ struct mmu_gather *tlb;
};

static long madvise_dontneed_free(struct vm_area_struct *vma,
@@ -964,9 +965,11 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
}

if (behavior == MADV_DONTNEED || behavior == MADV_DONTNEED_LOCKED)
- return madvise_dontneed_single_vma(NULL, vma, start, end);
+ return madvise_dontneed_single_vma(
+ madv_behavior->tlb, vma, start, end);
else if (behavior == MADV_FREE)
- return madvise_free_single_vma(NULL, vma, start, end);
+ return madvise_free_single_vma(
+ madv_behavior->tlb, vma, start, end);
else
return -EINVAL;
}
@@ -1639,6 +1642,32 @@ static void madvise_unlock(struct mm_struct *mm, int behavior)
mmap_read_unlock(mm);
}

+static bool madvise_batch_tlb_flush(int behavior)
+{
+ switch (behavior) {
+ case MADV_DONTNEED:
+ case MADV_DONTNEED_LOCKED:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static void madvise_init_tlb(struct madvise_behavior *madv_behavior,
+ struct mm_struct *mm)
+{
+ if (!madvise_batch_tlb_flush(madv_behavior->behavior))
+ return;
+ tlb_gather_mmu(madv_behavior->tlb, mm);
+}
+
+static void madvise_finish_tlb(struct madvise_behavior *madv_behavior)
+{
+ if (!madvise_batch_tlb_flush(madv_behavior->behavior))
+ return;
+ tlb_finish_mmu(madv_behavior->tlb);
+}
+
static bool is_valid_madvise(unsigned long start, size_t len_in, int behavior)
{
size_t len;
@@ -1791,14 +1820,20 @@ static int madvise_do_behavior(struct mm_struct *mm,
int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior)
{
int error;
- struct madvise_behavior madv_behavior = {.behavior = behavior};
+ struct mmu_gather tlb;
+ struct madvise_behavior madv_behavior = {
+ .behavior = behavior,
+ .tlb = &tlb,
+ };

if (madvise_should_skip(start, len_in, behavior, &error))
return error;
error = madvise_lock(mm, behavior);
if (error)
return error;
+ madvise_init_tlb(&madv_behavior, mm);
error = madvise_do_behavior(mm, start, len_in, &madv_behavior);
+ madvise_finish_tlb(&madv_behavior);
madvise_unlock(mm, behavior);

return error;
@@ -1815,13 +1850,18 @@ static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter,
{
ssize_t ret = 0;
size_t total_len;
- struct madvise_behavior madv_behavior = {.behavior = behavior};
+ struct mmu_gather tlb;
+ struct madvise_behavior madv_behavior = {
+ .behavior = behavior,
+ .tlb = &tlb,
+ };

total_len = iov_iter_count(iter);

ret = madvise_lock(mm, behavior);
if (ret)
return ret;
+ madvise_init_tlb(&madv_behavior, mm);

while (iov_iter_count(iter)) {
unsigned long start = (unsigned long)iter_iov_addr(iter);
@@ -1850,14 +1890,17 @@ static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter,
}

/* Drop and reacquire lock to unwind race. */
+ madvise_finish_tlb(&madv_behavior);
madvise_unlock(mm, behavior);
madvise_lock(mm, behavior);
+ madvise_init_tlb(&madv_behavior, mm);
continue;
}
if (ret < 0)
break;
iov_iter_advance(iter, iter_iov_len(iter));
}
+ madvise_finish_tlb(&madv_behavior);
madvise_unlock(mm, behavior);

ret = (total_len - iov_iter_count(iter)) ? : ret;
--
2.39.5