Re: [PATCH 5/9] mm/madvise: define and use madvise_behavior struct for madvise_do_behavior()

From: Lorenzo Stoakes
Date: Tue Mar 11 2025 - 08:26:39 EST


On Mon, Mar 10, 2025 at 10:23:14AM -0700, SeongJae Park wrote:
> To implement batched tlb flushes for MADV_DONTNEED[_LOCKED] and
> MADV_FREE, an mmu_gather object in addition to the behavior integer needs
> to be passed to the internal logic. Using a struct can make it easy
> without increasing the number of parameters of all code paths towards
> the internal logic. Define a struct for the purpose and use it on the
> code path that starts from madvise_do_behavior() and ends on
> madvise_dontneed_free().

Oh a helper struct! I like these!

Nitty but...

I wonder if we should just add the mmu_gather field immediately even if
it isn't used yet?

Also I feel like this patch and 6 should be swapped around, as you are
laying the groundwork here for patch 7 but then doing something unrelated
in 6, unless I'm missing something.

Also maybe add a bit in commit msg about changing the madvise_walk_vmas()
visitor type signature (I wonder if that'd be better as a typedef tbh?)

However, this change looks fine aside from nits (and you know, helper
struct and I'm sold obviously ;) so:

Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@xxxxxxxxxx>

>
> Signed-off-by: SeongJae Park <sj@xxxxxxxxxx>
> ---
> mm/madvise.c | 36 ++++++++++++++++++++++++------------
> 1 file changed, 24 insertions(+), 12 deletions(-)
>
> diff --git a/mm/madvise.c b/mm/madvise.c
> index 469c25690a0e..ba2a78795207 100644
> --- a/mm/madvise.c
> +++ b/mm/madvise.c
> @@ -890,11 +890,16 @@ static bool madvise_dontneed_free_valid_vma(struct vm_area_struct *vma,
> return true;
> }
>
> +struct madvise_behavior {
> + int behavior;
> +};
> +
> static long madvise_dontneed_free(struct vm_area_struct *vma,
> struct vm_area_struct **prev,
> unsigned long start, unsigned long end,
> - int behavior)
> + struct madvise_behavior *madv_behavior)

Nitty, but not sure about the need for 'madv_' here. I think keeping this as
'behavior' is fine, as the type is very clear.

> {
> + int behavior = madv_behavior->behavior;
> struct mm_struct *mm = vma->vm_mm;
>
> *prev = vma;
> @@ -1249,8 +1254,10 @@ static long madvise_guard_remove(struct vm_area_struct *vma,
> static int madvise_vma_behavior(struct vm_area_struct *vma,
> struct vm_area_struct **prev,
> unsigned long start, unsigned long end,
> - unsigned long behavior)
> + void *behavior_arg)
> {
> + struct madvise_behavior *arg = behavior_arg;
> + int behavior = arg->behavior;
> int error;
> struct anon_vma_name *anon_name;
> unsigned long new_flags = vma->vm_flags;
> @@ -1270,7 +1277,7 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
> case MADV_FREE:
> case MADV_DONTNEED:
> case MADV_DONTNEED_LOCKED:
> - return madvise_dontneed_free(vma, prev, start, end, behavior);
> + return madvise_dontneed_free(vma, prev, start, end, arg);
> case MADV_NORMAL:
> new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ;
> break;
> @@ -1487,10 +1494,10 @@ static bool process_madvise_remote_valid(int behavior)
> */
> static
> int madvise_walk_vmas(struct mm_struct *mm, unsigned long start,
> - unsigned long end, unsigned long arg,
> + unsigned long end, void *arg,
> int (*visit)(struct vm_area_struct *vma,
> struct vm_area_struct **prev, unsigned long start,
> - unsigned long end, unsigned long arg))
> + unsigned long end, void *arg))
> {
> struct vm_area_struct *vma;
> struct vm_area_struct *prev;
> @@ -1548,7 +1555,7 @@ int madvise_walk_vmas(struct mm_struct *mm, unsigned long start,
> static int madvise_vma_anon_name(struct vm_area_struct *vma,
> struct vm_area_struct **prev,
> unsigned long start, unsigned long end,
> - unsigned long anon_name)
> + void *anon_name)
> {
> int error;
>
> @@ -1557,7 +1564,7 @@ static int madvise_vma_anon_name(struct vm_area_struct *vma,
> return -EBADF;
>
> error = madvise_update_vma(vma, prev, start, end, vma->vm_flags,
> - (struct anon_vma_name *)anon_name);
> + anon_name);
>
> /*
> * madvise() returns EAGAIN if kernel resources, such as
> @@ -1589,7 +1596,7 @@ int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
> if (end == start)
> return 0;
>
> - return madvise_walk_vmas(mm, start, end, (unsigned long)anon_name,
> + return madvise_walk_vmas(mm, start, end, anon_name,
> madvise_vma_anon_name);
> }
> #endif /* CONFIG_ANON_VMA_NAME */
> @@ -1673,8 +1680,10 @@ static bool is_madvise_populate(int behavior)
> }
>
> static int madvise_do_behavior(struct mm_struct *mm,
> - unsigned long start, size_t len_in, int behavior)
> + unsigned long start, size_t len_in,
> + struct madvise_behavior *madv_behavior)
> {
> + int behavior = madv_behavior->behavior;
> struct blk_plug plug;
> unsigned long end;
> int error;
> @@ -1688,7 +1697,7 @@ static int madvise_do_behavior(struct mm_struct *mm,
> if (is_madvise_populate(behavior))
> error = madvise_populate(mm, start, end, behavior);
> else
> - error = madvise_walk_vmas(mm, start, end, behavior,
> + error = madvise_walk_vmas(mm, start, end, madv_behavior,
> madvise_vma_behavior);
> blk_finish_plug(&plug);
> return error;
> @@ -1769,13 +1778,14 @@ static int madvise_do_behavior(struct mm_struct *mm,
> int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior)
> {
> int error;
> + struct madvise_behavior madv_behavior = {.behavior = behavior};
>
> if (madvise_should_skip(start, len_in, behavior, &error))
> return error;
> error = madvise_lock(mm, behavior);
> if (error)
> return error;
> - error = madvise_do_behavior(mm, start, len_in, behavior);
> + error = madvise_do_behavior(mm, start, len_in, &madv_behavior);
> madvise_unlock(mm, behavior);
>
> return error;
> @@ -1792,6 +1802,7 @@ static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter,
> {
> ssize_t ret = 0;
> size_t total_len;
> + struct madvise_behavior madv_behavior = {.behavior = behavior};
>
> total_len = iov_iter_count(iter);
>
> @@ -1807,7 +1818,8 @@ static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter,
> if (madvise_should_skip(start, len_in, behavior, &error))
> ret = error;
> else
> - ret = madvise_do_behavior(mm, start, len_in, behavior);
> + ret = madvise_do_behavior(mm, start, len_in,
> + &madv_behavior);
> /*
> * An madvise operation is attempting to restart the syscall,
> * but we cannot proceed as it would not be correct to repeat
> --
> 2.39.5