Re: [PATCH] percpu: flush tlb after pcpu_depopulate_chunk()

From: Guenter Roeck
Date: Sat Jul 03 2021 - 01:14:58 EST


On Sat, Jul 03, 2021 at 04:04:49AM +0000, Dennis Zhou wrote:
> Prior to "percpu: implement partial chunk depopulation",
> pcpu_depopulate_chunk() was called only on the destruction path. This
> meant the virtual address range was on its way back to vmalloc which
> will handle flushing the tlbs for us.
>
> However, now that we call pcpu_depopulate_chunk() during the active
> lifecycle of a chunk, we need to flush the tlb as well otherwise we can
> end up accessing the wrong page through an invalid tlb mapping.
>
> This was reported in [1].
>
> [1] https://lore.kernel.org/lkml/20210702191140.GA3166599@xxxxxxxxxxxx/
>
> Fixes: f183324133ea ("percpu: implement partial chunk depopulation")
> Reported-by: Guenter Roeck <linux@xxxxxxxxxxxx>
> Signed-off-by: Dennis Zhou <dennis@xxxxxxxxxx>

Tested-by: Guenter Roeck <linux@xxxxxxxxxxxx>

Thanks!
Guenter

> ---
> mm/percpu-km.c | 3 ++-
> mm/percpu-vm.c | 11 +++++++++--
> mm/percpu.c | 7 ++++---
> 3 files changed, 15 insertions(+), 6 deletions(-)
>
> diff --git a/mm/percpu-km.c b/mm/percpu-km.c
> index c9d529dc7651..6875fc3b2ed7 100644
> --- a/mm/percpu-km.c
> +++ b/mm/percpu-km.c
> @@ -39,7 +39,8 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
> }
>
> static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
> - int page_start, int page_end)
> + int page_start, int page_end,
> + bool flush_tlb)
> {
> /* nada */
> }
> diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
> index ee5d89fcd66f..6353cda1718e 100644
> --- a/mm/percpu-vm.c
> +++ b/mm/percpu-vm.c
> @@ -299,6 +299,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
> * @chunk: chunk to depopulate
> * @page_start: the start page
> * @page_end: the end page
> + * @flush_tlb: whether to flush the tlb
> *
> * For each cpu, depopulate and unmap pages [@page_start,@page_end)
> * from @chunk.
> @@ -307,7 +308,8 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
> * pcpu_alloc_mutex.
> */
> static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
> - int page_start, int page_end)
> + int page_start, int page_end,
> + bool flush_tlb)
> {
> struct page **pages;
>
> @@ -324,7 +326,12 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
>
> pcpu_unmap_pages(chunk, pages, page_start, page_end);
>
> - /* no need to flush tlb, vmalloc will handle it lazily */
> + /*
> + * We need to flush the tlb unless the caller will pass it to vmalloc,
> + * which will handle flushing for us.
> + */
> + if (flush_tlb)
> + pcpu_post_unmap_tlb_flush(chunk, page_start, page_end);
>
> pcpu_free_pages(chunk, pages, page_start, page_end);
> }
> diff --git a/mm/percpu.c b/mm/percpu.c
> index b4cebeca4c0c..e23ba0d22220 100644
> --- a/mm/percpu.c
> +++ b/mm/percpu.c
> @@ -1580,7 +1580,8 @@ static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk,
> static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
> int page_start, int page_end, gfp_t gfp);
> static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
> - int page_start, int page_end);
> + int page_start, int page_end,
> + bool flush_tlb);
> static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp);
> static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
> static struct page *pcpu_addr_to_page(void *addr);
> @@ -2016,7 +2017,7 @@ static void pcpu_balance_free(bool empty_only)
>
> bitmap_for_each_set_region(chunk->populated, rs, re, 0,
> chunk->nr_pages) {
> - pcpu_depopulate_chunk(chunk, rs, re);
> + pcpu_depopulate_chunk(chunk, rs, re, false);
> spin_lock_irq(&pcpu_lock);
> pcpu_chunk_depopulated(chunk, rs, re);
> spin_unlock_irq(&pcpu_lock);
> @@ -2189,7 +2190,7 @@ static void pcpu_reclaim_populated(void)
> continue;
>
> spin_unlock_irq(&pcpu_lock);
> - pcpu_depopulate_chunk(chunk, i + 1, end + 1);
> + pcpu_depopulate_chunk(chunk, i + 1, end + 1, true);
> cond_resched();
> spin_lock_irq(&pcpu_lock);
>
> --
> 2.32.0.93.g670b81a890-goog
>