[PATCH] mm: wake_up only when swapcache_wq waitqueue is active

From: Barry Song
Date: Tue Oct 08 2024 - 08:18:27 EST


wake_up() acquires the waitqueue spinlock even when the waitqueue is
empty, which can incur cache synchronization overhead. Only call
wake_up() when the waitqueue is active.

Suggested-by: "Huang, Ying" <ying.huang@xxxxxxxxx>
Signed-off-by: Barry Song <v-songbaohua@xxxxxxxx>
---
mm/memory.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index fe21bd3beff5..4adb2d0bcc7a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4623,7 +4623,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
/* Clear the swap cache pin for direct swapin after PTL unlock */
if (need_clear_cache) {
swapcache_clear(si, entry, nr_pages);
- wake_up(&swapcache_wq);
+ if (waitqueue_active(&swapcache_wq))
+ wake_up(&swapcache_wq);
}
if (si)
put_swap_device(si);
@@ -4641,7 +4642,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
}
if (need_clear_cache) {
swapcache_clear(si, entry, nr_pages);
- wake_up(&swapcache_wq);
+ if (waitqueue_active(&swapcache_wq))
+ wake_up(&swapcache_wq);
}
if (si)
put_swap_device(si);
--
2.39.3 (Apple Git-146)

>
> > diff --git a/mm/memory.c b/mm/memory.c
> > index 2366578015ad..aae0e532d8b6 100644
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@ -4192,6 +4192,23 @@ static struct folio *alloc_swap_folio(struct vm_fault *vmf)
> >  }
> >  #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
> >
> > +/*
> > + * Alleviating the 'thundering herd' phenomenon using a waitqueue hash
> > + * when multiple do_swap_page() operations occur simultaneously.
> > + */
> > +#define SWAPCACHE_WAIT_TABLE_BITS 5
> > +#define SWAPCACHE_WAIT_TABLE_SIZE (1 << SWAPCACHE_WAIT_TABLE_BITS)
> > +static wait_queue_head_t swapcache_wqs[SWAPCACHE_WAIT_TABLE_SIZE];
> > +
> > +static int __init swapcache_wqs_init(void)
> > +{
> > +     for (int i = 0; i < SWAPCACHE_WAIT_TABLE_SIZE; i++)
> > +             init_waitqueue_head(&swapcache_wqs[i]);
> > +
> > +        return 0;
> > +}
> > +late_initcall(swapcache_wqs_init);
> > +
> >  /*
> >   * We enter with non-exclusive mmap_lock (to exclude vma changes,
> >   * but allow concurrent faults), and pte mapped but not yet locked.
> > @@ -4204,6 +4221,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
> >  {
> >       struct vm_area_struct *vma = vmf->vma;
> >       struct folio *swapcache, *folio = NULL;
> > +     DECLARE_WAITQUEUE(wait, current);
> > +     wait_queue_head_t *swapcache_wq;
> >       struct page *page;
> >       struct swap_info_struct *si = NULL;
> >       rmap_t rmap_flags = RMAP_NONE;
> > @@ -4297,12 +4316,16 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
> >                                * undetectable as pte_same() returns true due
> >                                * to entry reuse.
> >                                */
> > +                             swapcache_wq = &swapcache_wqs[hash_long(vmf->address & PMD_MASK,
> > +                                                     SWAPCACHE_WAIT_TABLE_BITS)];
> >                               if (swapcache_prepare(entry, nr_pages)) {
> >                                       /*
> >                                        * Relax a bit to prevent rapid
> >                                        * repeated page faults.
> >                                        */
> > +                                     add_wait_queue(swapcache_wq, &wait);
> >                                       schedule_timeout_uninterruptible(1);
> > +                                     remove_wait_queue(swapcache_wq, &wait);
> >                                       goto out_page;
> >                               }
> >                               need_clear_cache = true;
> > @@ -4609,8 +4632,10 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
> >               pte_unmap_unlock(vmf->pte, vmf->ptl);
> >  out:
> >       /* Clear the swap cache pin for direct swapin after PTL unlock */
> > -     if (need_clear_cache)
> > +     if (need_clear_cache) {
> >               swapcache_clear(si, entry, nr_pages);
> > +             wake_up(swapcache_wq);
> > +     }
> >       if (si)
> >               put_swap_device(si);
> >       return ret;
> > @@ -4625,8 +4650,10 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
> >               folio_unlock(swapcache);
> >               folio_put(swapcache);
> >       }
> > -     if (need_clear_cache)
> > +     if (need_clear_cache) {
> >               swapcache_clear(si, entry, nr_pages);
> > +             wake_up(swapcache_wq);
> > +     }
> >       if (si)
> >               put_swap_device(si);
> >       return ret;
>
> --
> Best Regards,
> Huang, Ying