[PATCH] mm: wake_up only when swapcache_wq waitqueue is active
From: Barry Song
Date: Tue Oct 08 2024 - 08:18:27 EST
wake_up() will acquire the spinlock even when the waitqueue is empty. This
might involve cache-synchronization overhead. Let's only call wake_up()
when the waitqueue is active.
Suggested-by: "Huang, Ying" <ying.huang@xxxxxxxxx>
Signed-off-by: Barry Song <v-songbaohua@xxxxxxxx>
---
mm/memory.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index fe21bd3beff5..4adb2d0bcc7a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4623,7 +4623,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
/* Clear the swap cache pin for direct swapin after PTL unlock */
if (need_clear_cache) {
swapcache_clear(si, entry, nr_pages);
- wake_up(&swapcache_wq);
+ if (waitqueue_active(&swapcache_wq))
+ wake_up(&swapcache_wq);
}
if (si)
put_swap_device(si);
@@ -4641,7 +4642,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
}
if (need_clear_cache) {
swapcache_clear(si, entry, nr_pages);
- wake_up(&swapcache_wq);
+ if (waitqueue_active(&swapcache_wq))
+ wake_up(&swapcache_wq);
}
if (si)
put_swap_device(si);
--
2.39.3 (Apple Git-146)
>
> > diff --git a/mm/memory.c b/mm/memory.c
> > index 2366578015ad..aae0e532d8b6 100644
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@ -4192,6 +4192,23 @@ static struct folio *alloc_swap_folio(struct vm_fault *vmf)
> >  }
> >  #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
> >
> > +/*
> > + * Alleviating the 'thundering herd' phenomenon using a waitqueue hash
> > + * when multiple do_swap_page() operations occur simultaneously.
> > + */
> > +#define SWAPCACHE_WAIT_TABLE_BITS 5
> > +#define SWAPCACHE_WAIT_TABLE_SIZE (1 << SWAPCACHE_WAIT_TABLE_BITS)
> > +static wait_queue_head_t swapcache_wqs[SWAPCACHE_WAIT_TABLE_SIZE];
> > +
> > +static int __init swapcache_wqs_init(void)
> > +{
> > +	for (int i = 0; i < SWAPCACHE_WAIT_TABLE_SIZE; i++)
> > +		init_waitqueue_head(&swapcache_wqs[i]);
> > +
> > +	return 0;
> > +}
> > +late_initcall(swapcache_wqs_init);
> > +
> >  /*
> >   * We enter with non-exclusive mmap_lock (to exclude vma changes,
> >   * but allow concurrent faults), and pte mapped but not yet locked.
> > @@ -4204,6 +4221,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
> >  {
> >  	struct vm_area_struct *vma = vmf->vma;
> >  	struct folio *swapcache, *folio = NULL;
> > +	DECLARE_WAITQUEUE(wait, current);
> > +	wait_queue_head_t *swapcache_wq;
> >  	struct page *page;
> >  	struct swap_info_struct *si = NULL;
> >  	rmap_t rmap_flags = RMAP_NONE;
> > @@ -4297,12 +4316,16 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
> >  				 * undetectable as pte_same() returns true due
> >  				 * to entry reuse.
> >  				 */
> > +				swapcache_wq = &swapcache_wqs[hash_long(vmf->address & PMD_MASK,
> > +							SWAPCACHE_WAIT_TABLE_BITS)];
> >  				if (swapcache_prepare(entry, nr_pages)) {
> >  					/*
> >  					 * Relax a bit to prevent rapid
> >  					 * repeated page faults.
> >  					 */
> > +					add_wait_queue(swapcache_wq, &wait);
> >  					schedule_timeout_uninterruptible(1);
> > +					remove_wait_queue(swapcache_wq, &wait);
> >  					goto out_page;
> >  				}
> >  				need_clear_cache = true;
> > @@ -4609,8 +4632,10 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
> >  		pte_unmap_unlock(vmf->pte, vmf->ptl);
> >  out:
> >  	/* Clear the swap cache pin for direct swapin after PTL unlock */
> > -	if (need_clear_cache)
> > +	if (need_clear_cache) {
> >  		swapcache_clear(si, entry, nr_pages);
> > +		wake_up(swapcache_wq);
> > +	}
> >  	if (si)
> >  		put_swap_device(si);
> >  	return ret;
> > @@ -4625,8 +4650,10 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
> >  		folio_unlock(swapcache);
> >  		folio_put(swapcache);
> >  	}
> > -	if (need_clear_cache)
> > +	if (need_clear_cache) {
> >  		swapcache_clear(si, entry, nr_pages);
> > +		wake_up(swapcache_wq);
> > +	}
> >  	if (si)
> >  		put_swap_device(si);
> >  	return ret;
>
> --
> Best Regards,
> Huang, Ying