[RFC PATCH v4 05/12] mm: thp: introduce lock/unlock_split_queue{_irqsave}()

From: Muchun Song
Date: Thu May 27 2021 - 05:34:31 EST


We should make the THP deferred split queue lock safe when LRU pages
are reparented. Similar to lock_page_lruvec{_irqsave, _irq}(),
introduce lock_split_queue{_irqsave}() and
unlock_split_queue{_irqrestore}() so that taking and releasing the
deferred split queue lock goes through helpers that can later be made
safe against reparenting.

In the next patch, we can use a similar approach (as was done for the
lruvec lock) to make the THP deferred split queue lock safe when LRU
pages are reparented.

Signed-off-by: Muchun Song <songmuchun@xxxxxxxxxxxxx>
---
mm/huge_memory.c | 96 +++++++++++++++++++++++++++++++++++++++++++-------------
1 file changed, 74 insertions(+), 22 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 233474770424..d8590408abbb 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -496,25 +496,76 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
}

#ifdef CONFIG_MEMCG
-static inline struct deferred_split *get_deferred_split_queue(struct page *page)
+static inline struct mem_cgroup *split_queue_to_memcg(struct deferred_split *queue)
{
- struct mem_cgroup *memcg = page_memcg(compound_head(page));
- struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+ return container_of(queue, struct mem_cgroup, deferred_split_queue);
+}
+
+static struct deferred_split *lock_split_queue(struct page *page)
+{
+ struct deferred_split *queue;
+ struct mem_cgroup *memcg;
+
+ memcg = page_memcg(compound_head(page));
+ if (memcg)
+ queue = &memcg->deferred_split_queue;
+ else
+ queue = &NODE_DATA(page_to_nid(page))->deferred_split_queue;
+ spin_lock(&queue->split_queue_lock);
+
+ return queue;
+}

+static struct deferred_split *lock_split_queue_irqsave(struct page *page,
+ unsigned long *flags)
+{
+ struct deferred_split *queue;
+ struct mem_cgroup *memcg;
+
+ memcg = page_memcg(compound_head(page));
if (memcg)
- return &memcg->deferred_split_queue;
+ queue = &memcg->deferred_split_queue;
else
- return &pgdat->deferred_split_queue;
+ queue = &NODE_DATA(page_to_nid(page))->deferred_split_queue;
+ spin_lock_irqsave(&queue->split_queue_lock, *flags);
+
+ return queue;
}
#else
-static inline struct deferred_split *get_deferred_split_queue(struct page *page)
+static struct deferred_split *lock_split_queue(struct page *page)
+{
+ struct deferred_split *queue;
+
+ queue = &NODE_DATA(page_to_nid(page))->deferred_split_queue;
+ spin_lock(&queue->split_queue_lock);
+
+ return queue;
+}
+
+static struct deferred_split *lock_split_queue_irqsave(struct page *page,
+ unsigned long *flags)
+
{
- struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+ struct deferred_split *queue;
+
+ queue = &NODE_DATA(page_to_nid(page))->deferred_split_queue;
+ spin_lock_irqsave(&queue->split_queue_lock, *flags);

- return &pgdat->deferred_split_queue;
+ return queue;
}
#endif

+static inline void unlock_split_queue(struct deferred_split *queue)
+{
+ spin_unlock(&queue->split_queue_lock);
+}
+
+static inline void unlock_split_queue_irqrestore(struct deferred_split *queue,
+ unsigned long flags)
+{
+ spin_unlock_irqrestore(&queue->split_queue_lock, flags);
+}
+
void prep_transhuge_page(struct page *page)
{
/*
@@ -2610,7 +2661,7 @@ bool can_split_huge_page(struct page *page, int *pextra_pins)
int split_huge_page_to_list(struct page *page, struct list_head *list)
{
struct page *head = compound_head(page);
- struct deferred_split *ds_queue = get_deferred_split_queue(head);
+ struct deferred_split *ds_queue;
struct anon_vma *anon_vma = NULL;
struct address_space *mapping = NULL;
int mapcount, extra_pins, ret;
@@ -2689,14 +2740,14 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
}

/* Prevent deferred_split_scan() touching ->_refcount */
- spin_lock(&ds_queue->split_queue_lock);
+ ds_queue = lock_split_queue(head);
mapcount = total_mapcount(head);
if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) {
if (!list_empty(page_deferred_list(head))) {
ds_queue->split_queue_len--;
list_del(page_deferred_list(head));
}
- spin_unlock(&ds_queue->split_queue_lock);
+ unlock_split_queue(ds_queue);
if (mapping) {
int nr = thp_nr_pages(head);

@@ -2711,7 +2762,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
__split_huge_page(page, list, end);
ret = 0;
} else {
- spin_unlock(&ds_queue->split_queue_lock);
+ unlock_split_queue(ds_queue);
fail: if (mapping)
xa_unlock(&mapping->i_pages);
local_irq_enable();
@@ -2733,24 +2784,21 @@ fail: if (mapping)

void free_transhuge_page(struct page *page)
{
- struct deferred_split *ds_queue = get_deferred_split_queue(page);
+ struct deferred_split *ds_queue;
unsigned long flags;

- spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
+ ds_queue = lock_split_queue_irqsave(page, &flags);
if (!list_empty(page_deferred_list(page))) {
ds_queue->split_queue_len--;
list_del(page_deferred_list(page));
}
- spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
+ unlock_split_queue_irqrestore(ds_queue, flags);
free_compound_page(page);
}

void deferred_split_huge_page(struct page *page)
{
- struct deferred_split *ds_queue = get_deferred_split_queue(page);
-#ifdef CONFIG_MEMCG
- struct mem_cgroup *memcg = page_memcg(compound_head(page));
-#endif
+ struct deferred_split *ds_queue;
unsigned long flags;

VM_BUG_ON_PAGE(!PageTransHuge(page), page);
@@ -2768,18 +2816,22 @@ void deferred_split_huge_page(struct page *page)
if (PageSwapCache(page))
return;

- spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
+ ds_queue = lock_split_queue_irqsave(page, &flags);
if (list_empty(page_deferred_list(page))) {
count_vm_event(THP_DEFERRED_SPLIT_PAGE);
list_add_tail(page_deferred_list(page), &ds_queue->split_queue);
ds_queue->split_queue_len++;
#ifdef CONFIG_MEMCG
- if (memcg)
+ if (page_memcg(page)) {
+ struct mem_cgroup *memcg;
+
+ memcg = split_queue_to_memcg(ds_queue);
set_shrinker_bit(memcg, page_to_nid(page),
deferred_split_shrinker.id);
+ }
#endif
}
- spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
+ unlock_split_queue_irqrestore(ds_queue, flags);
}

static unsigned long deferred_split_count(struct shrinker *shrink,
--
2.11.0