[PATCH 4/4] percpu: use reclaim threshold instead of running for every page

From: Dennis Zhou
Date: Mon Apr 19 2021 - 18:51:06 EST


The last patch implements reclaim by adding 2 additional lists where a
chunk's lifecycle is:
active_slot -> to_depopulate_slot -> sidelined_slot

This worked great because we're able to nicely converge paths into
isolation. However, it's a bit aggressive to run reclaim for every free
page. Let's accumulate a few free pages before scheduling reclaim. To
accomplish this, the new lifecycle is:
active_slot -> sidelined_slot -> to_depopulate_slot -> sidelined_slot

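The transition from sidelined_slot -> to_depopulate_slot now occurs once
a threshold is reached, whereas before a chunk went directly to the
to_depopulate_slot. pcpu_nr_isolated_empty_pop_pages[] is introduced to
aid with this: it counts the empty populated pages held by isolated
chunks, and that count is what gets compared against the threshold.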

Suggested-by: Roman Gushchin <guro@xxxxxx>
Signed-off-by: Dennis Zhou <dennis@xxxxxxxxxx>
---
mm/percpu-internal.h | 1 +
mm/percpu-stats.c | 8 ++++++--
mm/percpu.c | 44 +++++++++++++++++++++++++++++++++++++-------
3 files changed, 44 insertions(+), 9 deletions(-)

diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h
index 10604dce806f..b3e43b016276 100644
--- a/mm/percpu-internal.h
+++ b/mm/percpu-internal.h
@@ -92,6 +92,7 @@ extern int pcpu_nr_slots;
extern int pcpu_sidelined_slot;
extern int pcpu_to_depopulate_slot;
extern int pcpu_nr_empty_pop_pages[];
+extern int pcpu_nr_isolated_empty_pop_pages[];

extern struct pcpu_chunk *pcpu_first_chunk;
extern struct pcpu_chunk *pcpu_reserved_chunk;
diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c
index 2125981acfb9..facc804eb86c 100644
--- a/mm/percpu-stats.c
+++ b/mm/percpu-stats.c
@@ -145,7 +145,7 @@ static int percpu_stats_show(struct seq_file *m, void *v)
int slot, max_nr_alloc;
int *buffer;
enum pcpu_chunk_type type;
- int nr_empty_pop_pages;
+ int nr_empty_pop_pages, nr_isolated_empty_pop_pages;

alloc_buffer:
spin_lock_irq(&pcpu_lock);
@@ -167,8 +167,11 @@ static int percpu_stats_show(struct seq_file *m, void *v)
}

nr_empty_pop_pages = 0;
- for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++)
+ nr_isolated_empty_pop_pages = 0;
+ for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++) {
nr_empty_pop_pages += pcpu_nr_empty_pop_pages[type];
+ nr_isolated_empty_pop_pages += pcpu_nr_isolated_empty_pop_pages[type];
+ }

#define PL(X) \
seq_printf(m, " %-20s: %12lld\n", #X, (long long int)pcpu_stats_ai.X)
@@ -202,6 +205,7 @@ static int percpu_stats_show(struct seq_file *m, void *v)
PU(min_alloc_size);
PU(max_alloc_size);
P("empty_pop_pages", nr_empty_pop_pages);
+ P("iso_empty_pop_pages", nr_isolated_empty_pop_pages);
seq_putc(m, '\n');

#undef PU
diff --git a/mm/percpu.c b/mm/percpu.c
index 79eebc80860d..ba13e683d022 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -110,6 +110,9 @@
#define PCPU_EMPTY_POP_PAGES_LOW 2
#define PCPU_EMPTY_POP_PAGES_HIGH 4

+/* only schedule reclaim if there are at least N empty pop pages sidelined */
+#define PCPU_EMPTY_POP_RECLAIM_THRESHOLD 4
+
#ifdef CONFIG_SMP
/* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */
#ifndef __addr_to_pcpu_ptr
@@ -183,6 +186,7 @@ static LIST_HEAD(pcpu_map_extend_chunks);
* The reserved chunk doesn't contribute to the count.
*/
int pcpu_nr_empty_pop_pages[PCPU_NR_CHUNK_TYPES];
+int pcpu_nr_isolated_empty_pop_pages[PCPU_NR_CHUNK_TYPES];

/*
* The number of populated pages in use by the allocator, protected by
@@ -582,8 +586,10 @@ static void pcpu_isolate_chunk(struct pcpu_chunk *chunk)
if (!chunk->isolated) {
chunk->isolated = true;
pcpu_nr_empty_pop_pages[type] -= chunk->nr_empty_pop_pages;
+ pcpu_nr_isolated_empty_pop_pages[type] +=
+ chunk->nr_empty_pop_pages;
+ list_move(&chunk->list, &pcpu_slot[pcpu_sidelined_slot]);
}
- list_move(&chunk->list, &pcpu_slot[pcpu_to_depopulate_slot]);
}

static void pcpu_reintegrate_chunk(struct pcpu_chunk *chunk)
@@ -595,6 +601,8 @@ static void pcpu_reintegrate_chunk(struct pcpu_chunk *chunk)
if (chunk->isolated) {
chunk->isolated = false;
pcpu_nr_empty_pop_pages[type] += chunk->nr_empty_pop_pages;
+ pcpu_nr_isolated_empty_pop_pages[type] -=
+ chunk->nr_empty_pop_pages;
pcpu_chunk_relocate(chunk, -1);
}
}
@@ -610,9 +618,15 @@ static void pcpu_reintegrate_chunk(struct pcpu_chunk *chunk)
*/
static inline void pcpu_update_empty_pages(struct pcpu_chunk *chunk, int nr)
{
+ enum pcpu_chunk_type type = pcpu_chunk_type(chunk);
+
chunk->nr_empty_pop_pages += nr;
- if (chunk != pcpu_reserved_chunk && !chunk->isolated)
- pcpu_nr_empty_pop_pages[pcpu_chunk_type(chunk)] += nr;
+ if (chunk != pcpu_reserved_chunk) {
+ if (chunk->isolated)
+ pcpu_nr_isolated_empty_pop_pages[type] += nr;
+ else
+ pcpu_nr_empty_pop_pages[type] += nr;
+ }
}

/*
@@ -2138,10 +2152,13 @@ static void pcpu_reclaim_populated(enum pcpu_chunk_type type)
struct list_head *pcpu_slot = pcpu_chunk_list(type);
struct pcpu_chunk *chunk;
struct pcpu_block_md *block;
+ LIST_HEAD(to_depopulate);
int i, end;

spin_lock_irq(&pcpu_lock);

+ list_splice_init(&pcpu_slot[pcpu_to_depopulate_slot], &to_depopulate);
+
restart:
/*
* Once a chunk is isolated to the to_depopulate list, the chunk is no
@@ -2149,9 +2166,9 @@ static void pcpu_reclaim_populated(enum pcpu_chunk_type type)
* other accessor is the free path which only returns area back to the
* allocator not touching the populated bitmap.
*/
- while (!list_empty(&pcpu_slot[pcpu_to_depopulate_slot])) {
- chunk = list_first_entry(&pcpu_slot[pcpu_to_depopulate_slot],
- struct pcpu_chunk, list);
+ while (!list_empty(&to_depopulate)) {
+ chunk = list_first_entry(&to_depopulate, struct pcpu_chunk,
+ list);
WARN_ON(chunk->immutable);

/*
@@ -2208,6 +2225,13 @@ static void pcpu_reclaim_populated(enum pcpu_chunk_type type)
&pcpu_slot[pcpu_sidelined_slot]);
}

+ if (pcpu_nr_isolated_empty_pop_pages[type] >=
+ PCPU_EMPTY_POP_RECLAIM_THRESHOLD) {
+ list_splice_tail_init(&pcpu_slot[pcpu_sidelined_slot],
+ &pcpu_slot[pcpu_to_depopulate_slot]);
+ pcpu_schedule_balance_work();
+ }
+
spin_unlock_irq(&pcpu_lock);
}

@@ -2291,7 +2315,13 @@ void free_percpu(void __percpu *ptr)
}
} else if (pcpu_should_reclaim_chunk(chunk)) {
pcpu_isolate_chunk(chunk);
- need_balance = true;
+ if (chunk->free_bytes == pcpu_unit_size ||
+ pcpu_nr_isolated_empty_pop_pages[pcpu_chunk_type(chunk)] >=
+ PCPU_EMPTY_POP_RECLAIM_THRESHOLD) {
+ list_splice_tail_init(&pcpu_slot[pcpu_sidelined_slot],
+ &pcpu_slot[pcpu_to_depopulate_slot]);
+ need_balance = true;
+ }
}

trace_percpu_free_percpu(chunk->base_addr, off, ptr);
--
2.31.1.368.gbe11c130af-goog