[PATCH v2] [RFC] mm: fs: Invalidate BH LRU during page migration

From: Chris Goldsworthy
Date: Thu Feb 11 2021 - 00:37:09 EST


Pages containing buffer_heads that are in one of the per-CPU
buffer_head LRU caches will be pinned and thus cannot be migrated.
This can prevent CMA allocations from succeeding; such allocations are
often used on platforms with co-processors (such as a DSP) that can
only use physically contiguous memory. It can also prevent memory
hot-unplugging from succeeding, since that involves migrating at least
MIN_MEMORY_BLOCK_SIZE bytes of memory, which ranges from 8 MiB to 1 GiB
depending on the architecture in use.

Correspondingly, invalidate the BH LRU caches before a migration
starts, and prevent any buffer_head from being added to those caches
until the migration has finished.
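
To illustrate the intended ordering, here is a sketch (not part of the
patch) mirroring the __alloc_contig_migrate_range() hunk below; the
migration_target_control setup and the retry loop are elided:

	migrate_prep();		/* bh_lru_disable() + lru_add_drain_all() */

	ret = migrate_pages(&cc->migratepages, alloc_migration_target,
			    NULL, (unsigned long)&mtc, cc->mode,
			    MR_CONTIG_RANGE);

	migrate_finish();	/* bh_lru_enable() */

	if (ret < 0)
		putback_movable_pages(&cc->migratepages);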

Signed-off-by: Chris Goldsworthy <cgoldswo@xxxxxxxxxxxxxx>
Cc: Minchan Kim <minchan@xxxxxxxxxx>
Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---
fs/buffer.c | 54 +++++++++++++++++++++++++++++++++++++++++++--
include/linux/buffer_head.h | 8 +++++++
include/linux/migrate.h | 2 ++
mm/migrate.c | 19 ++++++++++++++++
mm/page_alloc.c | 3 +++
mm/swap.c | 7 +++++-
6 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 96c7604..634e474 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1274,6 +1274,10 @@ struct bh_lru {

static DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};

+/* These are used to control the BH LRU invalidation during page migration */
+static struct cpumask lru_needs_invalidation;
+static bool bh_lru_disabled = false;
+
#ifdef CONFIG_SMP
#define bh_lru_lock() local_irq_disable()
#define bh_lru_unlock() local_irq_enable()
@@ -1292,7 +1296,9 @@ static inline void check_irqs_on(void)
/*
* Install a buffer_head into this cpu's LRU. If not already in the LRU, it is
* inserted at the front, and the buffer_head at the back if any is evicted.
- * Or, if already in the LRU it is moved to the front.
+ * Or, if already in the LRU it is moved to the front. Note that if the LRU
+ * is disabled because of an ongoing page migration, the buffer_head will not
+ * be inserted into the LRU.
*/
static void bh_lru_install(struct buffer_head *bh)
{
@@ -1303,6 +1309,9 @@ static void bh_lru_install(struct buffer_head *bh)
check_irqs_on();
bh_lru_lock();

+ if (bh_lru_disabled)
+ goto out;
+
b = this_cpu_ptr(&bh_lrus);
for (i = 0; i < BH_LRU_SIZE; i++) {
swap(evictee, b->bhs[i]);
@@ -1313,6 +1322,7 @@ static void bh_lru_install(struct buffer_head *bh)
}

get_bh(bh);
+out:
bh_lru_unlock();
brelse(evictee);
}
@@ -1328,6 +1338,10 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)

check_irqs_on();
bh_lru_lock();
+
+ if (bh_lru_disabled)
+ goto out;
+
for (i = 0; i < BH_LRU_SIZE; i++) {
struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);

@@ -1346,6 +1360,7 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
break;
}
}
+out:
bh_lru_unlock();
return ret;
}
@@ -1446,7 +1461,7 @@ EXPORT_SYMBOL(__bread_gfp);
* This doesn't race because it runs in each cpu either in irq
* or with preempt disabled.
*/
-static void invalidate_bh_lru(void *arg)
+void invalidate_bh_lru(void *arg)
{
struct bh_lru *b = &get_cpu_var(bh_lrus);
int i;
@@ -1477,6 +1492,41 @@ void invalidate_bh_lrus(void)
}
EXPORT_SYMBOL_GPL(invalidate_bh_lrus);

+bool need_bh_lru_invalidation(int cpu)
+{
+ return cpumask_test_cpu(cpu, &lru_needs_invalidation);
+}
+
+void bh_lru_disable(void)
+{
+ int cpu;
+
+ bh_lru_disabled = true;
+
+ /*
+ * This barrier ensures that callers of bh_lru_install() running
+ * after it observe bh_lru_disabled == true (until bh_lru_enable()
+ * is eventually called).
+ */
+ smp_mb();
+
+ /*
+ * It's alright if someone comes along and hot-plugs a new CPU,
+ * since bh_lru_disabled is already true. The hot-remove
+ * case is handled in buffer_exit_cpu_dead().
+ */
+ for_each_online_cpu(cpu) {
+ if (has_bh_in_lru(cpu, NULL))
+ cpumask_set_cpu(cpu, &lru_needs_invalidation);
+ }
+}
+
+void bh_lru_enable(void)
+{
+ bh_lru_disabled = false;
+ cpumask_clear(&lru_needs_invalidation);
+}
+
void set_bh_page(struct buffer_head *bh,
struct page *page, unsigned long offset)
{
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 6b47f94..78eb5ee 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -193,7 +193,11 @@ void __breadahead_gfp(struct block_device *, sector_t block, unsigned int size,
gfp_t gfp);
struct buffer_head *__bread_gfp(struct block_device *,
sector_t block, unsigned size, gfp_t gfp);
+void invalidate_bh_lru(void *arg);
void invalidate_bh_lrus(void);
+bool need_bh_lru_invalidation(int cpu);
+void bh_lru_disable(void);
+void bh_lru_enable(void);
struct buffer_head *alloc_buffer_head(gfp_t gfp_flags);
void free_buffer_head(struct buffer_head * bh);
void unlock_buffer(struct buffer_head *bh);
@@ -401,6 +405,10 @@ extern int __set_page_dirty_buffers(struct page *page);
#else /* CONFIG_BLOCK */

static inline void buffer_init(void) {}
+static inline void invalidate_bh_lru(void *arg) {}
+static inline bool need_bh_lru_invalidation(int cpu) { return false; }
+static inline void bh_lru_disable(void) {}
+static inline void bh_lru_enable(void) {}
static inline int try_to_free_buffers(struct page *page) { return 1; }
static inline int inode_has_buffers(struct inode *inode) { return 0; }
static inline void invalidate_inode_buffers(struct inode *inode) {}
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 3a38963..9e4a2dc 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -46,6 +46,7 @@ extern int isolate_movable_page(struct page *page, isolate_mode_t mode);
extern void putback_movable_page(struct page *page);

extern void migrate_prep(void);
+extern void migrate_finish(void);
extern void migrate_prep_local(void);
extern void migrate_page_states(struct page *newpage, struct page *page);
extern void migrate_page_copy(struct page *newpage, struct page *page);
@@ -67,6 +68,7 @@ static inline int isolate_movable_page(struct page *page, isolate_mode_t mode)
{ return -EBUSY; }

static inline int migrate_prep(void) { return -ENOSYS; }
+static inline int migrate_finish(void) { return -ENOSYS; }
static inline int migrate_prep_local(void) { return -ENOSYS; }

static inline void migrate_page_states(struct page *newpage, struct page *page)
diff --git a/mm/migrate.c b/mm/migrate.c
index a69da8a..a8928ee7 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -65,6 +65,16 @@
void migrate_prep(void)
{
/*
+ * If a page has buffer_heads contained in one of the per-cpu
+ * BH LRU caches, that page can't be migrated. Accordingly, we
+ * call bh_lru_disable() to prevent further buffer_heads from
+ * being cached, before we invalidate the LRUs in
+ * lru_add_drain_all(). The LRUs are re-enabled in
+ * migrate_finish().
+ */
+ bh_lru_disable();
+
+ /*
* Clear the LRU lists so pages can be isolated.
* Note that pages may be moved off the LRU after we have
* drained them. Those pages will fail to migrate like other
@@ -73,6 +83,15 @@ void migrate_prep(void)
lru_add_drain_all();
}

+void migrate_finish(void)
+{
+ /*
+ * Re-enable the per-cpu BH LRU caches, after having disabled them
+ * in migrate_prep().
+ */
+ bh_lru_enable();
+}
+
/* Do the necessary work of migrate_prep but not if it involves other CPUs */
void migrate_prep_local(void)
{
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6446778..e4cb959 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -8493,6 +8493,9 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
ret = migrate_pages(&cc->migratepages, alloc_migration_target,
NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE);
}
+
+ migrate_finish();
+
if (ret < 0) {
putback_movable_pages(&cc->migratepages);
return ret;
diff --git a/mm/swap.c b/mm/swap.c
index 31b844d..c733c95 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -36,6 +36,7 @@
#include <linux/hugetlb.h>
#include <linux/page_idle.h>
#include <linux/local_lock.h>
+#include <linux/buffer_head.h>

#include "internal.h"

@@ -628,6 +629,9 @@ void lru_add_drain_cpu(int cpu)
if (pagevec_count(pvec))
pagevec_lru_move_fn(pvec, lru_lazyfree_fn);

+ if (need_bh_lru_invalidation(cpu))
+ invalidate_bh_lru(NULL);
+
activate_page_drain(cpu);
}

@@ -815,7 +819,8 @@ void lru_add_drain_all(void)
pagevec_count(&per_cpu(lru_pvecs.lru_deactivate_file, cpu)) ||
pagevec_count(&per_cpu(lru_pvecs.lru_deactivate, cpu)) ||
pagevec_count(&per_cpu(lru_pvecs.lru_lazyfree, cpu)) ||
- need_activate_page_drain(cpu)) {
+ need_activate_page_drain(cpu) ||
+ need_bh_lru_invalidation(cpu)) {
INIT_WORK(work, lru_add_drain_per_cpu);
queue_work_on(cpu, mm_percpu_wq, work);
__cpumask_set_cpu(cpu, &has_work);
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project