Re: [PATCH] [RFC] mm: fs: Invalidate BH LRU during page migration

From: Minchan Kim
Date: Wed Feb 03 2021 - 18:40:42 EST


On Mon, Feb 01, 2021 at 10:55:47PM -0800, Chris Goldsworthy wrote:
> Pages containing buffer_heads that are in the buffer_head LRU cache
> will be pinned and thus cannot be migrated. Correspondingly,
> invalidate the BH LRU before a migration starts and stop any
> buffer_head from being cached in the LRU, until migration has
> finished.

Thanks for the work, Chris. I have a few of comments below.

>
> Signed-off-by: Chris Goldsworthy <cgoldswo@xxxxxxxxxxxxxx>
> Cc: Minchan Kim <minchan@xxxxxxxxxx>
> Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx>
> ---
> fs/buffer.c | 6 ++++++
> include/linux/buffer_head.h | 3 +++
> include/linux/migrate.h | 2 ++
> mm/migrate.c | 18 ++++++++++++++++++
> mm/page_alloc.c | 3 +++
> mm/swap.c | 3 +++
> 6 files changed, 35 insertions(+)
>
> diff --git a/fs/buffer.c b/fs/buffer.c
> index 96c7604..39ec4ec 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -1289,6 +1289,8 @@ static inline void check_irqs_on(void)
> #endif
> }
>
> +bool bh_migration_done = true;

How about "bh_lru_disable"?

> +
> /*
> * Install a buffer_head into this cpu's LRU. If not already in the LRU, it is
> * inserted at the front, and the buffer_head at the back if any is evicted.
> @@ -1303,6 +1305,9 @@ static void bh_lru_install(struct buffer_head *bh)
> check_irqs_on();
> bh_lru_lock();
>
> + if (!bh_migration_done)
> + goto out;
> +

Let's add why we want it in the description in bh_lru_install's description.

> b = this_cpu_ptr(&bh_lrus);
> for (i = 0; i < BH_LRU_SIZE; i++) {
> swap(evictee, b->bhs[i]);
> @@ -1313,6 +1318,7 @@ static void bh_lru_install(struct buffer_head *bh)
> }
>
> get_bh(bh);
> +out:
> bh_lru_unlock();
> brelse(evictee);
> }
> diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
> index 6b47f94..ae4eb6d 100644
> --- a/include/linux/buffer_head.h
> +++ b/include/linux/buffer_head.h
> @@ -193,6 +193,9 @@ void __breadahead_gfp(struct block_device *, sector_t block, unsigned int size,
> gfp_t gfp);
> struct buffer_head *__bread_gfp(struct block_device *,
> sector_t block, unsigned size, gfp_t gfp);
> +
> +extern bool bh_migration_done;
> +
> void invalidate_bh_lrus(void);
> struct buffer_head *alloc_buffer_head(gfp_t gfp_flags);
> void free_buffer_head(struct buffer_head * bh);
> diff --git a/include/linux/migrate.h b/include/linux/migrate.h
> index 3a38963..9e4a2dc 100644
> --- a/include/linux/migrate.h
> +++ b/include/linux/migrate.h
> @@ -46,6 +46,7 @@ extern int isolate_movable_page(struct page *page, isolate_mode_t mode);
> extern void putback_movable_page(struct page *page);
>
> extern void migrate_prep(void);
> +extern void migrate_finish(void);
> extern void migrate_prep_local(void);
> extern void migrate_page_states(struct page *newpage, struct page *page);
> extern void migrate_page_copy(struct page *newpage, struct page *page);
> @@ -67,6 +68,7 @@ static inline int isolate_movable_page(struct page *page, isolate_mode_t mode)
> { return -EBUSY; }
>
> static inline int migrate_prep(void) { return -ENOSYS; }
> +static inline int migrate_finish(void) { return -ENOSYS; }
> static inline int migrate_prep_local(void) { return -ENOSYS; }
>
> static inline void migrate_page_states(struct page *newpage, struct page *page)
> diff --git a/mm/migrate.c b/mm/migrate.c
> index a69da8a..08c981d 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -64,6 +64,19 @@
> */
> void migrate_prep(void)
> {
> + bh_migration_done = false;
> +
> + /*
> + * This barrier ensures that callers of bh_lru_install() between
> + * the barrier and the call to invalidate_bh_lrus() read
> + * bh_migration_done() as false.
> + */
> + /*
> + * TODO: Remove me? lru_add_drain_all() already has an smp_mb(),
> + * but it would be good to ensure that the barrier isn't forgotten.
> + */
> + smp_mb();
> +
> /*
> * Clear the LRU lists so pages can be isolated.
> * Note that pages may be moved off the LRU after we have
> @@ -73,6 +86,11 @@ void migrate_prep(void)
> lru_add_drain_all();
> }
>
> +void migrate_finish(void)
> +{
> + bh_migration_done = true;
> +}
> +
> /* Do the necessary work of migrate_prep but not if it involves other CPUs */
> void migrate_prep_local(void)
> {
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 6446778..e4cb959 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -8493,6 +8493,9 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
> ret = migrate_pages(&cc->migratepages, alloc_migration_target,
> NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE);
> }
> +
> + migrate_finish();
> +
> if (ret < 0) {
> putback_movable_pages(&cc->migratepages);
> return ret;
> diff --git a/mm/swap.c b/mm/swap.c
> index 31b844d..97efc49 100644
> --- a/mm/swap.c
> +++ b/mm/swap.c
> @@ -36,6 +36,7 @@
> #include <linux/hugetlb.h>
> #include <linux/page_idle.h>
> #include <linux/local_lock.h>
> +#include <linux/buffer_head.h>
>
> #include "internal.h"
>
> @@ -759,6 +760,8 @@ void lru_add_drain_all(void)
> if (WARN_ON(!mm_percpu_wq))
> return;
>
> + invalidate_bh_lrus();

Instead of adding a new IPI there, how about adding need_bh_lru_drain(cpu)
in lru_add_drain_all and then calls invalidate_bh_lru in lru_add_drain_cpu?
Not a strong but looks like more harmonized with existing LRU draining
code.

Thanks for the work.