Re: [PATCH 1/4] mm: Move demotion related functions in memory-tiers.c
From: Donet Tom
Date: Thu Mar 12 2026 - 04:46:36 EST
Hi Alexandre,
On 3/11/26 4:32 PM, Alexandre Ghiti wrote:
Let's have all the demotion functions in this file, no functional
change intended.
Suggested-by: Gregory Price <gourry@xxxxxxxxxx>
Signed-off-by: Alexandre Ghiti <alex@xxxxxxxx>
---
include/linux/memory-tiers.h | 18 ++++++++
mm/memory-tiers.c | 75 +++++++++++++++++++++++++++++++++
mm/vmscan.c | 80 +-----------------------------------
3 files changed, 94 insertions(+), 79 deletions(-)
diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index 96987d9d95a8..0bf0d002939e 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -56,6 +56,9 @@ void mt_put_memory_types(struct list_head *memory_types);
int next_demotion_node(int node, const nodemask_t *allowed_mask);
void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
bool node_is_toptier(int node);
+unsigned int mt_demote_folios(struct list_head *demote_folios,
+ struct pglist_data *pgdat,
+ struct mem_cgroup *memcg);
#else
static inline int next_demotion_node(int node, const nodemask_t *allowed_mask)
{
@@ -71,6 +74,14 @@ static inline bool node_is_toptier(int node)
{
return true;
}
+
+static inline unsigned int mt_demote_folios(struct list_head *demote_folios,
+ struct pglist_data *pgdat,
+ struct mem_cgroup *memcg)
+{
+ return 0;
+}
+
#endif
#else
@@ -116,6 +127,13 @@ static inline bool node_is_toptier(int node)
return true;
}
+static inline unsigned int mt_demote_folios(struct list_head *demote_folios,
+ struct pglist_data *pgdat,
+ struct mem_cgroup *memcg)
+{
+ return 0;
+}
+
static inline int register_mt_adistance_algorithm(struct notifier_block *nb)
{
return 0;
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 986f809376eb..afdf21738a54 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -7,6 +7,7 @@
#include <linux/memory-tiers.h>
#include <linux/notifier.h>
#include <linux/sched/sysctl.h>
+#include <linux/migrate.h>
#include "internal.h"
@@ -373,6 +374,80 @@ int next_demotion_node(int node, const nodemask_t *allowed_mask)
return find_next_best_node(node, &mask);
}
+static struct folio *alloc_demote_folio(struct folio *src,
+ unsigned long private)
+{
+ struct folio *dst;
+ nodemask_t *allowed_mask;
+ struct migration_target_control *mtc;
+
+ mtc = (struct migration_target_control *)private;
+
+ allowed_mask = mtc->nmask;
+ /*
+ * make sure we allocate from the target node first also trying to
+ * demote or reclaim pages from the target node via kswapd if we are
+ * low on free memory on target node. If we don't do this and if
+ * we have free memory on the slower(lower) memtier, we would start
+ * allocating pages from slower(lower) memory tiers without even forcing
+ * a demotion of cold pages from the target memtier. This can result
+ * in the kernel placing hot pages in slower(lower) memory tiers.
+ */
+ mtc->nmask = NULL;
+ mtc->gfp_mask |= __GFP_THISNODE;
+ dst = alloc_migration_target(src, (unsigned long)mtc);
+ if (dst)
+ return dst;
+
+ mtc->gfp_mask &= ~__GFP_THISNODE;
+ mtc->nmask = allowed_mask;
+
+ return alloc_migration_target(src, (unsigned long)mtc);
+}
+
+unsigned int mt_demote_folios(struct list_head *demote_folios,
Demotion will happen only when different memory tiers are present, right? Since demote_folios() already implies that the folios are being demoted to a lower tier, is the mt_ prefix needed in the function name? I’m fine with keeping it as is, but I just wanted to clarify.
Otherwise it LGTM
Reviewed-by: Donet Tom <donettom@xxxxxxxxxxxxx>
+ struct pglist_data *pgdat,
+ struct mem_cgroup *memcg)
+{
+ int target_nid;
+ unsigned int nr_succeeded;
+ nodemask_t allowed_mask;
+
+ struct migration_target_control mtc = {
+ /*
+ * Allocate from 'node', or fail quickly and quietly.
+ * When this happens, 'page' will likely just be discarded
+ * instead of migrated.
+ */
+ .gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
+ __GFP_NOMEMALLOC | GFP_NOWAIT,
+ .nmask = &allowed_mask,
+ .reason = MR_DEMOTION,
+ };
+
+ if (list_empty(demote_folios))
+ return 0;
+
+ node_get_allowed_targets(pgdat, &allowed_mask);
+ mem_cgroup_node_filter_allowed(memcg, &allowed_mask);
+ if (nodes_empty(allowed_mask))
+ return 0;
+
+ target_nid = next_demotion_node(pgdat->node_id, &allowed_mask);
+ if (target_nid == NUMA_NO_NODE)
+ /* No lower-tier nodes or nodes were hot-unplugged. */
+ return 0;
+
+ mtc.nid = target_nid;
+
+ /* Demotion ignores all cpuset and mempolicy settings */
+ migrate_pages(demote_folios, alloc_demote_folio, NULL,
+ (unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION,
+ &nr_succeeded);
+
+ return nr_succeeded;
+}
+
static void disable_all_demotion_targets(void)
{
struct memory_tier *memtier;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0fc9373e8251..5e0138b94480 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -983,84 +983,6 @@ static void folio_check_dirty_writeback(struct folio *folio,
mapping->a_ops->is_dirty_writeback(folio, dirty, writeback);
}
-static struct folio *alloc_demote_folio(struct folio *src,
- unsigned long private)
-{
- struct folio *dst;
- nodemask_t *allowed_mask;
- struct migration_target_control *mtc;
-
- mtc = (struct migration_target_control *)private;
-
- allowed_mask = mtc->nmask;
- /*
- * make sure we allocate from the target node first also trying to
- * demote or reclaim pages from the target node via kswapd if we are
- * low on free memory on target node. If we don't do this and if
- * we have free memory on the slower(lower) memtier, we would start
- * allocating pages from slower(lower) memory tiers without even forcing
- * a demotion of cold pages from the target memtier. This can result
- * in the kernel placing hot pages in slower(lower) memory tiers.
- */
- mtc->nmask = NULL;
- mtc->gfp_mask |= __GFP_THISNODE;
- dst = alloc_migration_target(src, (unsigned long)mtc);
- if (dst)
- return dst;
-
- mtc->gfp_mask &= ~__GFP_THISNODE;
- mtc->nmask = allowed_mask;
-
- return alloc_migration_target(src, (unsigned long)mtc);
-}
-
-/*
- * Take folios on @demote_folios and attempt to demote them to another node.
- * Folios which are not demoted are left on @demote_folios.
- */
-static unsigned int demote_folio_list(struct list_head *demote_folios,
- struct pglist_data *pgdat,
- struct mem_cgroup *memcg)
-{
- int target_nid;
- unsigned int nr_succeeded;
- nodemask_t allowed_mask;
-
- struct migration_target_control mtc = {
- /*
- * Allocate from 'node', or fail quickly and quietly.
- * When this happens, 'page' will likely just be discarded
- * instead of migrated.
- */
- .gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
- __GFP_NOMEMALLOC | GFP_NOWAIT,
- .nmask = &allowed_mask,
- .reason = MR_DEMOTION,
- };
-
- if (list_empty(demote_folios))
- return 0;
-
- node_get_allowed_targets(pgdat, &allowed_mask);
- mem_cgroup_node_filter_allowed(memcg, &allowed_mask);
- if (nodes_empty(allowed_mask))
- return 0;
-
- target_nid = next_demotion_node(pgdat->node_id, &allowed_mask);
- if (target_nid == NUMA_NO_NODE)
- /* No lower-tier nodes or nodes were hot-unplugged. */
- return 0;
-
- mtc.nid = target_nid;
-
- /* Demotion ignores all cpuset and mempolicy settings */
- migrate_pages(demote_folios, alloc_demote_folio, NULL,
- (unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION,
- &nr_succeeded);
-
- return nr_succeeded;
-}
-
static bool may_enter_fs(struct folio *folio, gfp_t gfp_mask)
{
if (gfp_mask & __GFP_FS)
@@ -1573,7 +1495,7 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
/* 'folio_list' is always empty here */
/* Migrate folios selected for demotion */
- nr_demoted = demote_folio_list(&demote_folios, pgdat, memcg);
+ nr_demoted = mt_demote_folios(&demote_folios, pgdat, memcg);
nr_reclaimed += nr_demoted;
stat->nr_demoted += nr_demoted;
/* Folios that could not be demoted are still in @demote_folios */