Re: [PATCH 1/4] mm: Move demotion related functions in memory-tiers.c

From: Alexandre Ghiti

Date: Fri Mar 13 2026 - 10:27:43 EST


Hi Tom,

On 3/12/26 09:44, Donet Tom wrote:

Hi Alexandre,

On 3/11/26 4:32 PM, Alexandre Ghiti wrote:
Let's have all the demotion functions in this file, no functional
change intended.

Suggested-by: Gregory Price <gourry@xxxxxxxxxx>
Signed-off-by: Alexandre Ghiti <alex@xxxxxxxx>
---
  include/linux/memory-tiers.h | 18 ++++++++
  mm/memory-tiers.c            | 75 +++++++++++++++++++++++++++++++++
  mm/vmscan.c                  | 80 +-----------------------------------
  3 files changed, 94 insertions(+), 79 deletions(-)

diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index 96987d9d95a8..0bf0d002939e 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -56,6 +56,9 @@ void mt_put_memory_types(struct list_head *memory_types);
  int next_demotion_node(int node, const nodemask_t *allowed_mask);
  void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
  bool node_is_toptier(int node);
+unsigned int mt_demote_folios(struct list_head *demote_folios,
+                  struct pglist_data *pgdat,
+                  struct mem_cgroup *memcg);
  #else
  static inline int next_demotion_node(int node, const nodemask_t *allowed_mask)
  {
@@ -71,6 +74,14 @@ static inline bool node_is_toptier(int node)
  {
      return true;
  }
+
+static inline unsigned int mt_demote_folios(struct list_head *demote_folios,
+                        struct pglist_data *pgdat,
+                        struct mem_cgroup *memcg)
+{
+    return 0;
+}
+
  #endif

  #else
@@ -116,6 +127,13 @@ static inline bool node_is_toptier(int node)
      return true;
  }

+static inline unsigned int mt_demote_folios(struct list_head *demote_folios,
+                        struct pglist_data *pgdat,
+                        struct mem_cgroup *memcg)
+{
+    return 0;
+}
+
  static inline int register_mt_adistance_algorithm(struct notifier_block *nb)
  {
      return 0;
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 986f809376eb..afdf21738a54 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -7,6 +7,7 @@
  #include <linux/memory-tiers.h>
  #include <linux/notifier.h>
  #include <linux/sched/sysctl.h>
+#include <linux/migrate.h>

  #include "internal.h"

@@ -373,6 +374,80 @@ int next_demotion_node(int node, const nodemask_t *allowed_mask)
      return find_next_best_node(node, &mask);
  }

+static struct folio *alloc_demote_folio(struct folio *src,
+                    unsigned long private)
+{
+    struct folio *dst;
+    nodemask_t *allowed_mask;
+    struct migration_target_control *mtc;
+
+    mtc = (struct migration_target_control *)private;
+
+    allowed_mask = mtc->nmask;
+    /*
+     * make sure we allocate from the target node first also trying to
+     * demote or reclaim pages from the target node via kswapd if we are
+     * low on free memory on target node. If we don't do this and if
+     * we have free memory on the slower(lower) memtier, we would start
+     * allocating pages from slower(lower) memory tiers without even forcing
+     * a demotion of cold pages from the target memtier. This can result
+     * in the kernel placing hot pages in slower(lower) memory tiers.
+     */
+    mtc->nmask = NULL;
+    mtc->gfp_mask |= __GFP_THISNODE;
+    dst = alloc_migration_target(src, (unsigned long)mtc);
+    if (dst)
+        return dst;
+
+    mtc->gfp_mask &= ~__GFP_THISNODE;
+    mtc->nmask = allowed_mask;
+
+    return alloc_migration_target(src, (unsigned long)mtc);
+}
+
+unsigned int mt_demote_folios(struct list_head *demote_folios,


Demotion will happen only when different memory tiers are present, right? Since demote_folios() already implies that the folios are being demoted to a lower tier, is the mt_ prefix needed in the function name? I’m fine with keeping it as is, but I just wanted to clarify.


You're right, demotion implies that memory tiers exist. But I like the mt_ prefix: some functions in memory-tiers.c already have it, so it adds consistency. Since you don't mind, I'll keep it :)



Otherwise it LGTM

Reviewed-by: Donet Tom <donettom@xxxxxxxxxxxxx>


Thanks for your time!

Alex



+                  struct pglist_data *pgdat,
+                  struct mem_cgroup *memcg)
+{
+    int target_nid;
+    unsigned int nr_succeeded;
+    nodemask_t allowed_mask;
+
+    struct migration_target_control mtc = {
+        /*
+         * Allocate from 'node', or fail quickly and quietly.
+         * When this happens, 'page' will likely just be discarded
+         * instead of migrated.
+         */
+        .gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
+            __GFP_NOMEMALLOC | GFP_NOWAIT,
+        .nmask = &allowed_mask,
+        .reason = MR_DEMOTION,
+    };
+
+    if (list_empty(demote_folios))
+        return 0;
+
+    node_get_allowed_targets(pgdat, &allowed_mask);
+    mem_cgroup_node_filter_allowed(memcg, &allowed_mask);
+    if (nodes_empty(allowed_mask))
+        return 0;
+
+    target_nid = next_demotion_node(pgdat->node_id, &allowed_mask);
+    if (target_nid == NUMA_NO_NODE)
+        /* No lower-tier nodes or nodes were hot-unplugged. */
+        return 0;
+
+    mtc.nid = target_nid;
+
+    /* Demotion ignores all cpuset and mempolicy settings */
+    migrate_pages(demote_folios, alloc_demote_folio, NULL,
+            (unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION,
+            &nr_succeeded);
+
+    return nr_succeeded;
+}
+
  static void disable_all_demotion_targets(void)
  {
      struct memory_tier *memtier;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0fc9373e8251..5e0138b94480 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -983,84 +983,6 @@ static void folio_check_dirty_writeback(struct folio *folio,
          mapping->a_ops->is_dirty_writeback(folio, dirty, writeback);
  }

-static struct folio *alloc_demote_folio(struct folio *src,
-        unsigned long private)
-{
-    struct folio *dst;
-    nodemask_t *allowed_mask;
-    struct migration_target_control *mtc;
-
-    mtc = (struct migration_target_control *)private;
-
-    allowed_mask = mtc->nmask;
-    /*
-     * make sure we allocate from the target node first also trying to
-     * demote or reclaim pages from the target node via kswapd if we are
-     * low on free memory on target node. If we don't do this and if
-     * we have free memory on the slower(lower) memtier, we would start
-     * allocating pages from slower(lower) memory tiers without even forcing
-     * a demotion of cold pages from the target memtier. This can result
-     * in the kernel placing hot pages in slower(lower) memory tiers.
-     */
-    mtc->nmask = NULL;
-    mtc->gfp_mask |= __GFP_THISNODE;
-    dst = alloc_migration_target(src, (unsigned long)mtc);
-    if (dst)
-        return dst;
-
-    mtc->gfp_mask &= ~__GFP_THISNODE;
-    mtc->nmask = allowed_mask;
-
-    return alloc_migration_target(src, (unsigned long)mtc);
-}
-
-/*
- * Take folios on @demote_folios and attempt to demote them to another node.
- * Folios which are not demoted are left on @demote_folios.
- */
-static unsigned int demote_folio_list(struct list_head *demote_folios,
-                      struct pglist_data *pgdat,
-                      struct mem_cgroup *memcg)
-{
-    int target_nid;
-    unsigned int nr_succeeded;
-    nodemask_t allowed_mask;
-
-    struct migration_target_control mtc = {
-        /*
-         * Allocate from 'node', or fail quickly and quietly.
-         * When this happens, 'page' will likely just be discarded
-         * instead of migrated.
-         */
-        .gfp_mask = (GFP_HIGHUSER_MOVABLE & ~__GFP_RECLAIM) |
-            __GFP_NOMEMALLOC | GFP_NOWAIT,
-        .nmask = &allowed_mask,
-        .reason = MR_DEMOTION,
-    };
-
-    if (list_empty(demote_folios))
-        return 0;
-
-    node_get_allowed_targets(pgdat, &allowed_mask);
-    mem_cgroup_node_filter_allowed(memcg, &allowed_mask);
-    if (nodes_empty(allowed_mask))
-        return 0;
-
-    target_nid = next_demotion_node(pgdat->node_id, &allowed_mask);
-    if (target_nid == NUMA_NO_NODE)
-        /* No lower-tier nodes or nodes were hot-unplugged. */
-        return 0;
-
-    mtc.nid = target_nid;
-
-    /* Demotion ignores all cpuset and mempolicy settings */
-    migrate_pages(demote_folios, alloc_demote_folio, NULL,
-              (unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION,
-              &nr_succeeded);
-
-    return nr_succeeded;
-}
-
  static bool may_enter_fs(struct folio *folio, gfp_t gfp_mask)
  {
      if (gfp_mask & __GFP_FS)
@@ -1573,7 +1495,7 @@ static unsigned int shrink_folio_list(struct list_head *folio_list,
      /* 'folio_list' is always empty here */

      /* Migrate folios selected for demotion */
-    nr_demoted = demote_folio_list(&demote_folios, pgdat, memcg);
+    nr_demoted = mt_demote_folios(&demote_folios, pgdat, memcg);
      nr_reclaimed += nr_demoted;
      stat->nr_demoted += nr_demoted;
      /* Folios that could not be demoted are still in @demote_folios */