[PATCH] mm: add memory.compact_unevictable_allowed cgroup attribute
From: Daniil Tatianin
Date: Tue Mar 17 2026 - 06:06:40 EST
The current global sysctl compact_unevictable_allowed is too coarse.
In environments with mixed workloads, we may want to protect specific
important cgroups from compaction to ensure their stability and
responsiveness, while allowing compaction for others.
This patch introduces a per-memcg compact_unevictable_allowed attribute.
This allows granular control over whether unevictable pages in a specific
cgroup can be compacted. The global sysctl still takes precedence if set
to disallow compaction, but this new setting allows opting out specific
cgroups.
This also adds a new ISOLATE_UNEVICTABLE_CHECK_MEMCG flag to
isolate_migratepages_block in order to preserve the old behavior of the
ISOLATE_UNEVICTABLE flag, which is used unconditionally by
isolate_migratepages_range.
Signed-off-by: Daniil Tatianin <d-tatianin@xxxxxxxxxxxxxx>
---
include/linux/memcontrol.h | 19 ++++++++++++++++++
include/linux/mmzone.h | 5 +++++
mm/compaction.c | 21 +++++++++++++++++---
mm/memcontrol.c | 40 ++++++++++++++++++++++++++++++++++++++
4 files changed, 82 insertions(+), 3 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 70b685a85bf4..13b7ef6cf511 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -227,6 +227,12 @@ struct mem_cgroup {
*/
bool oom_group;
+ /*
+ * Is compaction allowed to take unevictable pages accounted to
+ * this cgroup?
+ */
+ bool compact_unevictable_allowed;
+
int swappiness;
/* memory.events and memory.events.local */
@@ -640,6 +646,14 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target,
page_counter_read(&memcg->memory);
}
+static inline bool mem_cgroup_compact_unevictable_allowed(struct mem_cgroup *memcg)
+{
+ if (mem_cgroup_disabled() || !memcg)
+ return true;
+
+ return READ_ONCE(memcg->compact_unevictable_allowed);
+}
+
int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp);
/**
@@ -1092,6 +1106,11 @@ static inline bool mem_cgroup_disabled(void)
return true;
}
+static inline bool mem_cgroup_compact_unevictable_allowed(struct mem_cgroup *memcg)
+{
+ return true;
+}
+
static inline void memcg_memory_event(struct mem_cgroup *memcg,
enum memcg_memory_event event)
{
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3e51190a55e4..dadc9b66efa1 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -701,6 +701,11 @@ struct lruvec {
#define ISOLATE_ASYNC_MIGRATE ((__force isolate_mode_t)0x4)
/* Isolate unevictable pages */
#define ISOLATE_UNEVICTABLE ((__force isolate_mode_t)0x8)
+/*
+ * Isolate unevictable pages, but honor the page's cgroup settings if it
+ * explicitly disallows unevictable isolation.
+ */
+#define ISOLATE_UNEVICTABLE_CHECK_MEMCG ((__force isolate_mode_t)0x10)
/* LRU Isolation modes. */
typedef unsigned __bitwise isolate_mode_t;
diff --git a/mm/compaction.c b/mm/compaction.c
index 1e8f8eca318c..0dbb81aa5d2e 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1098,8 +1098,22 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
is_unevictable = folio_test_unevictable(folio);
/* Compaction might skip unevictable pages but CMA takes them */
- if (!(mode & ISOLATE_UNEVICTABLE) && is_unevictable)
- goto isolate_fail_put;
+ if (is_unevictable) {
+ if (mode & ISOLATE_UNEVICTABLE_CHECK_MEMCG) {
+ struct mem_cgroup *memcg;
+
+ rcu_read_lock();
+ memcg = folio_memcg_check(folio);
+
+ if (!mem_cgroup_compact_unevictable_allowed(memcg)) {
+ rcu_read_unlock();
+ goto isolate_fail_put;
+ }
+
+ rcu_read_unlock();
+ } else if (!(mode & ISOLATE_UNEVICTABLE))
+ goto isolate_fail_put;
+ }
/*
* To minimise LRU disruption, the caller can indicate with
@@ -2049,7 +2063,8 @@ static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
unsigned long low_pfn;
struct page *page;
const isolate_mode_t isolate_mode =
- (sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
+ (sysctl_compact_unevictable_allowed ?
+ ISOLATE_UNEVICTABLE_CHECK_MEMCG : 0) |
(cc->mode != MIGRATE_SYNC ? ISOLATE_ASYNC_MIGRATE : 0);
bool fast_find_block;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 772bac21d155..bd0230d93dd8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3839,6 +3839,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
WRITE_ONCE(memcg->zswap_writeback, true);
#endif
page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX);
+ WRITE_ONCE(memcg->compact_unevictable_allowed,
+ mem_cgroup_compact_unevictable_allowed(parent));
if (parent) {
WRITE_ONCE(memcg->swappiness, mem_cgroup_swappiness(parent));
@@ -4608,6 +4610,37 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
return nbytes;
}
+static int memory_compact_unevictable_allowed_show(struct seq_file *m, void *v)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
+
+ seq_printf(m, "%d\n", READ_ONCE(memcg->compact_unevictable_allowed));
+
+ return 0;
+}
+
+static ssize_t memory_compact_unevictable_allowed_write(
+ struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+ int ret, allowed;
+
+ buf = strstrip(buf);
+ if (!buf)
+ return -EINVAL;
+
+ ret = kstrtoint(buf, 0, &allowed);
+ if (ret)
+ return ret;
+
+ if (allowed != 0 && allowed != 1)
+ return -EINVAL;
+
+ WRITE_ONCE(memcg->compact_unevictable_allowed, allowed);
+
+ return nbytes;
+}
+
static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off)
{
@@ -4692,6 +4725,13 @@ static struct cftype memory_files[] = {
.flags = CFTYPE_NS_DELEGATABLE,
.write = memory_reclaim,
},
+ {
+ .name = "compact_unevictable_allowed",
+ /* For root use /proc/sys/vm/compact_unevictable_allowed */
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .seq_show = memory_compact_unevictable_allowed_show,
+ .write = memory_compact_unevictable_allowed_write,
+ },
{ } /* terminate */
};
--
2.34.1