Re: [PATCH] Revert "ptdesc: remove references to folios from __pagetable_ctor() and pagetable_dtor()"
From: Matthew Wilcox
Date: Wed Feb 25 2026 - 11:25:13 EST
On Wed, Feb 25, 2026 at 04:03:54PM +0000, Matthew Wilcox wrote:
> On Tue, Feb 24, 2026 at 04:24:34PM -0800, Axel Rasmussen wrote:
> > This change swapped out mod_node_page_state for lruvec_stat_add_folio.
> > But, these two APIs are not interchangeable: the lruvec version also
> > increments memcg stats, in addition to "global" pgdat stats.
> >
> > So after this change, the "pagetables" memcg stat in memory.stat always
> > yields "0", which is a userspace visible regression.
> >
> > I tried to look for a refactor where we add a variant of
> > lruvec_stat_mod_folio which takes a pgdat and a memcg instead of a
> > folio, to try to adhere to the spirit of the original patch. But at the
> > end of the day this just means we have to call
> > folio_memcg(ptdesc_folio(ptdesc)) anyway, which doesn't really
> > accomplish much.
>
> Thank you! I hadn't been able to get a straight answer on this before.
>
> You're right that there's no good function to call, but that just means
> we need to make one. The principle here is that (eventually) different
> memdescs don't need to know about each other. Obviously we're not there
> yet, but we can start disentangling them by not casting ptdescs back to
> folios (even though they're created that way).
>
> Here are three patches smooshed together; I have them separately and
> I'll post them soon.
Argh, I fat-fingered the inclusion and ended up sending that mail without the patch. Here it is:
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5be3d8a8f806..34bc6f00ed7b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3519,21 +3519,32 @@ static inline unsigned long ptdesc_nr_pages(const struct ptdesc *ptdesc)
return compound_nr(ptdesc_page(ptdesc));
}
+static inline struct mem_cgroup *pagetable_memcg(const struct ptdesc *ptdesc)
+{
+#ifdef CONFIG_MEMCG
+ return ptdesc->pt_memcg;
+#else
+ return NULL;
+#endif
+}
+
static inline void __pagetable_ctor(struct ptdesc *ptdesc)
{
pg_data_t *pgdat = NODE_DATA(memdesc_nid(ptdesc->pt_flags));
+ struct mem_cgroup *memcg = pagetable_memcg(ptdesc);
__SetPageTable(ptdesc_page(ptdesc));
- mod_node_page_state(pgdat, NR_PAGETABLE, ptdesc_nr_pages(ptdesc));
+ memcg_stat_mod(memcg, pgdat, NR_PAGETABLE, ptdesc_nr_pages(ptdesc));
}
static inline void pagetable_dtor(struct ptdesc *ptdesc)
{
pg_data_t *pgdat = NODE_DATA(memdesc_nid(ptdesc->pt_flags));
+ struct mem_cgroup *memcg = pagetable_memcg(ptdesc);
ptlock_free(ptdesc);
__ClearPageTable(ptdesc_page(ptdesc));
- mod_node_page_state(pgdat, NR_PAGETABLE, -ptdesc_nr_pages(ptdesc));
+ memcg_stat_mod(memcg, pgdat, NR_PAGETABLE, -ptdesc_nr_pages(ptdesc));
}
static inline void pagetable_dtor_free(struct ptdesc *ptdesc)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 3cc8ae722886..e9b1da04938a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -564,7 +564,7 @@ FOLIO_MATCH(compound_head, _head_3);
* @ptl: Lock for the page table.
* @__page_type: Same as page->page_type. Unused for page tables.
* @__page_refcount: Same as page refcount.
- * @pt_memcg_data: Memcg data. Tracked for page tables here.
+ * @pt_memcg: Memcg that this page table belongs to.
*
* This struct overlays struct page for now. Do not modify without a good
* understanding of the issues.
@@ -602,7 +602,7 @@ struct ptdesc {
unsigned int __page_type;
atomic_t __page_refcount;
#ifdef CONFIG_MEMCG
- unsigned long pt_memcg_data;
+ struct mem_cgroup *pt_memcg;
#endif
};
@@ -617,7 +617,7 @@ TABLE_MATCH(rcu_head, pt_rcu_head);
TABLE_MATCH(page_type, __page_type);
TABLE_MATCH(_refcount, __page_refcount);
#ifdef CONFIG_MEMCG
-TABLE_MATCH(memcg_data, pt_memcg_data);
+TABLE_MATCH(memcg_data, pt_memcg);
#endif
#undef TABLE_MATCH
static_assert(sizeof(struct ptdesc) <= sizeof(struct page));
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 3c9c266cf782..0da38ea25c97 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -518,7 +518,8 @@ static inline const char *vm_event_name(enum vm_event_item item)
void mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
int val);
-
+void memcg_stat_mod(struct mem_cgroup *memcg, pg_data_t *pgdat,
+ enum node_stat_item idx, long val);
void lruvec_stat_mod_folio(struct folio *folio,
enum node_stat_item idx, int val);
@@ -536,6 +537,12 @@ static inline void mod_lruvec_state(struct lruvec *lruvec,
mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
}
+static inline void memcg_stat_mod(struct mem_cgroup *memcg, pg_data_t *pgdat,
+ enum node_stat_item idx, long val)
+{
+ mod_node_page_state(pgdat, idx, val);
+}
+
static inline void lruvec_stat_mod_folio(struct folio *folio,
enum node_stat_item idx, int val)
{
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a52da3a5e4fd..8d9e4a42aecf 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -787,24 +787,27 @@ void mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
mod_memcg_lruvec_state(lruvec, idx, val);
}
+void memcg_stat_mod(struct mem_cgroup *memcg, pg_data_t *pgdat,
+ enum node_stat_item idx, long val)
+{
+ /* Untracked pages have no memcg, no lruvec. Update only the node */
+ if (!memcg) {
+ mod_node_page_state(pgdat, idx, val);
+ } else {
+ struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
+ mod_lruvec_state(lruvec, idx, val);
+ }
+}
+
void lruvec_stat_mod_folio(struct folio *folio, enum node_stat_item idx,
int val)
{
struct mem_cgroup *memcg;
pg_data_t *pgdat = folio_pgdat(folio);
- struct lruvec *lruvec;
rcu_read_lock();
memcg = folio_memcg(folio);
- /* Untracked pages have no memcg, no lruvec. Update only the node */
- if (!memcg) {
- rcu_read_unlock();
- mod_node_page_state(pgdat, idx, val);
- return;
- }
-
- lruvec = mem_cgroup_lruvec(memcg, pgdat);
- mod_lruvec_state(lruvec, idx, val);
+ memcg_stat_mod(memcg, pgdat, idx, val);
rcu_read_unlock();
}
EXPORT_SYMBOL(lruvec_stat_mod_folio);
@@ -812,24 +815,9 @@ EXPORT_SYMBOL(lruvec_stat_mod_folio);
void mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val)
{
pg_data_t *pgdat = page_pgdat(virt_to_page(p));
- struct mem_cgroup *memcg;
- struct lruvec *lruvec;
rcu_read_lock();
- memcg = mem_cgroup_from_virt(p);
-
- /*
- * Untracked pages have no memcg, no lruvec. Update only the
- * node. If we reparent the slab objects to the root memcg,
- * when we free the slab object, we need to update the per-memcg
- * vmstats to keep it correct for the root memcg.
- */
- if (!memcg) {
- mod_node_page_state(pgdat, idx, val);
- } else {
- lruvec = mem_cgroup_lruvec(memcg, pgdat);
- mod_lruvec_state(lruvec, idx, val);
- }
+ memcg_stat_mod(mem_cgroup_from_virt(p), pgdat, idx, val);
rcu_read_unlock();
}