Re: [PATCH v3 13/13] selftests/cgroup: extend test_hugetlb_memcg.c to support all huge page sizes

From: Sayali Patil

Date: Fri Apr 03 2026 - 13:19:41 EST




On 27/03/26 12:46, Sayali Patil wrote:
The hugetlb memcg selftest was previously skipped when the configured
huge page size was not 2MB, preventing the test from running on systems
using other default huge page sizes.

Detect the system's configured huge page size at runtime and use it for
the allocation instead of assuming a fixed 2MB size. This allows the
test to run on configurations using non-2MB huge pages and avoids
unnecessary skips.

Fixes: c0dddb7aa5f8 ("selftests: add a selftest to verify hugetlb usage in memcg")
Tested-by: Venkat Rao Bagalkote <venkat88@xxxxxxxxxxxxx>
Signed-off-by: Sayali Patil <sayalip@xxxxxxxxxxxxx>
---
.../selftests/cgroup/test_hugetlb_memcg.c | 66 ++++++++++++++-----
1 file changed, 48 insertions(+), 18 deletions(-)

diff --git a/tools/testing/selftests/cgroup/test_hugetlb_memcg.c b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c
index f451aa449be6..a449dbec16a8 100644
--- a/tools/testing/selftests/cgroup/test_hugetlb_memcg.c
+++ b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c
@@ -12,10 +12,15 @@
#define ADDR ((void *)(0x0UL))
#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB)
-/* mapping 8 MBs == 4 hugepages */
-#define LENGTH (8UL*1024*1024)
#define PROTECTION (PROT_READ | PROT_WRITE)
+/*
+ * This value matches the kernel's MEMCG_CHARGE_BATCH definition:
+ * see include/linux/memcontrol.h. If the kernel value changes, this
+ * test constant must be updated accordingly to stay consistent.
+ */
+#define MEMCG_CHARGE_BATCH 64U
+
/* borrowed from mm/hmm-tests.c */
static long get_hugepage_size(void)
{
@@ -84,11 +89,11 @@ static unsigned int check_first(char *addr)
return *(unsigned int *)addr;
}
-static void write_data(char *addr)
+static void write_data(char *addr, size_t length)
{
unsigned long i;
- for (i = 0; i < LENGTH; i++)
+ for (i = 0; i < length; i++)
*(addr + i) = (char)i;
}
@@ -96,26 +101,31 @@ static int hugetlb_test_program(const char *cgroup, void *arg)
{
char *test_group = (char *)arg;
void *addr;
+ long hpage_size = get_hugepage_size() * 1024;
long old_current, expected_current, current;
int ret = EXIT_FAILURE;
+ size_t length = 4 * hpage_size;
+ int pagesize, nr_pages;
+
+ pagesize = getpagesize();
old_current = cg_read_long(test_group, "memory.current");
set_nr_hugepages(20);
current = cg_read_long(test_group, "memory.current");
- if (current - old_current >= MB(2)) {
+ if (current - old_current >= hpage_size) {
ksft_print_msg(
"setting nr_hugepages should not increase hugepage usage.\n");
ksft_print_msg("before: %ld, after: %ld\n", old_current, current);
return EXIT_FAILURE;
}
- addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, 0, 0);
+ addr = mmap(ADDR, length, PROTECTION, FLAGS, 0, 0);
if (addr == MAP_FAILED) {
ksft_print_msg("fail to mmap.\n");
return EXIT_FAILURE;
}
current = cg_read_long(test_group, "memory.current");
- if (current - old_current >= MB(2)) {
+ if (current - old_current >= hpage_size) {
ksft_print_msg("mmap should not increase hugepage usage.\n");
ksft_print_msg("before: %ld, after: %ld\n", old_current, current);
goto out_failed_munmap;
@@ -124,10 +134,24 @@ static int hugetlb_test_program(const char *cgroup, void *arg)
/* read the first page */
check_first(addr);
- expected_current = old_current + MB(2);
+ nr_pages = hpage_size / pagesize;
+ expected_current = old_current + hpage_size;
current = cg_read_long(test_group, "memory.current");
- if (!values_close(expected_current, current, 5)) {
- ksft_print_msg("memory usage should increase by around 2MB.\n");
+ if (nr_pages < MEMCG_CHARGE_BATCH && current == old_current) {
+ /*
+ * Memory cgroup charging uses per-CPU stocks and batched updates to the
+ * memcg usage counters. For hugetlb allocations, the number of pages
+ * that memcg charges is expressed in base pages (nr_pages), not
+ * in hugepage units. When the charge for an allocation is smaller than
+ * the internal batching threshold (nr_pages < MEMCG_CHARGE_BATCH),
+ * it may be fully satisfied from the CPU’s local stock. In such
+ * cases memory.current does not necessarily
+ * increase.
+ * Therefore, treat a zero delta as valid behaviour here.
+ */
+ ksft_print_msg("no visible memcg charge, allocation consumed from local stock.\n");
+ } else if (!values_close(expected_current, current, 5)) {
+ ksft_print_msg("memory usage should increase by ~1 huge page.\n");
ksft_print_msg(
"expected memory: %ld, actual memory: %ld\n",
expected_current, current);
@@ -135,11 +159,11 @@ static int hugetlb_test_program(const char *cgroup, void *arg)
}
/* write to the whole range */
- write_data(addr);
+ write_data(addr, length);
current = cg_read_long(test_group, "memory.current");
- expected_current = old_current + MB(8);
+ expected_current = old_current + length;
if (!values_close(expected_current, current, 5)) {
- ksft_print_msg("memory usage should increase by around 8MB.\n");
+ ksft_print_msg("memory usage should increase by around 4 huge pages.\n");
ksft_print_msg(
"expected memory: %ld, actual memory: %ld\n",
expected_current, current);
@@ -147,7 +171,7 @@ static int hugetlb_test_program(const char *cgroup, void *arg)
}
/* unmap the whole range */
- munmap(addr, LENGTH);
+ munmap(addr, length);
current = cg_read_long(test_group, "memory.current");
expected_current = old_current;
if (!values_close(expected_current, current, 5)) {
@@ -162,13 +186,15 @@ static int hugetlb_test_program(const char *cgroup, void *arg)
return ret;
out_failed_munmap:
- munmap(addr, LENGTH);
+ munmap(addr, length);
return ret;
}
static int test_hugetlb_memcg(char *root)
{
int ret = KSFT_FAIL;
+ int num_pages = 20;
+ long hpage_size = get_hugepage_size();
char *test_group;
test_group = cg_name(root, "hugetlb_memcg_test");
@@ -177,7 +203,7 @@ static int test_hugetlb_memcg(char *root)
goto out;
}
- if (cg_write(test_group, "memory.max", "100M")) {
+ if (cg_write_numeric(test_group, "memory.max", num_pages * hpage_size * 1024)) {
ksft_print_msg("fail to set cgroup memory limit.\n");
goto out;
}
@@ -200,6 +226,7 @@ int main(int argc, char **argv)
{
char root[PATH_MAX];
int ret = EXIT_SUCCESS, has_memory_hugetlb_acc;
+ long val;
has_memory_hugetlb_acc = proc_mount_contains("memory_hugetlb_accounting");
if (has_memory_hugetlb_acc < 0)
@@ -208,12 +235,15 @@ int main(int argc, char **argv)
ksft_exit_skip("memory hugetlb accounting is disabled\n");
/* Unit is kB! */
- if (get_hugepage_size() != 2048) {
- ksft_print_msg("test_hugetlb_memcg requires 2MB hugepages\n");
+ val = get_hugepage_size();
+ if (val < 0) {
+ ksft_print_msg("Failed to read hugepage size\n");
ksft_test_result_skip("test_hugetlb_memcg\n");
return ret;
}
+ ksft_print_msg("Hugepage size: %ld kB\n", val);
+
if (cg_find_unified_root(root, sizeof(root), NULL))
ksft_exit_skip("cgroup v2 isn't mounted\n");

AI review question:


> - addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, 0, 0);
> + addr = mmap(ADDR, length, PROTECTION, FLAGS, 0, 0);
> if (addr == MAP_FAILED) {
> ksft_print_msg("fail to mmap.\n");
> return EXIT_FAILURE;
Should the test gracefully handle memory constraints by skipping
instead of returning EXIT_FAILURE here?
With the removal of the 2MB skip condition, the test will now run on systems
with large default huge pages (e.g., 1GB on x86_64). Reserving 20 huge pages
(20GB) and mapping 4 huge pages (4GB) might easily fail due to memory
exhaustion in typical automated testing environments, causing false test regressions.

I’ll update it in v4 to skip on memory constraints instead of returning EXIT_FAILURE.

> @@ -124,10 +134,24 @@ static int hugetlb_test_program(const char *cgroup, void *arg)
>
> /* read the first page */
> check_first(addr);
> - expected_current = old_current + MB(2);
> + nr_pages = hpage_size / pagesize;
> + expected_current = old_current + hpage_size;
> current = cg_read_long(test_group, "memory.current");
> - if (!values_close(expected_current, current, 5)) {
> - ksft_print_msg("memory usage should increase by around 2MB.\n");
> + if (nr_pages < MEMCG_CHARGE_BATCH && current == old_current) {
> + /*
> + * Memory cgroup charging uses per-CPU stocks and batched updates to the
> + * memcg usage counters. For hugetlb allocations, the number of pages
> + * that memcg charges is expressed in base pages (nr_pages), not
> + * in hugepage units. When the charge for an allocation is smaller than
> + * the internal batching threshold (nr_pages < MEMCG_CHARGE_BATCH),
> + * it may be fully satisfied from the CPU’s local stock. In such
> + * cases memory.current does not necessarily
> + * increase.
> + * Therefore, treat a zero delta as valid behaviour here.
> + */
> + ksft_print_msg("no visible memcg charge, allocation consumed from local stock.\n");
> + } else if (!values_close(expected_current, current, 5)) {
Does this exception fully account for memcg stock batching when the per-CPU
stock is empty?
If the stock lacks sufficient pages, charging the huge page triggers a stock
refill that charges exactly MEMCG_CHARGE_BATCH (64) base pages from the global
memcg counter.
On an architecture with 64KB base pages and 2MB contiguous huge pages,
nr_pages is 32. Refilling the stock would charge 64 base pages (4MB), which
increases memory.current by 4MB instead of the expected 2MB. Since
current != old_current, the test falls through to values_close(), which expects
a 2MB increase and may fail the test.

Valid point.
The current exception does not fully account for this
scenario when the per-CPU stock is empty and a refill charges
MEMCG_CHARGE_BATCH. This can indeed lead to a larger-than-expected jump
in memory.current and cause the test to fail.
I’ll update the logic in v4 to handle this case more robustly.

> @@ -177,7 +203,7 @@ static int test_hugetlb_memcg(char *root)
> goto out;
> }
>
> - if (cg_write(test_group, "memory.max", "100M")) {
> + if (cg_write_numeric(test_group, "memory.max", num_pages * > hpage_size * 1024)) {
Can this calculation overflow on 32-bit systems?
Since long is 32 bits on 32-bit systems, num_pages * hpage_size * 1024 can
exceed the 32-bit signed integer maximum if the architecture supports large huge pages (e.g., 256MB on MIPS). This would evaluate to 5,368,709,120,
resulting in a negative or truncated value, which sets memory.max to an
invalid or overly restrictive limit.

Yes, this can overflow on 32-bit systems. I’ll fix it in v4.