[PATCH] btrfs: fix subpage locked bitmap leak in cow_fixup writeback path
From: Werner Kasselman
Date: Mon Mar 16 2026 - 05:58:35 EST
|
Hi, While auditing the btrfs subpage writeback path in 7.0-rc4, I found a locked bitmap leak in extent_writepage_io() when btrfs_writepage_cow_fixup() returns -EAGAIN. Since commit c96d0e392141 ("btrfs: mark all dirty sectors as locked inside writepage_delalloc()"), writepage_delalloc() calls btrfs_folio_set_lock() for all dirty sectors, setting bits in the subpage locked bitmap and incrementing nr_locked.
These are normally cleaned up by btrfs_folio_end_lock_bitmap() at the end of extent_writepage(). However, when cow_fixup returns -EAGAIN, extent_writepage_io() calls plain folio_unlock() and returns 1. This causes extent_writepage() to skip btrfs_folio_end_lock_bitmap() entirely. The locked bitmap bits and nr_locked counter are leaked. When writeback retries the folio, btrfs_folio_set_lock() tries to set the same bits again and hits the ASSERT at subpage.c:746. The fix replaces folio_unlock() with btrfs_folio_end_lock_bitmap(), which clears the locked bitmap bits before unlocking. For non-subpage configurations, btrfs_folio_end_lock_bitmap() falls through to plain folio_unlock(), so behavior
is unchanged. This affects subpage configurations (sectorsize < PAGE_SIZE, e.g. 4K sector on 64K-page ARM64) when the cow_fixup path triggers, which happens when a page is dirtied via GUP/pin_user_pages without going through the filesystem. I also wrote a standalone bitmap state machine simulation that reproduces the invariant violation and verifies the fix: tools/testing/btrfs/test-subpage-cow-fixup.c $ cc -o test tools/testing/btrfs/test-subpage-cow-fixup.c && ./test TEST 1: Buggy path (folio_unlock) ... PASS (bug reproduced) TEST 2: Fixed path (end_lock_bitmap) ... PASS TEST 3: Non-subpage path (both behave) ... PASS TEST 4: Partial async + cow_fixup (buggy) ... PASS (bug reproduced) TEST 5: Partial async + cow_fixup (fixed) ... PASS Please review. Thanks, Werner |
Attachment:
0001-btrfs-fix-subpage-locked-bitmap-leak-in-cow_fixup-wr.patch
Description: 0001-btrfs-fix-subpage-locked-bitmap-leak-in-cow_fixup-wr.patch
/*
* Standalone test for btrfs subpage locked bitmap leak in cow_fixup path.
*
* Simulates the bitmap state machine from fs/btrfs/subpage.c and the
* writeback sequence from fs/btrfs/extent_io.c to demonstrate that
* folio_unlock() in extent_writepage_io()'s -EAGAIN path leaks locked
* bitmap bits, causing an ASSERT in btrfs_folio_set_lock() on retry.
*
* Usage:
* cc -o test-subpage-cow-fixup test-subpage-cow-fixup.c && ./test-subpage-cow-fixup
*
* Expected output:
* TEST 1: Buggy path (folio_unlock) ... FAIL (leaked bitmap state)
* TEST 2: Fixed path (end_lock_bitmap) ... PASS
* TEST 3: Non-subpage path (both behave) ... PASS
* TEST 4: Partial async + cow_fixup (buggy) ... FAIL (leaked bitmap state)
* TEST 5: Partial async + cow_fixup (fixed) ... PASS
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <assert.h>
/*
* Minimal simulation of struct btrfs_folio_state from fs/btrfs/subpage.h.
*
* In the kernel, bitmaps[] holds interleaved per-type bitmaps (uptodate,
* dirty, writeback, ordered, checked, locked). We only model the locked
* bitmap since that's what the bug affects.
*/
#define SECTORS_PER_PAGE 16 /* 64K page / 4K sector */
#define BITMAP_NR_LOCKED 5 /* btrfs_bitmap_nr_locked */
struct sim_folio_state {
int nr_locked;
unsigned long bitmaps[1]; /* simplified: one unsigned long is enough */
bool folio_locked;
};
/* Bit offset for the locked bitmap segment. */
static inline int locked_start_bit(void)
{
return SECTORS_PER_PAGE * BITMAP_NR_LOCKED;
}
/*
* Simulates btrfs_folio_set_lock() from subpage.c:726-751.
* Sets locked bitmap bits and increments nr_locked.
* Returns false if ASSERT would fire (bits already set).
*/
static bool sim_folio_set_lock(struct sim_folio_state *bfs,
unsigned long submit_bitmap)
{
int start = locked_start_bit();
for (int bit = 0; bit < SECTORS_PER_PAGE; bit++) {
if (!(submit_bitmap & (1UL << bit)))
continue;
/* This is the ASSERT at subpage.c:744-746. */
if (bfs->bitmaps[0] & (1UL << (bit + start))) {
/* ASSERT fires: bit already set from previous attempt */
return false;
}
bfs->bitmaps[0] |= (1UL << (bit + start));
bfs->nr_locked++;
}
return true;
}
/*
* Simulates btrfs_folio_end_lock_bitmap() from subpage.c:307-341.
* Clears locked bitmap bits and decrements nr_locked.
* Unlocks folio if nr_locked reaches 0.
*/
static void sim_folio_end_lock_bitmap(struct sim_folio_state *bfs,
unsigned long bitmap, bool is_subpage)
{
int start = locked_start_bit();
int cleared = 0;
/* Non-subpage: just unlock. (subpage.c:318-320) */
if (!is_subpage) {
bfs->folio_locked = false;
return;
}
/* nr_locked == 0: plain lock_page() case. (subpage.c:323-326) */
if (bfs->nr_locked == 0) {
bfs->folio_locked = false;
return;
}
/* Clear bits and count. (subpage.c:329-337) */
for (int bit = 0; bit < SECTORS_PER_PAGE; bit++) {
if (!(bitmap & (1UL << bit)))
continue;
if (bfs->bitmaps[0] & (1UL << (bit + start))) {
bfs->bitmaps[0] &= ~(1UL << (bit + start));
cleared++;
}
}
assert(bfs->nr_locked >= cleared);
bfs->nr_locked -= cleared;
if (bfs->nr_locked == 0)
bfs->folio_locked = false;
}
/* Simulates plain folio_unlock() -- does NOT touch bitmap state. */
static void sim_folio_unlock(struct sim_folio_state *bfs)
{
bfs->folio_locked = false;
}
static void init_folio(struct sim_folio_state *bfs)
{
memset(bfs, 0, sizeof(*bfs));
bfs->folio_locked = true;
}
static bool bitmap_is_clean(struct sim_folio_state *bfs)
{
return bfs->nr_locked == 0 && bfs->bitmaps[0] == 0;
}
/*
* TEST 1: Buggy path -- folio_unlock() in cow_fixup -EAGAIN.
*
* Sequence:
* writepage_delalloc:1468 btrfs_folio_set_lock(submit_bitmap)
* extent_writepage_io:1745 btrfs_writepage_cow_fixup() returns -EAGAIN
* extent_writepage_io:1749 folio_unlock() <-- BUG: skips bitmap
* extent_writepage:1902 return 0 <-- skips cleanup
* ... fixup worker completes, writeback retries ...
* writepage_delalloc:1468 btrfs_folio_set_lock() <-- ASSERT fires
*/
static int test_buggy_cow_fixup(void)
{
struct sim_folio_state bfs;
unsigned long submit_bitmap = 0x000f; /* 4 dirty sectors */
/* First writeback attempt. */
init_folio(&bfs);
sim_folio_set_lock(&bfs, submit_bitmap);
/* cow_fixup returns -EAGAIN, buggy path uses folio_unlock(). */
sim_folio_unlock(&bfs);
/* Folio is unlocked but bitmap state leaked. */
if (bfs.nr_locked != 0 || bfs.bitmaps[0] != 0) {
/* Writeback retries: btrfs_folio_set_lock() on same bits. */
bfs.folio_locked = true;
if (!sim_folio_set_lock(&bfs, submit_bitmap)) {
/* ASSERT fired -- bug reproduced. */
return 1;
}
}
return 0;
}
/*
* TEST 2: Fixed path -- btrfs_folio_end_lock_bitmap() in cow_fixup -EAGAIN.
*
* Same sequence but with the patched code.
*/
static int test_fixed_cow_fixup(void)
{
struct sim_folio_state bfs;
unsigned long submit_bitmap = 0x000f;
init_folio(&bfs);
sim_folio_set_lock(&bfs, submit_bitmap);
/* cow_fixup returns -EAGAIN, fixed path uses end_lock_bitmap(). */
sim_folio_end_lock_bitmap(&bfs, submit_bitmap, true);
/* Bitmap state should be clean. */
if (!bitmap_is_clean(&bfs))
return 0; /* fail: state not clean */
/* Writeback retries: btrfs_folio_set_lock() should succeed. */
bfs.folio_locked = true;
if (!sim_folio_set_lock(&bfs, submit_bitmap))
return 0; /* fail: ASSERT would fire */
/* Clean up and verify. */
sim_folio_end_lock_bitmap(&bfs, submit_bitmap, true);
return bitmap_is_clean(&bfs) ? 1 : 0;
}
/*
* TEST 3: Non-subpage -- both paths behave identically.
*
* For non-subpage, btrfs_folio_set_lock() is never called, so nr_locked == 0.
* Both folio_unlock() and btrfs_folio_end_lock_bitmap() just unlock.
*/
static int test_non_subpage(void)
{
struct sim_folio_state bfs;
unsigned long submit_bitmap = 0x0001; /* single page = 1 bit */
/* Non-subpage: no btrfs_folio_set_lock() call. */
init_folio(&bfs);
/* Both paths should just unlock. */
sim_folio_end_lock_bitmap(&bfs, submit_bitmap, false);
if (bfs.folio_locked)
return 0;
init_folio(&bfs);
sim_folio_unlock(&bfs);
if (bfs.folio_locked)
return 0;
return 1;
}
/*
* TEST 4: Partial async + cow_fixup (buggy).
*
* writepage_delalloc() handles sectors 0-3 via compression (async, ret > 0),
* clears those from submit_bitmap. Sectors 4-7 remain in submit_bitmap.
* Then cow_fixup fires and the buggy path leaks sectors 4-7's locked bits.
*/
static int test_partial_async_buggy(void)
{
struct sim_folio_state bfs;
unsigned long submit_bitmap = 0x00ff; /* 8 dirty sectors */
unsigned long async_bits = 0x000f; /* sectors 0-3 async */
init_folio(&bfs);
sim_folio_set_lock(&bfs, submit_bitmap);
/* Async path clears its bits from submit_bitmap. */
submit_bitmap &= ~async_bits;
/* Async completion clears its locked bits. */
sim_folio_end_lock_bitmap(&bfs, async_bits, true);
/* Folio still locked -- sectors 4-7 remain. */
assert(bfs.folio_locked);
assert(bfs.nr_locked == 4);
/* cow_fixup -EAGAIN, buggy path. */
sim_folio_unlock(&bfs);
/* 4 locked bits leaked. */
if (bfs.nr_locked != 4)
return 0;
/* Retry hits ASSERT. */
bfs.folio_locked = true;
if (!sim_folio_set_lock(&bfs, 0x00ff))
return 1; /* ASSERT fired -- bug reproduced */
return 0;
}
/*
* TEST 5: Partial async + cow_fixup (fixed).
*
* Same as test 4 but with the fixed code path.
*/
static int test_partial_async_fixed(void)
{
struct sim_folio_state bfs;
unsigned long submit_bitmap = 0x00ff;
unsigned long async_bits = 0x000f;
init_folio(&bfs);
sim_folio_set_lock(&bfs, submit_bitmap);
submit_bitmap &= ~async_bits;
/* Async completion. */
sim_folio_end_lock_bitmap(&bfs, async_bits, true);
assert(bfs.folio_locked);
assert(bfs.nr_locked == 4);
/* cow_fixup -EAGAIN, fixed path clears remaining bits. */
sim_folio_end_lock_bitmap(&bfs, submit_bitmap, true);
if (!bitmap_is_clean(&bfs))
return 0;
if (bfs.folio_locked)
return 0;
/* Retry succeeds. */
bfs.folio_locked = true;
if (!sim_folio_set_lock(&bfs, 0x00ff))
return 0;
sim_folio_end_lock_bitmap(&bfs, 0x00ff, true);
return bitmap_is_clean(&bfs) ? 1 : 0;
}
struct test_case {
const char *name;
int (*fn)(void);
bool expect_pass; /* true = fn returns 1, false = fn returns 1 (bug repro) */
};
int main(void)
{
struct test_case tests[] = {
{ "Buggy path (folio_unlock)", test_buggy_cow_fixup, false },
{ "Fixed path (end_lock_bitmap)", test_fixed_cow_fixup, true },
{ "Non-subpage path (both behave)", test_non_subpage, true },
{ "Partial async + cow_fixup (buggy)", test_partial_async_buggy, false },
{ "Partial async + cow_fixup (fixed)", test_partial_async_fixed, true },
};
int n = sizeof(tests) / sizeof(tests[0]);
int passed = 0;
int failed = 0;
for (int i = 0; i < n; i++) {
int ret = tests[i].fn();
bool ok;
if (tests[i].expect_pass)
ok = (ret == 1);
else
ok = (ret == 1); /* bug-repro tests return 1 on successful repro */
printf("TEST %d: %-40s ... %s\n", i + 1, tests[i].name,
ok ? "PASS" : "FAIL");
if (ok)
passed++;
else
failed++;
}
printf("\n%d/%d tests passed\n", passed, n);
return failed ? 1 : 0;
}