On Wed, Oct 12, 2022 at 08:58:38PM +0800, Yu Kuai wrote:
From: Yu Kuai <yukuai3@xxxxxxxxxx>
Our test report a following uaf:
==================================================================
BUG: KASAN: use-after-free in sysfs_remove_link+0x23/0x60
Read of size 8 at addr ffff888117398db0 by task dmsetup/1097
CPU: 12 PID: 1097 Comm: dmsetup Not tainted 6.0.0-next-20221007-00011-gf46c8956
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-4
Call Trace:
<TASK>
? dump_stack_lvl+0x73/0x9f
? print_report+0x249/0x746
? __virt_addr_valid+0xd4/0x200
? sysfs_remove_link+0x23/0x60
? kasan_report+0xc0/0x120
? sysfs_remove_link+0x23/0x60
? __asan_load8+0x74/0x110
? sysfs_remove_link+0x23/0x60
? __unlink_disk_holder.isra.0+0x2f/0x80
? bd_unlink_disk_holder+0xd2/0x1c0
? dm_put_table_device+0xf1/0x250
? dm_put_device+0x14f/0x230
? linear_dtr+0x34/0x50
? dm_table_destroy+0x7b/0x280
? table_load+0x34a/0x710
? list_devices+0x4c0/0x4c0
? kvmalloc_node+0x7d/0x160
? __kmalloc_node+0x185/0x2b0
? ctl_ioctl+0x388/0x7b0
? list_devices+0x4c0/0x4c0
? free_params+0x50/0x50
? do_vfs_ioctl+0x931/0x10d0
? tick_program_event+0x65/0xd0
? __kasan_check_read+0x1d/0x30
? __fget_light+0xc2/0x370
? dm_ctl_ioctl+0x12/0x20
? __x64_sys_ioctl+0xd5/0x150
? do_syscall_64+0x35/0x80
? entry_SYSCALL_64_after_hwframe+0x63/0xcd
</TASK>
Allocated by task 1097:
kasan_save_stack+0x26/0x60
kasan_set_track+0x29/0x40
kasan_save_alloc_info+0x1f/0x40
__kasan_kmalloc+0xcb/0xe0
kmalloc_trace+0x7e/0x150
kobject_create_and_add+0x3d/0xc0
device_add_disk+0x429/0x7e0
dm_setup_md_queue+0x15b/0x240
table_load+0x469/0x710
ctl_ioctl+0x388/0x7b0
dm_ctl_ioctl+0x12/0x20
__x64_sys_ioctl+0xd5/0x150
do_syscall_64+0x35/0x80
entry_SYSCALL_64_after_hwframe+0x63/0xcd
Freed by task 1097:
kasan_save_stack+0x26/0x60
kasan_set_track+0x29/0x40
kasan_save_free_info+0x32/0x60
__kasan_slab_free+0x172/0x2c0
__kmem_cache_free+0x11c/0x560
kfree+0xd3/0x240
dynamic_kobj_release+0x1e/0x60
kobject_put+0x192/0x410
device_add_disk+0x535/0x7e0
dm_setup_md_queue+0x15b/0x240
table_load+0x469/0x710
ctl_ioctl+0x388/0x7b0
dm_ctl_ioctl+0x12/0x20
__x64_sys_ioctl+0xd5/0x150
do_syscall_64+0x35/0x80
entry_SYSCALL_64_after_hwframe+0x63/0xcd
This problem is very easy to reporduce by injecting failure while creating
dm, and root cause is that lifetime of bd_holder_dir/slave_dir is
problematic:
1) device alloc
alloc_disk_node
kzalloc_node -> gendisk
disk->part0 = bdev_alloc -> part0
device_initialize
kobject_init() -> part0->bd_device
2) device create
device_add_disk
device_add
kobject_add -> part0->bd_device
kobject_create_and_add -> part0->bd_holder_dir
kobject_create_and_add -> gendisk->slave_dir
3) device remove
del_gendisk
kobject_put -> part0->bd_holder_dir
kobject_put -> gendisk->slave_dir
device_del
kobject_del -> part0->bd_device
4) device free
disk_release
iput
bdev_free_inode
kfree -> gendisk
kmem_cache_free -> part0
bd_holder_dir and slave_dir is allocated in step 2), and freed in steup
3), while they are still accessible before step 4).
This patch delay the kobject destruction to disk_release/part_release to
fix the problem.
Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx>
---
block/genhd.c | 7 ++++---
block/partitions/core.c | 3 ++-
drivers/base/core.c | 1 -
3 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/block/genhd.c b/block/genhd.c
index 514395361d7c..7ebd085d3a3f 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -615,9 +615,6 @@ void del_gendisk(struct gendisk *disk)
blk_unregister_queue(disk);
- kobject_put(disk->part0->bd_holder_dir);
- kobject_put(disk->slave_dir);
-
part_stat_set_all(disk->part0, 0);
disk->part0->bd_stamp = 0;
if (!sysfs_deprecated)
@@ -1139,6 +1136,10 @@ static void disk_release(struct device *dev)
might_sleep();
WARN_ON_ONCE(disk_live(disk));
+ kobject_put(disk->part0->bd_holder_dir);
+ kobject_put(disk->slave_dir);
+ kobject_del(&dev->kobj);
+
/*
* To undo the all initialization from blk_mq_init_allocated_queue in
* case of a probe failure where add_disk is never called we have to
diff --git a/block/partitions/core.c b/block/partitions/core.c
index b8112f52d388..b86a66198cc8 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -250,6 +250,8 @@ static const struct attribute_group *part_attr_groups[] = {
static void part_release(struct device *dev)
{
+ kobject_put(dev_to_bdev(dev)->bd_holder_dir);
+ kobject_del(&dev->kobj);
put_disk(dev_to_bdev(dev)->bd_disk);
iput(dev_to_bdev(dev)->bd_inode);
}
@@ -279,7 +281,6 @@ static void delete_partition(struct block_device *part)
__invalidate_device(part, true);
xa_erase(&part->bd_disk->part_tbl, part->bd_partno);
- kobject_put(part->bd_holder_dir);
device_del(&part->bd_device);
/*
diff --git a/drivers/base/core.c b/drivers/base/core.c
index d02501933467..d2ff3d2a8710 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -3712,7 +3712,6 @@ void device_del(struct device *dev)
BUS_NOTIFY_REMOVED_DEVICE, dev);
kobject_uevent(&dev->kobj, KOBJ_REMOVE);
glue_dir = get_glue_dir(dev);
- kobject_del(&dev->kobj);
Wait, no, you can't do this without having to change a bunch of other
code too. This feels really wrong, how did you test that this actually
works for all devices in the systems (not just block devices)?
so NAK on this change.
greg k-h
.