On Thu, Jul 15, 2021 at 06:34:03PM +0800, Desmond Cheong Zhi Xi wrote:
Syzbot reports a warning in close_fs_devices that happens because
fs_devices->rw_devices is not 0 after calling btrfs_close_one_device
on each device.
This happens when a writeable device is removed in
__btrfs_free_extra_devids, but the rw device count is not decremented
accordingly. So when close_fs_devices is called, the removed device is
still counted and we get an off by 1 error.
Here is one call trace that was observed:
btrfs_mount_root():
btrfs_scan_one_device():
device_list_add(); <---------------- device added
btrfs_open_devices():
open_fs_devices():
btrfs_open_one_device(); <-------- rw device count ++
btrfs_fill_super():
open_ctree():
btrfs_free_extra_devids():
__btrfs_free_extra_devids(); <--- device removed
fail_tree_roots:
btrfs_close_devices():
close_fs_devices(); <------- rw device count off by 1
Fixes: cf89af146b7e ("btrfs: dev-replace: fail mount if we don't have replace item with target device")
What this patch did in the last hunk was the rw_devices decrement, but
conditional:
@@ -1080,9 +1071,6 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices,
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
list_del_init(&device->dev_alloc_list);
clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
- if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
- &device->dev_state))
- fs_devices->rw_devices--;
}
list_del_init(&device->dev_list);
fs_devices->num_devices--;
---
@@ -1078,6 +1078,7 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices,
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
list_del_init(&device->dev_alloc_list);
clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
+ fs_devices->rw_devices--;
}
list_del_init(&device->dev_list);
fs_devices->num_devices--;
So should it be reinstated in the original form? The rest of
cf89af146b7e handles unexpected device replace item during mount.
Adding the decrement is correct, but right now I'm not sure about the
corner case when teh devcie has the BTRFS_DEV_STATE_REPLACE_TGT bit set.
The state machine of the device bits and counters is not trivial so
fixing it one way or the other could lead to further syzbot reports if
we don't understand the issue.
@@ -536,7 +553,8 @@ void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
if (device->writeable) {
list_del_init(&device->dev_alloc_list);
device->writeable = 0;
- fs_devices->rw_devices--;
+ if (!device->is_tgtdev_for_dev_replace)
+ fs_devices->rw_devices--;
}
list_del_init(&device->dev_list);
fs_devices->num_devices--;
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index eea26e1b2fda..fb0a7fa2f70c 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -562,6 +562,8 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
if (fs_info->fs_devices->latest_bdev == src_device->bdev)
fs_info->fs_devices->latest_bdev = tgt_device->bdev;
list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
+ if (src_device->fs_devices->seeding)
+ fs_info->fs_devices->rw_devices++;
/* replace the sysfs entry */
btrfs_kobj_rm_device(fs_info, src_device);
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index e9cbbdb72978..6f662b34ba0e 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -569,8 +569,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
if (fs_info->fs_devices->latest_bdev == src_device->bdev)
fs_info->fs_devices->latest_bdev = tgt_device->bdev;
list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
- if (src_device->fs_devices->seeding)
- fs_info->fs_devices->rw_devices++;
+ fs_info->fs_devices->rw_devices++;
/* replace the sysfs entry */
btrfs_kobj_rm_device(fs_info, src_device);
- if (device->devid == BTRFS_DEV_REPLACE_DEVID) {
- /*
- * In the first step, keep the device which has
- * the correct fsid and the devid that is used
- * for the dev_replace procedure.
- * In the second step, the dev_replace state is
- * read from the device tree and it is known
- * whether the procedure is really active or
- * not, which means whether this device is
- * used or whether it should be removed.
- */
- if (step == 0 || test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
- &device->dev_state)) {
- continue;
- }
- }
+ /*
+ * We have already validated the presence of BTRFS_DEV_REPLACE_DEVID,
+ * in btrfs_init_dev_replace() so just continue.
+ */
+ if (device->devid == BTRFS_DEV_REPLACE_DEVID)
+ continue;