Re: [RFC PATCH v3 8/9] md: Implement ->corrupted_range()

From: Ruan Shiyang
Date: Thu Dec 17 2020 - 21:14:56 EST

Next message: Daejun Park: "RE: Re: Subject: [PATCH v14 1/3] scsi: ufs: Introduce HPB feature"
Previous message: Yonghong Song: "Re: [PATCH] btf: support ints larger than 128 bits"
In reply to: Ruan Shiyang: "Re: [RFC PATCH v3 8/9] md: Implement ->corrupted_range()"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

On 2020/12/16 上午4:51, Darrick J. Wong wrote:

On Tue, Dec 15, 2020 at 08:14:13PM +0800, Shiyang Ruan wrote:

With the support of ->rmap(), it is possible to obtain the superblock on
a mapped device.

If a pmem device is used as one target of a mapped device, we cannot
obtain its superblock directly. With the help of SYSFS, the mapped
device can be found from its target devices. So, we iterate over
bdev->bd_holder_disks to obtain the mapped device.

Signed-off-by: Shiyang Ruan <ruansy.fnst@xxxxxxxxxxxxxx>
---
drivers/md/dm.c | 66 +++++++++++++++++++++++++++++++++++++++++++
drivers/nvdimm/pmem.c | 9 ++++--
fs/block_dev.c | 21 ++++++++++++++
include/linux/genhd.h | 7 +++++
4 files changed, 100 insertions(+), 3 deletions(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 4e0cbfe3f14d..9da1f9322735 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -507,6 +507,71 @@ static int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
#define dm_blk_report_zones NULL
#endif /* CONFIG_BLK_DEV_ZONED */
+struct dm_blk_corrupt {
+ struct block_device *bdev;
+ sector_t offset;
+};
+
+static int dm_blk_corrupt_fn(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+{
+ struct dm_blk_corrupt *bc = data;
+
+ return bc->bdev == (void *)dev->bdev &&
+ (start <= bc->offset && bc->offset < start + len);
+}
+
+static int dm_blk_corrupted_range(struct gendisk *disk,
+ struct block_device *target_bdev,
+ loff_t target_offset, size_t len, void *data)
+{
+ struct mapped_device *md = disk->private_data;
+ struct block_device *md_bdev = md->bdev;
+ struct dm_table *map;
+ struct dm_target *ti;
+ struct super_block *sb;
+ int srcu_idx, i, rc = 0;
+ bool found = false;
+ sector_t disk_sec, target_sec = to_sector(target_offset);
+
+ map = dm_get_live_table(md, &srcu_idx);
+ if (!map)
+ return -ENODEV;
+
+ for (i = 0; i < dm_table_get_num_targets(map); i++) {
+ ti = dm_table_get_target(map, i);
+ if (ti->type->iterate_devices && ti->type->rmap) {
+ struct dm_blk_corrupt bc = {target_bdev, target_sec};
+
+ found = ti->type->iterate_devices(ti, dm_blk_corrupt_fn, &bc);
+ if (!found)
+ continue;
+ disk_sec = ti->type->rmap(ti, target_sec);

What happens if the dm device has multiple reverse mappings because the
physical storage is being shared at multiple LBAs? (e.g. a
deduplication target)

I thought the dm device knows the mapping relationship, so this could be handled by the implementation of ->rmap() in each target. Did I misunderstand?

+ break;
+ }
+ }
+
+ if (!found) {
+ rc = -ENODEV;
+ goto out;
+ }
+
+ sb = get_super(md_bdev);
+ if (!sb) {
+ rc = bd_disk_holder_corrupted_range(md_bdev, to_bytes(disk_sec), len, data);
+ goto out;
+ } else if (sb->s_op->corrupted_range) {
+ loff_t off = to_bytes(disk_sec - get_start_sect(md_bdev));
+
+ rc = sb->s_op->corrupted_range(sb, md_bdev, off, len, data);

This "call bd_disk_holder_corrupted_range or sb->s_op->corrupted_range"
logic appears twice; should it be refactored into a common helper?

Or, should the superblock dispatch part move to
bd_disk_holder_corrupted_range?

bd_disk_holder_corrupted_range() requires SYSFS to be configured. I introduced it to handle block devices whose superblock cannot be obtained with `get_super()`.

Usually, if we create a filesystem directly on a pmem device, or make some partitions on it first, we can use `get_super()` to get the superblock. In other cases, such as creating LVM on a pmem device, `get_super()` does not work.

So, I think refactoring it into a common helper looks better.

--
Thanks,
Ruan Shiyang.

+ }
+ drop_super(sb);
+
+out:
+ dm_put_live_table(md, srcu_idx);
+ return rc;
+}
+
static int dm_prepare_ioctl(struct mapped_device *md, int *srcu_idx,
struct block_device **bdev)
{
@@ -3084,6 +3149,7 @@ static const struct block_device_operations dm_blk_dops = {
.getgeo = dm_blk_getgeo,
.report_zones = dm_blk_report_zones,
.pr_ops = &dm_pr_ops,
+ .corrupted_range = dm_blk_corrupted_range,
.owner = THIS_MODULE
};
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 4688bff19c20..e8cfaf860149 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -267,11 +267,14 @@ static int pmem_corrupted_range(struct gendisk *disk, struct block_device *bdev,
bdev_offset = (disk_sector - get_start_sect(bdev)) << SECTOR_SHIFT;
sb = get_super(bdev);
- if (sb && sb->s_op->corrupted_range) {
+ if (!sb) {
+ rc = bd_disk_holder_corrupted_range(bdev, bdev_offset, len, data);
+ goto out;
+ } else if (sb->s_op->corrupted_range)
rc = sb->s_op->corrupted_range(sb, bdev, bdev_offset, len, data);
- drop_super(sb);

This is out of scope for this patch(set) but do you think that the scsi
disk driver should intercept media errors from sense data and call
->corrupted_range too? ISTR Ted muttering that one of his employers had
a patchset to do more with sense data than the upstream kernel currently
does...

- }
+ drop_super(sb);
+out:
bdput(bdev);
return rc;
}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9e84b1928b94..d3e6bddb8041 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1171,6 +1171,27 @@ struct bd_holder_disk {
int refcnt;
};
+int bd_disk_holder_corrupted_range(struct block_device *bdev, loff_t off, size_t len, void *data)
+{
+ struct bd_holder_disk *holder;
+ struct gendisk *disk;
+ int rc = 0;
+
+ if (list_empty(&(bdev->bd_holder_disks)))
+ return -ENODEV;
+
+ list_for_each_entry(holder, &bdev->bd_holder_disks, list) {
+ disk = holder->disk;
+ if (disk->fops->corrupted_range) {
+ rc = disk->fops->corrupted_range(disk, bdev, off, len, data);
+ if (rc != -ENODEV)
+ break;
+ }
+ }
+ return rc;
+}
+EXPORT_SYMBOL_GPL(bd_disk_holder_corrupted_range);
+
static struct bd_holder_disk *bd_find_holder_disk(struct block_device *bdev,
struct gendisk *disk)
{
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index ed06209008b8..fba247b852fa 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -382,9 +382,16 @@ int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
#ifdef CONFIG_SYSFS
+int bd_disk_holder_corrupted_range(struct block_device *bdev, loff_t off,
+ size_t len, void *data);
int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk);
#else
+int bd_disk_holder_corrupted_range(struct block_device *bdev, loff_t off,
+ size_t len, void *data)
+{
+ return 0;
+}
static inline int bd_link_disk_holder(struct block_device *bdev,
struct gendisk *disk)
{
--
2.29.2

Next message: Daejun Park: "RE: Re: Subject: [PATCH v14 1/3] scsi: ufs: Introduce HPB feature"
Previous message: Yonghong Song: "Re: [PATCH] btf: support ints larger than 128 bits"
In reply to: Ruan Shiyang: "Re: [RFC PATCH v3 8/9] md: Implement ->corrupted_range()"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]