[PATCH v3 2/5] block: introduce del_gendisk_queue()

From: Dan Williams
Date: Thu Jan 07 2016 - 19:43:39 EST


The current block device shutdown sequence of del_gendisk +
blk_cleanup_queue is problematic. We want to tell the fs after
blk_cleanup_queue that there is no possibility of recovery, but by that
time we have deleted partitions and lost the ability to find all the
super-blocks on a block device.

del_gendisk_queue() combines block device shutdown, blk_cleanup_queue(),
with block device end of life notification, del_gendisk(). Later
patches build on this sequence to unmap all dax inodes and communicate
to the fs that it should force-fail all future i/o since the queue is
permanently dead. For now this routine is functionally equivalent to
calling del_gendisk() + blk_cleanup_queue() in succession.

Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx>
---
block/genhd.c | 43 ++++++++++++++++++++++++++++++++++++++++++
drivers/block/brd.c | 9 +++------
drivers/nvdimm/pmem.c | 3 +--
drivers/s390/block/dcssblk.c | 6 ++----
include/linux/genhd.h | 1 +
5 files changed, 50 insertions(+), 12 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index b1d1df42ba13..a5bb768111cc 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -686,6 +686,49 @@ void del_gendisk(struct gendisk *disk)
EXPORT_SYMBOL(del_gendisk);

/**
+ * del_gendisk_queue - combined del_gendisk + blk_cleanup_queue
+ * @disk: disk to delete, invalidate, unmap, and force-fail fs operations
+ *
+ * This is an alternative for open coded calls to:
+ * del_gendisk()
+ * blk_cleanup_queue()
+ * It notifies filesystems / vfs that a block device is permanently dead
+ * after the queue has been torn down. This notification is needed for
+ * triggering a filesystem to abort its error recovery and for (DAX)
+ * capable devices. DAX bypasses page cache and mappings go directly to
+ * storage media. When such a disk is removed the pfn backing a mapping
+ * may be invalid or removed from the system. Upon return accessing DAX
+ * mappings of this disk will trigger SIGBUS.
+ */
+void del_gendisk_queue(struct gendisk *disk)
+{
+ struct disk_part_iter piter;
+ struct hd_struct *part;
+
+ del_gendisk_start(disk);
+
+ /* pass1 sync fs + evict idle inodes */
+ disk_part_iter_init(&piter, disk,
+ DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
+ for_each_part(part, &piter)
+ invalidate_partition(disk, part->partno);
+ disk_part_iter_exit(&piter);
+ invalidate_partition(disk, 0);
+
+ blk_cleanup_queue(disk->queue);
+
+ /* pass2 the queue is dead */
+ disk_part_iter_init(&piter, disk,
+ DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
+ for_each_part(part, &piter)
+ delete_partition(disk, part->partno);
+ disk_part_iter_exit(&piter);
+
+ del_gendisk_end(disk);
+}
+EXPORT_SYMBOL(del_gendisk_queue);
+
+/**
* get_gendisk - get partitioning information for a given device
* @devt: device to get partitioning information for
* @partno: returned partition index
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index a5880f4ab40e..013ff58f9af8 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -532,7 +532,6 @@ out:
static void brd_free(struct brd_device *brd)
{
put_disk(brd->brd_disk);
- blk_cleanup_queue(brd->brd_queue);
brd_free_pages(brd);
kfree(brd);
}
@@ -560,7 +559,7 @@ out:
static void brd_del_one(struct brd_device *brd)
{
list_del(&brd->brd_list);
- del_gendisk(brd->brd_disk);
+ del_gendisk_queue(brd->brd_disk);
brd_free(brd);
}

@@ -626,10 +625,8 @@ static int __init brd_init(void)
return 0;

out_free:
- list_for_each_entry_safe(brd, next, &brd_devices, brd_list) {
- list_del(&brd->brd_list);
- brd_free(brd);
- }
+ list_for_each_entry_safe(brd, next, &brd_devices, brd_list)
+ brd_del_one(brd);
unregister_blkdev(RAMDISK_MAJOR, "ramdisk");

pr_info("brd: module NOT loaded !!!\n");
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 8ee79893d2f5..6dd06e9d34b0 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -158,9 +158,8 @@ static void pmem_detach_disk(struct pmem_device *pmem)
if (!pmem->pmem_disk)
return;

- del_gendisk(pmem->pmem_disk);
+ del_gendisk_queue(pmem->pmem_disk);
put_disk(pmem->pmem_disk);
- blk_cleanup_queue(pmem->pmem_queue);
}

static int pmem_attach_disk(struct device *dev,
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 94a8f4ab57bc..0c3c968b57d9 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -388,8 +388,7 @@ removeseg:
}
list_del(&dev_info->lh);

- del_gendisk(dev_info->gd);
- blk_cleanup_queue(dev_info->dcssblk_queue);
+ del_gendisk_queue(dev_info->gd);
dev_info->gd->queue = NULL;
put_disk(dev_info->gd);
up_write(&dcssblk_devices_sem);
@@ -751,8 +750,7 @@ dcssblk_remove_store(struct device *dev, struct device_attribute *attr, const ch
}

list_del(&dev_info->lh);
- del_gendisk(dev_info->gd);
- blk_cleanup_queue(dev_info->dcssblk_queue);
+ del_gendisk_queue(dev_info->gd);
dev_info->gd->queue = NULL;
put_disk(dev_info->gd);
device_unregister(&dev_info->dev);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 847cc1d91634..028cf15a8a57 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -431,6 +431,7 @@ extern void part_round_stats(int cpu, struct hd_struct *part);
/* block/genhd.c */
extern void add_disk(struct gendisk *disk);
extern void del_gendisk(struct gendisk *gp);
+extern void del_gendisk_queue(struct gendisk *disk);
extern struct gendisk *get_gendisk(dev_t dev, int *partno);
extern struct block_device *bdget_disk(struct gendisk *disk, int partno);