When md raid1 is used with imsm metadata, the raid device is first set
read-only during boot, and mdmon sets it read-write later. If the device
contains partitions, the following race can leave some partitions
read-only, causing them to fail to mount.
CPU 1:                              CPU 2:
add_partition()                     set_disk_ro() // set disk RW
  // disk was RO, so partition set to RO
  p->policy = get_disk_ro(disk);
                                    if (disk->part0.policy != flag) {
                                        set_disk_ro_uevent(disk, flag);
                                        // disk set to RW
                                        disk->part0.policy = flag;
                                    }
                                    // set all existing partitions to RW
                                    while ((part = disk_part_iter_next(&piter)))
                                        part->policy = flag;
  // this partition was not yet added, so it stays RO
  rcu_assign_pointer(ptbl->part[partno], p);
Move the RO status setting of partitions to after they are added to the
partition table, and introduce a mutex to synchronize the RO status between
the disk and its partitions.
Signed-off-by: Junxiao Bi <junxiao.bi@xxxxxxxxxx>
---
block/genhd.c | 3 +++
block/partition-generic.c | 5 ++++-
include/linux/genhd.h | 1 +
3 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/block/genhd.c b/block/genhd.c
index 54f1f0d381f4..f3cce1d354cf 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1479,6 +1479,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
}
ptbl = rcu_dereference_protected(disk->part_tbl, 1);
rcu_assign_pointer(ptbl->part[0], &disk->part0);
+ mutex_init(&disk->part_lock);
/*
* set_capacity() and get_capacity() currently don't use
@@ -1570,6 +1571,7 @@ void set_disk_ro(struct gendisk *disk, int flag)
struct disk_part_iter piter;
struct hd_struct *part;
+ mutex_lock(&disk->part_lock);
if (disk->part0.policy != flag) {
set_disk_ro_uevent(disk, flag);
disk->part0.policy = flag;
@@ -1579,6 +1581,7 @@ void set_disk_ro(struct gendisk *disk, int flag)
while ((part = disk_part_iter_next(&piter)))
part->policy = flag;
disk_part_iter_exit(&piter);
+ mutex_unlock(&disk->part_lock);
}
EXPORT_SYMBOL(set_disk_ro);
diff --git a/block/partition-generic.c b/block/partition-generic.c
index aee643ce13d1..63cb6fb996ff 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -345,7 +345,6 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
queue_limit_discard_alignment(&disk->queue->limits, start);
p->nr_sects = len;
p->partno = partno;
- p->policy = get_disk_ro(disk);
if (info) {
struct partition_meta_info *pinfo = alloc_part_info(disk);
@@ -401,6 +400,10 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
/* everything is up and running, commence */
rcu_assign_pointer(ptbl->part[partno], p);
+ mutex_lock(&disk->part_lock);
+ p->policy = get_disk_ro(disk);
+ mutex_unlock(&disk->part_lock);
+
/* suppress uevent if the disk suppresses it */
if (!dev_get_uevent_suppress(ddev))
kobject_uevent(&pdev->kobj, KOBJ_ADD);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 8b5330dd5ac0..df6ddca8a92c 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -201,6 +201,7 @@ struct gendisk {
*/
struct disk_part_tbl __rcu *part_tbl;
struct hd_struct part0;
+ struct mutex part_lock;
const struct block_device_operations *fops;
struct request_queue *queue;