[PATCH 4/5] brd: Fix all partitions BUGs

From: Boaz Harrosh
Date: Wed Aug 27 2014 - 11:30:56 EST


From: Boaz Harrosh <boaz@xxxxxxxxxxxxx>

This patch fixes up brd's partitions scheme, now enjoying all worlds.

The MAIN fix here is that currently, if one fdisks some partitions,
a BAD bug will make all partitions point to the same start-end sector
ie: 0 - brd_size And an mkfs of any partition would trash the partition
table and the other partition.

Another fix is that "mount -U uuid" will not work if show_part was not
specified, because of the GENHD_FL_SUPPRESS_PARTITION_INFO flag.
We now always load without it and remove the show_part parameter.

So NOW the logic goes like this:
* max_part - Just says how many minors to reserve between devices
But in any way, there can be as many partition as requested.
If minors between devices ends, then dynamic 259-major ids will
be allocated on the fly.
The default is now max_part=1, which means all partitions devt
will be from the dynamic major-range.
(If persistent partition minors is needed use max_part=)

* Creation of new devices on the fly still/always work:
mknod /path/devnod b 1 X
fdisk -l /path/devnod
Will create a new device if (X / max_part) was not already
created before. (Just as before)

partitions on the dynamically created device will work as well
Same logic applies with minors as with the pre-created ones.

TODO: dynamic grow of device size, maybe through sysfs. So each
device can have it's own size.

CC: Dmitry Monakhov <dmonakhov@xxxxxxxxxx>
Tested-by: Ross Zwisler <ross.zwisler@xxxxxxxxxxxxxxx>
Signed-off-by: Boaz Harrosh <boaz@xxxxxxxxxxxxx>
---
drivers/block/brd.c | 100 ++++++++++++++++++++--------------------------------
1 file changed, 38 insertions(+), 62 deletions(-)

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index f841d9e..83e3d22 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -455,19 +455,18 @@ static const struct block_device_operations brd_fops = {
/*
* And now the modules code and kernel interface.
*/
-static int rd_nr;
-int rd_size = CONFIG_BLK_DEV_RAM_SIZE;
-static int max_part;
-static int part_shift;
-static int part_show = 0;
+static int rd_nr = CONFIG_BLK_DEV_RAM_COUNT;
module_param(rd_nr, int, S_IRUGO);
MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");
+
+int rd_size = CONFIG_BLK_DEV_RAM_SIZE;
module_param(rd_size, int, S_IRUGO);
MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
+
+static int max_part = 1;
module_param(max_part, int, S_IRUGO);
-MODULE_PARM_DESC(max_part, "Maximum number of partitions per RAM disk");
-module_param(part_show, int, S_IRUGO);
-MODULE_PARM_DESC(part_show, "Control RAM disk visibility in /proc/partitions");
+MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
+
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);
MODULE_ALIAS("rd");
@@ -513,16 +512,15 @@ static struct brd_device *brd_alloc(int i)
brd->brd_queue->limits.discard_zeroes_data = 1;
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue);

- disk = brd->brd_disk = alloc_disk(1 << part_shift);
+ disk = brd->brd_disk = alloc_disk(max_part);
if (!disk)
goto out_free_queue;
disk->major = RAMDISK_MAJOR;
- disk->first_minor = i << part_shift;
+ disk->first_minor = i * max_part;
disk->fops = &brd_fops;
disk->private_data = brd;
disk->queue = brd->brd_queue;
- if (!part_show)
- disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
+ disk->flags = GENHD_FL_EXT_DEVT;
sprintf(disk->disk_name, "ram%d", i);
set_capacity(disk, rd_size * 2);

@@ -544,10 +542,11 @@ static void brd_free(struct brd_device *brd)
kfree(brd);
}

-static struct brd_device *brd_init_one(int i)
+static struct brd_device *brd_init_one(int i, bool *new)
{
struct brd_device *brd;

+ *new = false;
list_for_each_entry(brd, &brd_devices, brd_list) {
if (brd->brd_number == i)
goto out;
@@ -558,6 +557,7 @@ static struct brd_device *brd_init_one(int i)
add_disk(brd->brd_disk);
list_add_tail(&brd->brd_list, &brd_devices);
}
+ *new = true;
out:
return brd;
}
@@ -573,70 +573,46 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data)
{
struct brd_device *brd;
struct kobject *kobj;
+ bool new;

mutex_lock(&brd_devices_mutex);
- brd = brd_init_one(MINOR(dev) >> part_shift);
+ brd = brd_init_one(MINOR(dev) / max_part, &new);
kobj = brd ? get_disk(brd->brd_disk) : NULL;
mutex_unlock(&brd_devices_mutex);

- *part = 0;
+ if (new)
+ *part = 0;
+
return kobj;
}

static int __init brd_init(void)
{
- int i, nr;
- unsigned long range;
struct brd_device *brd, *next;
+ int i;

/*
* brd module now has a feature to instantiate underlying device
* structure on-demand, provided that there is an access dev node.
- * However, this will not work well with user space tool that doesn't
- * know about such "feature". In order to not break any existing
- * tool, we do the following:
*
- * (1) if rd_nr is specified, create that many upfront, and this
- * also becomes a hard limit.
- * (2) if rd_nr is not specified, create CONFIG_BLK_DEV_RAM_COUNT
- * (default 16) rd device on module load, user can further
- * extend brd device by create dev node themselves and have
- * kernel automatically instantiate actual device on-demand.
+ * (1) if rd_nr is specified, create that many upfront. else
+ * it defaults to CONFIG_BLK_DEV_RAM_COUNT
+ * (2) User can further extend brd devices by create dev node themselves
+ * and have kernel automatically instantiate actual device
+ * on-demand. Example:
+ * mknod /path/devnod_name b 1 X # 1 is the rd major
+ * fdisk -l /path/devnod_name
+ * If (X / max_part) was not already created it will be created
+ * dynamically.
*/

- part_shift = 0;
- if (max_part > 0) {
- part_shift = fls(max_part);
-
- /*
- * Adjust max_part according to part_shift as it is exported
- * to user space so that user can decide correct minor number
- * if [s]he want to create more devices.
- *
- * Note that -1 is required because partition 0 is reserved
- * for the whole disk.
- */
- max_part = (1UL << part_shift) - 1;
- }
-
- if ((1UL << part_shift) > DISK_MAX_PARTS)
- return -EINVAL;
-
- if (rd_nr > 1UL << (MINORBITS - part_shift))
- return -EINVAL;
-
- if (rd_nr) {
- nr = rd_nr;
- range = rd_nr << part_shift;
- } else {
- nr = CONFIG_BLK_DEV_RAM_COUNT;
- range = 1UL << MINORBITS;
- }
-
if (register_blkdev(RAMDISK_MAJOR, "ramdisk"))
return -EIO;

- for (i = 0; i < nr; i++) {
+ if (unlikely(!max_part))
+ max_part = 1;
+
+ for (i = 0; i < rd_nr; i++) {
brd = brd_alloc(i);
if (!brd)
goto out_free;
@@ -648,10 +624,10 @@ static int __init brd_init(void)
list_for_each_entry(brd, &brd_devices, brd_list)
add_disk(brd->brd_disk);

- blk_register_region(MKDEV(RAMDISK_MAJOR, 0), range,
+ blk_register_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS,
THIS_MODULE, brd_probe, NULL, NULL);

- printk(KERN_INFO "brd: module loaded\n");
+ pr_info("brd: module loaded\n");
return 0;

out_free:
@@ -661,21 +637,21 @@ out_free:
}
unregister_blkdev(RAMDISK_MAJOR, "ramdisk");

+ pr_info("brd: module NOT loaded !!!\n");
return -ENOMEM;
}

static void __exit brd_exit(void)
{
- unsigned long range;
struct brd_device *brd, *next;

- range = rd_nr ? rd_nr << part_shift : 1UL << MINORBITS;
-
list_for_each_entry_safe(brd, next, &brd_devices, brd_list)
brd_del_one(brd);

- blk_unregister_region(MKDEV(RAMDISK_MAJOR, 0), range);
+ blk_unregister_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS);
unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
+
+ pr_info("brd: module unloaded\n");
}

module_init(brd_init);
--
1.9.3


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/