Re: [PATCH 1/4] block: add zone open, close and finish support

From: Damien Le Moal
Date: Fri Jun 21 2019 - 20:51:45 EST


Matias,

Some comments inline below.

On 2019/06/21 22:07, Matias Bjørling wrote:
> From: Ajay Joshi <ajay.joshi@xxxxxxx>
>
> Zoned block devices allows one to control zone transitions by using
> explicit commands. The available transitions are:
>
> * Open zone: Transition a zone to open state.
> * Close zone: Transition a zone to closed state.
> * Finish zone: Transition a zone to full state.
>
> Allow kernel to issue these transitions by introducing
> blkdev_zones_mgmt_op() and add three new request opcodes:
>
> * REQ_IO_ZONE_OPEN, REQ_IO_ZONE_CLOSE, and REQ_OP_ZONE_FINISH
>
> Allow user-space to issue the transitions through the following ioctls:
>
> * BLKOPENZONE, BLKCLOSEZONE, and BLKFINISHZONE.
>
> Signed-off-by: Ajay Joshi <ajay.joshi@xxxxxxx>
> Signed-off-by: Aravind Ramesh <aravind.ramesh@xxxxxxx>
> Signed-off-by: Matias Bjørling <matias.bjorling@xxxxxxx>
> ---
> block/blk-core.c | 3 ++
> block/blk-zoned.c | 51 ++++++++++++++++++++++---------
> block/ioctl.c | 5 ++-
> include/linux/blk_types.h | 27 +++++++++++++++--
> include/linux/blkdev.h | 57 ++++++++++++++++++++++++++++++-----
> include/uapi/linux/blkzoned.h | 17 +++++++++--
> 6 files changed, 133 insertions(+), 27 deletions(-)
>
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 8340f69670d8..c0f0dbad548d 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -897,6 +897,9 @@ generic_make_request_checks(struct bio *bio)
> goto not_supported;
> break;
> case REQ_OP_ZONE_RESET:
> + case REQ_OP_ZONE_OPEN:
> + case REQ_OP_ZONE_CLOSE:
> + case REQ_OP_ZONE_FINISH:
> if (!blk_queue_is_zoned(q))
> goto not_supported;
> break;
> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
> index ae7e91bd0618..d0c933593b93 100644
> --- a/block/blk-zoned.c
> +++ b/block/blk-zoned.c
> @@ -201,20 +201,22 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
> EXPORT_SYMBOL_GPL(blkdev_report_zones);
>
> /**
> - * blkdev_reset_zones - Reset zones write pointer
> + * blkdev_zones_mgmt_op - Perform the specified operation on the zone(s)
> * @bdev: Target block device
> - * @sector: Start sector of the first zone to reset
> - * @nr_sectors: Number of sectors, at least the length of one zone
> + * @op: Operation to be performed on the zone(s)
> + * @sector: Start sector of the first zone to operate on
> + * @nr_sectors: Number of sectors, at least the length of one zone and
> + * must be zone size aligned.
> * @gfp_mask: Memory allocation flags (for bio_alloc)
> *
> * Description:
> - * Reset the write pointer of the zones contained in the range
> + * Perform the specified operation contained in the range
Perform the specified operation over the sector range
> * @sector..@sector+@nr_sectors. Specifying the entire disk sector range
> * is valid, but the specified range should not contain conventional zones.
> */
> -int blkdev_reset_zones(struct block_device *bdev,
> - sector_t sector, sector_t nr_sectors,
> - gfp_t gfp_mask)
> +int blkdev_zones_mgmt_op(struct block_device *bdev, enum req_opf op,
> + sector_t sector, sector_t nr_sectors,
> + gfp_t gfp_mask)
> {
> struct request_queue *q = bdev_get_queue(bdev);
> sector_t zone_sectors;
> @@ -226,6 +228,9 @@ int blkdev_reset_zones(struct block_device *bdev,
> if (!blk_queue_is_zoned(q))
> return -EOPNOTSUPP;
>
> + if (!op_is_zone_mgmt_op(op))
> + return -EOPNOTSUPP;

EINVAL may be better here.

> +
> if (bdev_read_only(bdev))
> return -EPERM;
>
> @@ -248,7 +253,7 @@ int blkdev_reset_zones(struct block_device *bdev,
> bio = blk_next_bio(bio, 0, gfp_mask);
> bio->bi_iter.bi_sector = sector;
> bio_set_dev(bio, bdev);
> - bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);
> + bio_set_op_attrs(bio, op, 0);
>
> sector += zone_sectors;
>
> @@ -264,7 +269,7 @@ int blkdev_reset_zones(struct block_device *bdev,
>
> return ret;
> }
> -EXPORT_SYMBOL_GPL(blkdev_reset_zones);
> +EXPORT_SYMBOL_GPL(blkdev_zones_mgmt_op);
>
> /*
> * BLKREPORTZONE ioctl processing.
> @@ -329,15 +334,16 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
> }
>
> /*
> - * BLKRESETZONE ioctl processing.
> + * Zone operation (open, close, finish or reset) ioctl processing.
> * Called from blkdev_ioctl.
> */
> -int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
> - unsigned int cmd, unsigned long arg)
> +int blkdev_zones_mgmt_op_ioctl(struct block_device *bdev, fmode_t mode,
> + unsigned int cmd, unsigned long arg)
> {
> void __user *argp = (void __user *)arg;
> struct request_queue *q;
> struct blk_zone_range zrange;
> + enum req_opf op;
>
> if (!argp)
> return -EINVAL;
> @@ -358,8 +364,25 @@ int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
> if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
> return -EFAULT;
>
> - return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
> - GFP_KERNEL);
> + switch (cmd) {
> + case BLKRESETZONE:
> + op = REQ_OP_ZONE_RESET;
> + break;
> + case BLKOPENZONE:
> + op = REQ_OP_ZONE_OPEN;
> + break;
> + case BLKCLOSEZONE:
> + op = REQ_OP_ZONE_CLOSE;
> + break;
> + case BLKFINISHZONE:
> + op = REQ_OP_ZONE_FINISH;
> + break;
> + default:
> + return -ENOTTY;
> + }
> +
> + return blkdev_zones_mgmt_op(bdev, op, zrange.sector, zrange.nr_sectors,
> + GFP_KERNEL);
> }
>
> static inline unsigned long *blk_alloc_zone_bitmap(int node,
> diff --git a/block/ioctl.c b/block/ioctl.c
> index 15a0eb80ada9..df7fe54db158 100644
> --- a/block/ioctl.c
> +++ b/block/ioctl.c
> @@ -532,7 +532,10 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
> case BLKREPORTZONE:
> return blkdev_report_zones_ioctl(bdev, mode, cmd, arg);
> case BLKRESETZONE:
> - return blkdev_reset_zones_ioctl(bdev, mode, cmd, arg);
> + case BLKOPENZONE:
> + case BLKCLOSEZONE:
> + case BLKFINISHZONE:
> + return blkdev_zones_mgmt_op_ioctl(bdev, mode, cmd, arg);
> case BLKGETZONESZ:
> return put_uint(arg, bdev_zone_sectors(bdev));
> case BLKGETNRZONES:
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index 95202f80676c..067ef9242275 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -284,13 +284,20 @@ enum req_opf {
> REQ_OP_DISCARD = 3,
> /* securely erase sectors */
> REQ_OP_SECURE_ERASE = 5,
> - /* reset a zone write pointer */
> - REQ_OP_ZONE_RESET = 6,
> /* write the same sector many times */
> REQ_OP_WRITE_SAME = 7,
> /* write the zero filled sector many times */
> REQ_OP_WRITE_ZEROES = 9,
>
> + /* reset a zone write pointer */
> + REQ_OP_ZONE_RESET = 16,
> + /* Open zone(s) */
> + REQ_OP_ZONE_OPEN = 17,
> + /* Close zone(s) */
> + REQ_OP_ZONE_CLOSE = 18,
> + /* Finish zone(s) */
> + REQ_OP_ZONE_FINISH = 19,
> +
> /* SCSI passthrough using struct scsi_request */
> REQ_OP_SCSI_IN = 32,
> REQ_OP_SCSI_OUT = 33,
> @@ -375,6 +382,22 @@ static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
> bio->bi_opf = op | op_flags;
> }
>
> +/*
> + * Check if the op is zoned operation.
Check if op is a zone management operation.
> + */
> +static inline bool op_is_zone_mgmt_op(enum req_opf op)

Similarly to "op_is_write" pattern, "op_is_zone_mgmt" may be a better name.

> +{
> + switch (op) {
> + case REQ_OP_ZONE_RESET:
> + case REQ_OP_ZONE_OPEN:
> + case REQ_OP_ZONE_CLOSE:
> + case REQ_OP_ZONE_FINISH:
> + return true;
> + default:
> + return false;
> + }
> +}
> +
> static inline bool op_is_write(unsigned int op)
> {
> return (op & 1);
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 592669bcc536..943084f9dc9c 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -348,14 +348,15 @@ extern unsigned int blkdev_nr_zones(struct block_device *bdev);
> extern int blkdev_report_zones(struct block_device *bdev,
> sector_t sector, struct blk_zone *zones,
> unsigned int *nr_zones, gfp_t gfp_mask);
> -extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors,
> - sector_t nr_sectors, gfp_t gfp_mask);
> extern int blk_revalidate_disk_zones(struct gendisk *disk);
>
> extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
> unsigned int cmd, unsigned long arg);
> -extern int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
> - unsigned int cmd, unsigned long arg);
> +extern int blkdev_zones_mgmt_op_ioctl(struct block_device *bdev, fmode_t mode,
> + unsigned int cmd, unsigned long arg);
> +extern int blkdev_zones_mgmt_op(struct block_device *bdev, enum req_opf op,
> + sector_t sector, sector_t nr_sectors,
> + gfp_t gfp_mask);

To keep the grouping of declarations, may be declare this one where
blkdev_reset_zones() was ?

>
> #else /* CONFIG_BLK_DEV_ZONED */
>
> @@ -376,15 +377,57 @@ static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
> return -ENOTTY;
> }
>
> -static inline int blkdev_reset_zones_ioctl(struct block_device *bdev,
> - fmode_t mode, unsigned int cmd,
> - unsigned long arg)
> +static inline int blkdev_zones_mgmt_op_ioctl(struct block_device *bdev,
> + fmode_t mode, unsigned int cmd,
> + unsigned long arg)
> +{
> + return -ENOTTY;
> +}
> +
> +static inline int blkdev_zones_mgmt_op(struct block_device *bdev,
> + enum req_opf op,
> + sector_t sector, sector_t nr_sectors,
> + gfp_t gfp_mask)
> {
> return -ENOTTY;

That should be -ENOTSUPP. This is not an ioctl. The ioctl call is above this one.

> }
>
> #endif /* CONFIG_BLK_DEV_ZONED */
>
> +static inline int blkdev_reset_zones(struct block_device *bdev,
> + sector_t sector, sector_t nr_sectors,
> + gfp_t gfp_mask)
> +{
> + return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_RESET,
> + sector, nr_sectors, gfp_mask);
> +}
> +
> +static inline int blkdev_open_zones(struct block_device *bdev,
> + sector_t sector, sector_t nr_sectors,
> + gfp_t gfp_mask)
> +{
> + return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_OPEN,
> + sector, nr_sectors, gfp_mask);
> +}
> +
> +static inline int blkdev_close_zones(struct block_device *bdev,
> + sector_t sector, sector_t nr_sectors,
> + gfp_t gfp_mask)
> +{
> + return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_CLOSE,
> + sector, nr_sectors,
> + gfp_mask);
> +}
> +
> +static inline int blkdev_finish_zones(struct block_device *bdev,
> + sector_t sector, sector_t nr_sectors,
> + gfp_t gfp_mask)
> +{
> + return blkdev_zones_mgmt_op(bdev, REQ_OP_ZONE_FINISH,
> + sector, nr_sectors,
> + gfp_mask);
> +}
> +
> struct request_queue {
> /*
> * Together with queue_head for cacheline sharing
> diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
> index 498eec813494..701e0692b8d3 100644
> --- a/include/uapi/linux/blkzoned.h
> +++ b/include/uapi/linux/blkzoned.h
> @@ -120,9 +120,11 @@ struct blk_zone_report {
> };
>
> /**
> - * struct blk_zone_range - BLKRESETZONE ioctl request
> - * @sector: starting sector of the first zone to issue reset write pointer
> - * @nr_sectors: Total number of sectors of 1 or more zones to reset
> + * struct blk_zone_range - BLKRESETZONE/BLKOPENZONE/
> + * BLKCLOSEZONE/BLKFINISHZONE ioctl
> + * request
> + * @sector: starting sector of the first zone to operate on
> + * @nr_sectors: Total number of sectors of all zones to operate on
> */
> struct blk_zone_range {
> __u64 sector;
> @@ -139,10 +141,19 @@ struct blk_zone_range {
> * sector range. The sector range must be zone aligned.
> * @BLKGETZONESZ: Get the device zone size in number of 512 B sectors.
> * @BLKGETNRZONES: Get the total number of zones of the device.
> + * @BLKOPENZONE: Open the zones in the specified sector range. The
> + * sector range must be zone aligned.
> + * @BLKCLOSEZONE: Close the zones in the specified sector range. The
> + * sector range must be zone aligned.
> + * @BLKFINISHZONE: Finish the zones in the specified sector range. The
> + * sector range must be zone aligned.
> */
> #define BLKREPORTZONE _IOWR(0x12, 130, struct blk_zone_report)
> #define BLKRESETZONE _IOW(0x12, 131, struct blk_zone_range)
> #define BLKGETZONESZ _IOR(0x12, 132, __u32)
> #define BLKGETNRZONES _IOR(0x12, 133, __u32)
> +#define BLKOPENZONE _IOW(0x12, 134, struct blk_zone_range)
> +#define BLKCLOSEZONE _IOW(0x12, 135, struct blk_zone_range)
> +#define BLKFINISHZONE _IOW(0x12, 136, struct blk_zone_range)
>
> #endif /* _UAPI_BLKZONED_H */
>


--
Damien Le Moal
Western Digital Research