Re: [PATCH 1/2] block: add max_open_zones to blk-sysfs
From: Damien Le Moal
Date: Mon Jun 29 2020 - 21:49:49 EST
On 2020/06/16 19:28, Niklas Cassel wrote:
> Add a new max_open_zones definition in the sysfs documentation.
> This definition will be common for all devices utilizing the zoned block
> device support in the kernel.
>
> Export max open zones according to this new definition for NVMe Zoned
> Namespace devices, ZAC ATA devices (which are treated as SCSI devices by
> the kernel), and ZBC SCSI devices.
>
> Add the new max_open_zones struct member to the request_queue, rather
Add the new max_open_zones member to struct request_queue...
> than as a queue limit, since this property cannot be split across stacking
> drivers.
But device-mapper target device have a request queue too and it looks like your
patch is not setting any value, using the default 0 for dm-linear and dm-flakey.
Attaching the new attribute directly to the request queue rather than adding it
as part of the queue limits seems odd. Even if DM case is left unsupported
(using the default 0 = no limit), it may be cleaner to add the field as part of
the limit struct.
Adding the field as a device attribute rather than a queue limit, similarly to
the device maximum queue depth would be another option. In such case, including
the field directly as part of the request queue makes more sense.
>
> Signed-off-by: Niklas Cassel <niklas.cassel@xxxxxxx>
> ---
> Documentation/block/queue-sysfs.rst | 7 +++++++
> block/blk-sysfs.c | 15 +++++++++++++++
> drivers/nvme/host/zns.c | 1 +
> drivers/scsi/sd_zbc.c | 4 ++++
> include/linux/blkdev.h | 20 ++++++++++++++++++++
> 5 files changed, 47 insertions(+)
>
> diff --git a/Documentation/block/queue-sysfs.rst b/Documentation/block/queue-sysfs.rst
> index 6a8513af9201..f01cf8530ae4 100644
> --- a/Documentation/block/queue-sysfs.rst
> +++ b/Documentation/block/queue-sysfs.rst
> @@ -117,6 +117,13 @@ Maximum number of elements in a DMA scatter/gather list with integrity
> data that will be submitted by the block layer core to the associated
> block driver.
>
> +max_open_zones (RO)
> +-------------------
> +For zoned block devices (zoned attribute indicating "host-managed" or
> +"host-aware"), the sum of zones belonging to any of the zone states:
> +EXPLICIT OPEN or IMPLICIT OPEN, is limited by this value.
> +If this value is 0, there is no limit.
> +
> max_sectors_kb (RW)
> -------------------
> This is the maximum number of kilobytes that the block layer will allow
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index 02643e149d5e..fa42961e9678 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -305,6 +305,11 @@ static ssize_t queue_nr_zones_show(struct request_queue *q, char *page)
> return queue_var_show(blk_queue_nr_zones(q), page);
> }
>
> +static ssize_t queue_max_open_zones_show(struct request_queue *q, char *page)
> +{
> + return queue_var_show(queue_max_open_zones(q), page);
> +}
> +
> static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
> {
> return queue_var_show((blk_queue_nomerges(q) << 1) |
> @@ -667,6 +672,11 @@ static struct queue_sysfs_entry queue_nr_zones_entry = {
> .show = queue_nr_zones_show,
> };
>
> +static struct queue_sysfs_entry queue_max_open_zones_entry = {
> + .attr = {.name = "max_open_zones", .mode = 0444 },
> + .show = queue_max_open_zones_show,
> +};
> +
> static struct queue_sysfs_entry queue_nomerges_entry = {
> .attr = {.name = "nomerges", .mode = 0644 },
> .show = queue_nomerges_show,
> @@ -765,6 +775,7 @@ static struct attribute *queue_attrs[] = {
> &queue_nonrot_entry.attr,
> &queue_zoned_entry.attr,
> &queue_nr_zones_entry.attr,
> + &queue_max_open_zones_entry.attr,
> &queue_nomerges_entry.attr,
> &queue_rq_affinity_entry.attr,
> &queue_iostats_entry.attr,
> @@ -792,6 +803,10 @@ static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr,
> (!q->mq_ops || !q->mq_ops->timeout))
> return 0;
>
> + if (attr == &queue_max_open_zones_entry.attr &&
> + !blk_queue_is_zoned(q))
> + return 0;
> +
> return attr->mode;
> }
>
> diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
> index c08f6281b614..af156529f3b6 100644
> --- a/drivers/nvme/host/zns.c
> +++ b/drivers/nvme/host/zns.c
> @@ -82,6 +82,7 @@ int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
>
> q->limits.zoned = BLK_ZONED_HM;
> blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
> + blk_queue_max_open_zones(q, le32_to_cpu(id->mor) + 1);
> free_data:
> kfree(id);
> return status;
> diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
> index 183a20720da9..aa3564139b40 100644
> --- a/drivers/scsi/sd_zbc.c
> +++ b/drivers/scsi/sd_zbc.c
> @@ -717,6 +717,10 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
> /* The drive satisfies the kernel restrictions: set it up */
> blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
> blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
> + if (sdkp->zones_max_open == U32_MAX)
> + blk_queue_max_open_zones(q, 0);
> + else
> + blk_queue_max_open_zones(q, sdkp->zones_max_open);
This is correct only for host-managed drives. Host-aware models define the
"OPTIMAL NUMBER OF OPEN SEQUENTIAL WRITE PREFERRED ZONES" instead of a maximum
number of open sequential write required zones.
Since the standard does not actually explicitly define what the value of the
maximum number of open sequential write required zones should be for a
host-aware drive, I would suggest to always have the max_open_zones value set to
0 for host-aware disks.
> nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
>
> /* READ16/WRITE16 is mandatory for ZBC disks */
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 8fd900998b4e..2f332f00501d 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -520,6 +520,7 @@ struct request_queue {
> unsigned int nr_zones;
> unsigned long *conv_zones_bitmap;
> unsigned long *seq_zones_wlock;
> + unsigned int max_open_zones;
> #endif /* CONFIG_BLK_DEV_ZONED */
>
> /*
> @@ -729,6 +730,17 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q,
> return true;
> return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap);
> }
> +
> +static inline void blk_queue_max_open_zones(struct request_queue *q,
> + unsigned int max_open_zones)
> +{
> + q->max_open_zones = max_open_zones;
> +}
> +
> +static inline unsigned int queue_max_open_zones(const struct request_queue *q)
> +{
> + return q->max_open_zones;
> +}
> #else /* CONFIG_BLK_DEV_ZONED */
> static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
> {
> @@ -744,6 +756,14 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q,
> {
> return 0;
> }
> +static inline void blk_queue_max_open_zones(struct request_queue *q,
> + unsigned int max_open_zones)
> +{
> +}
Why is this one necessary ? For the !CONFIG_BLK_DEV_ZONED case, no driver should
ever call this function.
> +static inline unsigned int queue_max_open_zones(const struct request_queue *q)
> +{
> + return 0;
> +}
> #endif /* CONFIG_BLK_DEV_ZONED */
>
> static inline bool rq_is_sync(struct request *rq)
>
--
Damien Le Moal
Western Digital Research