Re: [PATCH v20 01/12] block: Introduce queue limits and sysfs for copy-offload support

From: Nitesh Shetty
Date: Wed May 22 2024 - 02:21:05 EST


On 20/05/24 04:33PM, Damien Le Moal wrote:
On 2024/05/20 12:20, Nitesh Shetty wrote:
@@ -231,10 +237,11 @@ int blk_set_default_limits(struct queue_limits *lim)
{
/*
* Most defaults are set by capping the bounds in blk_validate_limits,
- * but max_user_discard_sectors is special and needs an explicit
- * initialization to the max value here.
+ * but max_user_discard_sectors and max_user_copy_sectors are special
+ * and needs an explicit initialization to the max value here.

s/needs/need

acked

*/
lim->max_user_discard_sectors = UINT_MAX;
+ lim->max_user_copy_sectors = UINT_MAX;
return blk_validate_limits(lim);
}

@@ -316,6 +323,25 @@ void blk_queue_max_discard_sectors(struct request_queue *q,
}
EXPORT_SYMBOL(blk_queue_max_discard_sectors);

+/*
+ * blk_queue_max_copy_hw_sectors - set max sectors for a single copy payload
+ * @q: the request queue for the device
+ * @max_copy_sectors: maximum number of sectors to copy
+ */
+void blk_queue_max_copy_hw_sectors(struct request_queue *q,
+ unsigned int max_copy_sectors)
+{
+ struct queue_limits *lim = &q->limits;
+
+ if (max_copy_sectors > (BLK_COPY_MAX_BYTES >> SECTOR_SHIFT))
+ max_copy_sectors = BLK_COPY_MAX_BYTES >> SECTOR_SHIFT;
+
+ lim->max_copy_hw_sectors = max_copy_sectors;
+ lim->max_copy_sectors =
+ min(max_copy_sectors, lim->max_user_copy_sectors);
+}
+EXPORT_SYMBOL_GPL(blk_queue_max_copy_hw_sectors);

Hmm... Such helper seems to not fit with Christoph's changes of the limits
initialization as that is not necessarily done using &q->limits but depending on
the driver, a different limit structure. So shouldn't this function be passed a
queue_limits struct pointer instead of the request queue pointer ?

Acked, we made a mistake, we are no longer using this function after moving
to atomic limits change. We will remove this function in next version.

+
/**
* blk_queue_max_secure_erase_sectors - set max sectors for a secure erase
* @q: the request queue for the device
@@ -633,6 +659,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->max_segment_size = min_not_zero(t->max_segment_size,
b->max_segment_size);

+ t->max_copy_sectors = min(t->max_copy_sectors, b->max_copy_sectors);
+ t->max_copy_hw_sectors = min(t->max_copy_hw_sectors,
+ b->max_copy_hw_sectors);
+
t->misaligned |= b->misaligned;

alignment = queue_limit_alignment_offset(b, start);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index f0f9314ab65c..805c2b6b0393 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -205,6 +205,44 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag
return queue_var_show(0, page);
}

+static ssize_t queue_copy_hw_max_show(struct request_queue *q, char *page)
+{
+ return sprintf(page, "%llu\n", (unsigned long long)
+ q->limits.max_copy_hw_sectors << SECTOR_SHIFT);
+}
+
+static ssize_t queue_copy_max_show(struct request_queue *q, char *page)
+{
+ return sprintf(page, "%llu\n", (unsigned long long)
+ q->limits.max_copy_sectors << SECTOR_SHIFT);
+}

Given that you repeat the same pattern twice, may be add a queue_var64_show()
helper ? (naming can be changed).

Acked

+
+static ssize_t queue_copy_max_store(struct request_queue *q, const char *page,
+ size_t count)
+{
+ unsigned long max_copy_bytes;
+ struct queue_limits lim;
+ ssize_t ret;
+ int err;
+
+ ret = queue_var_store(&max_copy_bytes, page, count);
+ if (ret < 0)
+ return ret;
+
+ if (max_copy_bytes & (queue_logical_block_size(q) - 1))
+ return -EINVAL;
+
+ blk_mq_freeze_queue(q);
+ lim = queue_limits_start_update(q);
+ lim.max_user_copy_sectors = max_copy_bytes >> SECTOR_SHIFT;

max_copy_bytes is an unsigned long, so 64 bits on 64-bit arch and
max_user_copy_sectors is an unsigned int, so 32-bits. There are thus no
guarantees that this will not overflow. A check is needed.

Acked

+ err = queue_limits_commit_update(q, &lim);
+ blk_mq_unfreeze_queue(q);
+
+ if (err)

You can reuse ret here. No need for adding the err variable.
Acked


+ return err;
+ return count;
+}
+
static ssize_t queue_write_same_max_show(struct request_queue *q, char *page)
{
return queue_var_show(0, page);
@@ -505,6 +543,9 @@ QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones");
QUEUE_RO_ENTRY(queue_max_open_zones, "max_open_zones");
QUEUE_RO_ENTRY(queue_max_active_zones, "max_active_zones");

+QUEUE_RO_ENTRY(queue_copy_hw_max, "copy_max_hw_bytes");
+QUEUE_RW_ENTRY(queue_copy_max, "copy_max_bytes");
+
QUEUE_RW_ENTRY(queue_nomerges, "nomerges");
QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity");
QUEUE_RW_ENTRY(queue_poll, "io_poll");
@@ -618,6 +659,8 @@ static struct attribute *queue_attrs[] = {
&queue_discard_max_entry.attr,
&queue_discard_max_hw_entry.attr,
&queue_discard_zeroes_data_entry.attr,
+ &queue_copy_hw_max_entry.attr,
+ &queue_copy_max_entry.attr,
&queue_write_same_max_entry.attr,
&queue_write_zeroes_max_entry.attr,
&queue_zone_append_max_entry.attr,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index aefdda9f4ec7..109d9f905c3c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -309,6 +309,10 @@ struct queue_limits {
unsigned int discard_alignment;
unsigned int zone_write_granularity;

+ unsigned int max_copy_hw_sectors;
+ unsigned int max_copy_sectors;
+ unsigned int max_user_copy_sectors;
+
unsigned short max_segments;
unsigned short max_integrity_segments;
unsigned short max_discard_segments;
@@ -933,6 +937,8 @@ void blk_queue_max_secure_erase_sectors(struct request_queue *q,
unsigned int max_sectors);
extern void blk_queue_max_discard_sectors(struct request_queue *q,
unsigned int max_discard_sectors);
+extern void blk_queue_max_copy_hw_sectors(struct request_queue *q,
+ unsigned int max_copy_sectors);
extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
unsigned int max_write_same_sectors);
extern void blk_queue_logical_block_size(struct request_queue *, unsigned int);
@@ -1271,6 +1277,14 @@ static inline unsigned int bdev_discard_granularity(struct block_device *bdev)
return bdev_get_queue(bdev)->limits.discard_granularity;
}

+/* maximum copy offload length, this is set to 128MB based on current testing */

Current testing will not be current in a while... So may be simply say
"arbitrary" or something. Also please capitalize the first letter of the
comment. So something like:

/* Arbitrary absolute limit of 128 MB for copy offload. */

+#define BLK_COPY_MAX_BYTES (1 << 27)

Also, it is not clear from the name if this is a soft limit or a cap on the
hardware limit... So at least please adjust the comment to say which one it is.

Acked, it is a soft limit.

Thank You,
Nitesh Shetty