Re: [PATCH 1/1] blk-mq: Fix disabled hybrid polling
From: Pavel Begunkov
Date: Thu May 30 2019 - 05:22:49 EST
On 5/28/2019 9:37 PM, Omar Sandoval wrote:
> On Sat, May 25, 2019 at 04:42:11PM +0300, Pavel Begunkov (Silence) wrote:
>> From: Pavel Begunkov <asml.silence@xxxxxxxxx>
>>
>> Commit 4bc6339a583cec650b05 ("block: move blk_stat_add() to
>> __blk_mq_end_request()") moved blk_stat_add(), so now it's called after
>> blk_update_request(), which zeroes rq->__data_len. Without length,
>> blk_stat_add() can't calculate stat bucket and returns error,
>> effectively disabling hybrid polling.
>
> I don't see how this patch fixes this problem, am I missing something?
> The timing issue seems orthogonal.
You're right, it got completely mixed up. It's rather an addition to a
patch from Hou Tao. But considering your comment below, it would be
better just to revert part of changes.
>
>> __blk_mq_end_request() is a right place to call blk_stat_add(), as it's
>> guaranteed to be called for each request. Yet, calculating time there
>> won't provide sufficient accuracy/precision for finer tuned hybrid
>> polling, because a path from __blk_mq_complete_request() to
>> __blk_mq_end_request() adds unpredictable overhead.
>>
>> Add io_end_time_ns field in struct request, save time as soon as
>> possible (at __blk_mq_complete_request()) and reuse later.
>>
>> Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx>
>> ---
>> block/blk-mq.c | 13 ++++++++++---
>> block/blk-stat.c | 4 ++--
>> block/blk-stat.h | 2 +-
>> include/linux/blkdev.h | 11 +++++++++++
>> 4 files changed, 24 insertions(+), 6 deletions(-)
>>
>> diff --git a/block/blk-mq.c b/block/blk-mq.c
>> index 32b8ad3d341b..8f6b6bfe0ccb 100644
>> --- a/block/blk-mq.c
>> +++ b/block/blk-mq.c
>> @@ -330,6 +330,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
>> else
>> rq->start_time_ns = 0;
>> rq->io_start_time_ns = 0;
>> + rq->io_end_time_ns = 0;
>> rq->nr_phys_segments = 0;
>> #if defined(CONFIG_BLK_DEV_INTEGRITY)
>> rq->nr_integrity_segments = 0;
>> @@ -532,14 +533,17 @@ EXPORT_SYMBOL_GPL(blk_mq_free_request);
>>
>> inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
>> {
>> - u64 now = 0;
>> + u64 now = rq->io_end_time_ns;
>
> Kyber expects the timestamp passed in to kyber_complete_request() to
> include the software overhead. iostat should probably include the
> software overhead, too. So, we probably won't be able to avoid calling
> ktime_get() twice, once for I/O time and one for the end-to-end time.
I asked that in another thread, and it seems we can't reuse this.
>
>> - if (blk_mq_need_time_stamp(rq))
>> + /* called directly bypassing __blk_mq_complete_request */
>> + if (blk_mq_need_time_stamp(rq) && !now) {
>> now = ktime_get_ns();
>> + rq->io_end_time_ns = now;
>> + }
>>
>> if (rq->rq_flags & RQF_STATS) {
>> blk_mq_poll_stats_start(rq->q);
>> - blk_stat_add(rq, now);
>> + blk_stat_add(rq);
>> }
>>
>> if (rq->internal_tag != -1)
>> @@ -579,6 +583,9 @@ static void __blk_mq_complete_request(struct request *rq)
>> bool shared = false;
>> int cpu;
>>
>> + if (blk_mq_need_time_stamp(rq))
>> + rq->io_end_time_ns = ktime_get_ns();
>> +
>> WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
>> /*
>> * Most of single queue controllers, there is only one irq vector
>> diff --git a/block/blk-stat.c b/block/blk-stat.c
>> index 940f15d600f8..9b9b30927ea8 100644
>> --- a/block/blk-stat.c
>> +++ b/block/blk-stat.c
>> @@ -48,7 +48,7 @@ void blk_rq_stat_add(struct blk_rq_stat *stat, u64 value)
>> stat->nr_samples++;
>> }
>>
>> -void blk_stat_add(struct request *rq, u64 now)
>> +void blk_stat_add(struct request *rq)
>> {
>> struct request_queue *q = rq->q;
>> struct blk_stat_callback *cb;
>> @@ -56,7 +56,7 @@ void blk_stat_add(struct request *rq, u64 now)
>> int bucket;
>> u64 value;
>>
>> - value = (now >= rq->io_start_time_ns) ? now - rq->io_start_time_ns : 0;
>> + value = blk_rq_io_time(rq);
>>
>> blk_throtl_stat_add(rq, value);
>>
>> diff --git a/block/blk-stat.h b/block/blk-stat.h
>> index 17b47a86eefb..2653818cee36 100644
>> --- a/block/blk-stat.h
>> +++ b/block/blk-stat.h
>> @@ -65,7 +65,7 @@ struct blk_stat_callback {
>> struct blk_queue_stats *blk_alloc_queue_stats(void);
>> void blk_free_queue_stats(struct blk_queue_stats *);
>>
>> -void blk_stat_add(struct request *rq, u64 now);
>> +void blk_stat_add(struct request *rq);
>>
>> /* record time/size info in request but not add a callback */
>> void blk_stat_enable_accounting(struct request_queue *q);
>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>> index 592669bcc536..2a8d4b68d707 100644
>> --- a/include/linux/blkdev.h
>> +++ b/include/linux/blkdev.h
>> @@ -198,6 +198,9 @@ struct request {
>> u64 start_time_ns;
>> /* Time that I/O was submitted to the device. */
>> u64 io_start_time_ns;
>> + /* Time that I/O was reported completed by the device. */
>> + u64 io_end_time_ns;
>> +
>>
>> #ifdef CONFIG_BLK_WBT
>> unsigned short wbt_flags;
>> @@ -385,6 +388,14 @@ static inline int blkdev_reset_zones_ioctl(struct block_device *bdev,
>>
>> #endif /* CONFIG_BLK_DEV_ZONED */
>>
>> +static inline u64 blk_rq_io_time(struct request *rq)
>> +{
>> + u64 start = rq->io_start_time_ns;
>> + u64 end = rq->io_end_time_ns;
>> +
>> + return (end - start) ? end - start : 0;
>
> I think you meant:
>
> return end >= start ? end - start : 0;
>
Sure, thanks
>> +}
>> +
>> struct request_queue {
>> /*
>> * Together with queue_head for cacheline sharing
>> --
>> 2.21.0
>>
Attachment:
signature.asc
Description: OpenPGP digital signature