Re: [PATCH V7 1/8] block, bfq: split sync bfq_queues on a per-actuator basis

From: Paolo Valente
Date: Tue Dec 06 2022 - 11:27:10 EST

> Il giorno 6 dic 2022, alle ore 09:53, Damien Le Moal <damien.lemoal@xxxxxxxxxxxxxxxxxx> ha scritto:
>
> [...]
>> return bfqg;
>> diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
>> index 7ea427817f7f..127aeecaf903 100644
>> --- a/block/bfq-iosched.c
>> +++ b/block/bfq-iosched.c
>> @@ -377,14 +377,21 @@ static const unsigned long bfq_late_stable_merging = 600;
>> #define RQ_BIC(rq) ((struct bfq_io_cq *)((rq)->elv.priv[0]))
>> #define RQ_BFQQ(rq) ((rq)->elv.priv[1])
>>
>> -struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync)
>> +struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync,
>> + unsigned int actuator_idx)
>> {
>> - return bic->bfqq[is_sync];
>
> See below. But here, you could add:
>
> if (!bic)
> return NULL;
>
>> + if (is_sync)
>> + return bic->bfqq[1][actuator_idx];
>> +
>> + return bic->bfqq[0][actuator_idx];
>> }
>>
>> static void bfq_put_stable_ref(struct bfq_queue *bfqq);
>>
>> -void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync)
>> +void bic_set_bfqq(struct bfq_io_cq *bic,
>> + struct bfq_queue *bfqq,
>> + bool is_sync,
>> + unsigned int actuator_idx)
>> {
>> /*
>> * If bfqq != NULL, then a non-stable queue merge between
>> @@ -399,7 +406,10 @@ void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync)
>> * we cancel the stable merge if
>> * bic->stable_merge_bfqq == bfqq.
>> */
>> - bic->bfqq[is_sync] = bfqq;
>> + if (is_sync)
>> + bic->bfqq[1][actuator_idx] = bfqq;
>> + else
>> + bic->bfqq[0][actuator_idx] = bfqq;
>>
>> if (bfqq && bic->stable_merge_bfqq == bfqq) {
>> /*
>> @@ -672,9 +682,9 @@ static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
>> {
>> struct bfq_data *bfqd = data->q->elevator->elevator_data;
>> struct bfq_io_cq *bic = bfq_bic_lookup(data->q);
>> - struct bfq_queue *bfqq = bic ? bic_to_bfqq(bic, op_is_sync(opf)) : NULL;
>> int depth;
>> unsigned limit = data->q->nr_requests;
>> + unsigned int act_idx;
>>
>> /* Sync reads have full depth available */
>> if (op_is_sync(opf) && !op_is_write(opf)) {
>> @@ -684,14 +694,21 @@ static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
>> limit = (limit * depth) >> bfqd->full_depth_shift;
>> }
>>
>> - /*
>> - * Does queue (or any parent entity) exceed number of requests that
>> - * should be available to it? Heavily limit depth so that it cannot
>> - * consume more available requests and thus starve other entities.
>> - */
>> - if (bfqq && bfqq_request_over_limit(bfqq, limit))
>> - depth = 1;
>> + for (act_idx = 0; act_idx < bfqd->num_actuators; act_idx++) {
>> + struct bfq_queue *bfqq =
>> + bic ? bic_to_bfqq(bic, op_is_sync(opf), act_idx) : NULL;
>
> You could return NULL in bic_to_bfqq() if bic is NULL. That would avoid
> this cludge.

Actually, this would improve the code here, but it would entail the
above (useless) check for all the other invocations :(
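
Just to make the trade-off explicit, the NULL-tolerant variant you
suggest would look more or less like this (only a sketch; as far as I
can see, bfq_limit_depth() is the only path where bic can be NULL):

struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync,
			      unsigned int actuator_idx)
{
	/* bic is NULL only in the bfq_limit_depth() path */
	if (!bic)
		return NULL;

	if (is_sync)
		return bic->bfqq[1][actuator_idx];

	return bic->bfqq[0][actuator_idx];
}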

>
>>
>> + /*
>> + * Does queue (or any parent entity) exceed number of
>> + * requests that should be available to it? Heavily
>> + * limit depth so that it cannot consume more
>> + * available requests and thus starve other entities.
>> + */
>> + if (bfqq && bfqq_request_over_limit(bfqq, limit)) {
>> + depth = 1;
>> + break;
>> + }
>> + }
>> bfq_log(bfqd, "[%s] wr_busy %d sync %d depth %u",
>> __func__, bfqd->wr_busy_queues, op_is_sync(opf), depth);
>> if (depth)
>> @@ -1812,6 +1829,18 @@ static bool bfq_bfqq_higher_class_or_weight(struct bfq_queue *bfqq,
>> return bfqq_weight > in_serv_weight;
>> }
>>
>> +/*
>> + * Get the index of the actuator that will serve bio.
>> + */
>> +static unsigned int bfq_actuator_index(struct bfq_data *bfqd, struct bio *bio)
>> +{
>> + /*
>> + * Multi-actuator support not complete yet, so always return 0
>> + * for the moment (to keep incomplete mechanisms off).
>> + */
>> + return 0;
>> +}
>> +
>> static bool bfq_better_to_idle(struct bfq_queue *bfqq);
>>
>> static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
>> @@ -2142,7 +2171,7 @@ static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq,
>> * We reset waker detection logic also if too much time has passed
>> * since the first detection. If wakeups are rare, pointless idling
>> * doesn't hurt throughput that much. The condition below makes sure
>> - * we do not uselessly idle blocking waker in more than 1/64 cases.
>> + * we do not uselessly idle blocking waker in more than 1/64 cases.
>> */
>> if (bfqd->last_completed_rq_bfqq !=
>> bfqq->tentative_waker_bfqq ||
>> @@ -2478,7 +2507,8 @@ static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,
>> */
>> bfq_bic_update_cgroup(bic, bio);
>>
>> - bfqd->bio_bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf));
>> + bfqd->bio_bfqq = bic_to_bfqq(bic, op_is_sync(bio->bi_opf),
>> + bfq_actuator_index(bfqd, bio));
>
> Given that you repeat this pattern a lot, might be worth having a wrapper
> like:
>
> static inline struct bfq_queue *bio_to_bfqq(struct bfq_io_cq *bic,
> struct bio *bio)
> {
> return bic_to_bfqq(bic, op_is_sync(bio->bi_opf),
> bfq_actuator_index(bfqd, bio));
> }
>
> The code would be less verbose while still being clear.

Actually, this (exact) pattern is used only twice. I'm thinking of
some more general wrapper, but each different combination seems to
come with its own extra cost; a sketch of the most general form I see
is below. Any suggestion is more than welcome.
Meanwhile, I'll send a V8 to keep this going.
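
For reference, the most general form I can think of is essentially
your wrapper, just with bfqd passed explicitly, which is part of the
extra cost I mention (the name bio_to_bfqq is only illustrative, taken
from your suggestion):

static inline struct bfq_queue *bio_to_bfqq(struct bfq_data *bfqd,
					    struct bfq_io_cq *bic,
					    struct bio *bio)
{
	return bic_to_bfqq(bic, op_is_sync(bio->bi_opf),
			   bfq_actuator_index(bfqd, bio));
}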

As usual, in my V8 I'll apply all of your other suggestions.

Thanks,
Paolo

[...]