Re: [PATCH] blk-wbt: get back the missed wakeup from __wbt_done
From: Jens Axboe
Date: Fri Aug 24 2018 - 10:58:16 EST
On 8/24/18 8:40 AM, Jens Axboe wrote:
> On 8/23/18 8:06 PM, jianchao.wang wrote:
>> Hi Jens
>>
>> On 08/23/2018 11:42 PM, Jens Axboe wrote:
>>>> -
>>>> - __set_current_state(TASK_RUNNING);
>>>> - remove_wait_queue(&rqw->wait, &wait);
>>>> + wbt_init_wait(&wait, &data);
>>>> + prepare_to_wait_exclusive(&rqw->wait, &wait,
>>>> + TASK_UNINTERRUPTIBLE);
>>>> + if (lock) {
>>>> + spin_unlock_irq(lock);
>>>> + io_schedule();
>>>> + spin_lock_irq(lock);
>>>> + } else
>>>> + io_schedule();
>>> Aren't we still missing a get-token attempt after adding to the
>>> waitqueue? For the case where someone frees the token after your initial
>>> check, but before you add yourself to the waitqueue.
>>
>> I used to think about this.
>> However, there is a very tricky scenario here:
>> We will try get the wbt budget in wbt_wake_function.
>> After add a task into the wait queue, wbt_wake_function has been able to
>> be invoked for this task. If we get the wbt budget after prepare_to_wait_exclusive,
>> we may get wbt budget twice.
>
> Ah yes good point. But without it, you've got another race that will
> potentially put you to sleep forever.
>
> How about something like the below? That should take care of both
> situations. Totally untested.
Slightly better/cleaner one below. Still totally untested.
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 84507d3e9a98..bc13544943ff 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -123,16 +123,11 @@ static void rwb_wake_all(struct rq_wb *rwb)
}
}
-static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct)
+static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw,
+ enum wbt_flags wb_acct)
{
- struct rq_wb *rwb = RQWB(rqos);
- struct rq_wait *rqw;
int inflight, limit;
- if (!(wb_acct & WBT_TRACKED))
- return;
-
- rqw = get_rq_wait(rwb, wb_acct);
inflight = atomic_dec_return(&rqw->inflight);
/*
@@ -166,8 +161,21 @@ static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct)
int diff = limit - inflight;
if (!inflight || diff >= rwb->wb_background / 2)
- wake_up(&rqw->wait);
+ wake_up_all(&rqw->wait);
}
+
+}
+
+static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct)
+{
+ struct rq_wb *rwb = RQWB(rqos);
+ struct rq_wait *rqw;
+
+ if (!(wb_acct & WBT_TRACKED))
+ return;
+
+ rqw = get_rq_wait(rwb, wb_acct);
+ wbt_rqw_done(rwb, rqw, wb_acct);
}
/*
@@ -481,6 +489,32 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
return limit;
}
+struct wbt_wait_data {
+ struct task_struct *curr;
+ struct rq_wb *rwb;
+ struct rq_wait *rqw;
+ unsigned long rw;
+ bool got_token;
+};
+
+static int wbt_wake_function(wait_queue_entry_t *curr, unsigned int mode,
+ int wake_flags, void *key)
+{
+ struct wbt_wait_data *data = curr->private;
+
+ /*
+ * If we fail to get a budget, return -1 to interrupt the wake up
+ * loop in __wake_up_common.
+ */
+ if (!rq_wait_inc_below(data->rqw, get_limit(data->rwb, data->rw)))
+ return -1;
+
+ data->got_token = true;
+ wake_up_process(data->curr);
+ list_del_init(&curr->entry);
+ return 1;
+}
+
/*
* Block if we will exceed our limit, or if we are currently waiting for
* the timer to kick off queuing again.
@@ -491,31 +525,44 @@ static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
__acquires(lock)
{
struct rq_wait *rqw = get_rq_wait(rwb, wb_acct);
- DECLARE_WAITQUEUE(wait, current);
+ struct wbt_wait_data data = {
+ .curr = current,
+ .rwb = rwb,
+ .rqw = rqw,
+ .rw = rw,
+ };
+ struct wait_queue_entry wait = {
+ .func = wbt_wake_function,
+ .private = &data,
+ .entry = LIST_HEAD_INIT(wait.entry),
+ };
bool has_sleeper;
has_sleeper = wq_has_sleeper(&rqw->wait);
if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw)))
return;
- add_wait_queue_exclusive(&rqw->wait, &wait);
- do {
- set_current_state(TASK_UNINTERRUPTIBLE);
+ prepare_to_wait_exclusive(&rqw->wait, &wait, TASK_UNINTERRUPTIBLE);
- if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw)))
- break;
+ if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw))) {
+ finish_wait(&rqw->wait, &wait);
- if (lock) {
- spin_unlock_irq(lock);
- io_schedule();
- spin_lock_irq(lock);
- } else
- io_schedule();
- has_sleeper = false;
- } while (1);
-
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&rqw->wait, &wait);
+ /*
+ * We raced with wbt_wake_function() getting a token, which
+ * means we now have two. Put ours and wake anyone else
+ * potentially waiting for one.
+ */
+ if (data.got_token)
+ wbt_rqw_done(rwb, rqw, wb_acct);
+ return;
+ }
+
+ if (lock) {
+ spin_unlock_irq(lock);
+ io_schedule();
+ spin_lock_irq(lock);
+ } else
+ io_schedule();
}
static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
--
Jens Axboe