On Fri, Jan 14, 2022 at 04:21:04PM +0800, yukuai (C) wrote:Hi, Ming Lei
在 2022/01/14 11:05, Ming Lei 写道:
On Thu, Jan 13, 2022 at 04:46:18PM +0800, yukuai (C) wrote:
在 2022/01/12 11:05, Ming Lei 写道:
Hello Yu Kuai,Hi, Ming Lei
On Mon, Jan 10, 2022 at 09:47:58PM +0800, Yu Kuai wrote:
Throttled bios can't be issued after del_gendisk() is done, thus
it's better to cancel them immediately rather than waiting for
throttling to finish.
For example, if a user thread is throttled with a low bps limit while it's
issuing large IO and the device is deleted, the user thread will
wait a long time for the IO to return.
Note that this patch is mainly a revert of commit 32e3374304c7
("blk-throttle: remove tg_drain_bios") and commit b77412372b68
("blk-throttle: remove blk_throtl_drain").
Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx>
---
block/blk-throttle.c | 77 ++++++++++++++++++++++++++++++++++++++++++++
block/blk-throttle.h | 2 ++
block/genhd.c | 2 ++
3 files changed, 81 insertions(+)
Just wondering, why not use the built-in way in throtl_upgrade_state() for
canceling throttled bios? Something like the following; then we can avoid
re-inventing the wheel.
block/blk-throttle.c | 38 +++++++++++++++++++++++++++++++-------
block/blk-throttle.h | 2 ++
block/genhd.c | 3 +++
3 files changed, 36 insertions(+), 7 deletions(-)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index cf7e20804f1b..17e56b2e44c4 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1816,16 +1816,11 @@ static void throtl_upgrade_check(struct throtl_grp *tg)
throtl_upgrade_state(tg->td);
}
-static void throtl_upgrade_state(struct throtl_data *td)
+static void __throtl_cancel_bios(struct throtl_data *td)
{
struct cgroup_subsys_state *pos_css;
struct blkcg_gq *blkg;
- throtl_log(&td->service_queue, "upgrade to max");
- td->limit_index = LIMIT_MAX;
- td->low_upgrade_time = jiffies;
- td->scale = 0;
- rcu_read_lock();
blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
struct throtl_grp *tg = blkg_to_tg(blkg);
struct throtl_service_queue *sq = &tg->service_queue;
@@ -1834,12 +1829,41 @@ static void throtl_upgrade_state(struct throtl_data *td)
throtl_select_dispatch(sq);
throtl_schedule_next_dispatch(sq, true);
I'm confused about how bios can be canceled here.
tg->iops and tg->bps stay untouched, so how can throttled bios
be dispatched?
I thought that throttled bios would be canceled by 'tg->disptime = jiffies - 1;'
and the following dispatch schedule.
But it looks like that isn't enough, since tg_update_disptime() updates
->disptime. However,
this problem can be solved easily by not updating ->disptime when we are
canceling.
}
- rcu_read_unlock();
throtl_select_dispatch(&td->service_queue);
throtl_schedule_next_dispatch(&td->service_queue, true);
queue_work(kthrotld_workqueue, &td->dispatch_work);
}
+void blk_throtl_cancel_bios(struct request_queue *q)
+{
+ struct cgroup_subsys_state *pos_css;
+ struct blkcg_gq *blkg;
+
+ rcu_read_lock();
+ spin_lock_irq(&q->queue_lock);
+ __throtl_cancel_bios(q->td);
+ spin_unlock_irq(&q->queue_lock);
+ rcu_read_unlock();
+
+ blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg)
+ del_timer_sync(&blkg_to_tg(blkg)->service_queue.pending_timer);
+ del_timer_sync(&q->td->service_queue.pending_timer);
By the way, I think deleting the timers will end up causing an IO hang here if
some bios are still throttled.
Firstly ->queue_lock is held by blk_throtl_cancel_bios(), so no new bios
will be throttled.
Also if we don't update ->disptime, any new bios throttled after releasing
->queue_lock will be dispatched soon.
Hi, Ming Lei
Just out of curiosity, I'm still trying to understand the logic here:
For example, if bps is set to 1k, and an io with size 16k is just
dispatched, then io throttle should wait for 16s until new io can be
There isn't such wait code in blk-throttle, and the magic is just in
how to compute tg->disptime.
dispatched. (details in tg_with_in_bps_limit).
How is such a mechanism bypassed here?
The point is that tg->disptime is always set to a time in the past, so all
throttled IOs will be dispatched immediately if ->disptime is older than
jiffies, and I have verified that the following patch works as expected.
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 7c462c006b26..d9845afccd97 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -45,6 +45,7 @@ static struct workqueue_struct *kthrotld_workqueue;
enum tg_state_flags {
THROTL_TG_PENDING = 1 << 0, /* on parent's pending tree */
THROTL_TG_WAS_EMPTY = 1 << 1, /* bio_lists[] became non-empty */
+ THROTL_TG_CANCELING = 1 << 2, /* starts to cancel all bios */
};
#define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node)
@@ -974,6 +975,9 @@ static void tg_update_disptime(struct throtl_grp *tg)
unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime;
struct bio *bio;
+ if (tg->flags & THROTL_TG_CANCELING)
+ goto update;
+
bio = throtl_peek_queued(&sq->queued[READ]);
if (bio)
tg_may_dispatch(tg, bio, &read_wait);
@@ -983,6 +987,7 @@ static void tg_update_disptime(struct throtl_grp *tg)
tg_may_dispatch(tg, bio, &write_wait);
min_wait = min(read_wait, write_wait);
+update:
disptime = jiffies + min_wait;
/* Update dispatch time */
@@ -1836,6 +1841,25 @@ static void throtl_upgrade_state(struct throtl_data *td)
queue_work(kthrotld_workqueue, &td->dispatch_work);
}
+void blk_throtl_cancel_bios(struct request_queue *q)
+{
+ struct cgroup_subsys_state *pos_css;
+ struct blkcg_gq *blkg;
+
+ rcu_read_lock();
+ spin_lock_irq(&q->queue_lock);
+ blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) {
+ struct throtl_grp *tg = blkg_to_tg(blkg);
+ struct throtl_service_queue *sq = &tg->service_queue;
+
+ tg->disptime = jiffies - 1;
+ blkg_to_tg(blkg)->flags |= THROTL_TG_CANCELING;
+ throtl_schedule_pending_timer(sq, jiffies + 1);
+ }
+ spin_unlock_irq(&q->queue_lock);
+ rcu_read_unlock();
+}
+
static void throtl_downgrade_state(struct throtl_data *td)
{
td->scale /= 2;
diff --git a/block/blk-throttle.h b/block/blk-throttle.h
index 175f03abd9e4..b412a4d7cc1e 100644
--- a/block/blk-throttle.h
+++ b/block/blk-throttle.h
@@ -160,12 +160,14 @@ static inline void blk_throtl_exit(struct request_queue *q) { }
static inline void blk_throtl_register_queue(struct request_queue *q) { }
static inline void blk_throtl_charge_bio_split(struct bio *bio) { }
static inline bool blk_throtl_bio(struct bio *bio) { return false; }
+static inline void blk_throtl_cancel_bios(struct request_queue *q) {}
#else /* CONFIG_BLK_DEV_THROTTLING */
int blk_throtl_init(struct request_queue *q);
void blk_throtl_exit(struct request_queue *q);
void blk_throtl_register_queue(struct request_queue *q);
void blk_throtl_charge_bio_split(struct bio *bio);
bool __blk_throtl_bio(struct bio *bio);
+void blk_throtl_cancel_bios(struct request_queue *q);
static inline bool blk_throtl_bio(struct bio *bio)
{
struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg);
diff --git a/block/genhd.c b/block/genhd.c
index f7577dde18fc..a32d48b87223 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -29,6 +29,7 @@
#include "blk.h"
#include "blk-mq-sched.h"
+#include "blk-throttle.h"
static struct kobject *block_depr;
@@ -576,6 +577,8 @@ void del_gendisk(struct gendisk *disk)
blk_integrity_del(disk);
disk_del_events(disk);
+ blk_throtl_cancel_bios(disk->queue);
+
mutex_lock(&disk->open_mutex);
remove_inode_hash(disk->part0->bd_inode);
blk_drop_partitions(disk);
Thanks,
Ming
.