[PATCH] psi: annotate refault stalls from IO submission

From: Johannes Weiner
Date: Thu Jul 11 2019 - 16:01:40 EST


psi tracks the time tasks wait for refaulting pages to become
uptodate, but it does not track the time spent submitting the IO. The
submission part can be significant if backing storage is contended or
when cgroup throttling (io.latency) is in effect - a lot of time is
spent in submit_bio(). In that case, we underreport memory pressure.

Annotate submit_bio() to account submission time as memory stall when
the bio is reading userspace workingset pages.

Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx>
---
block/bio.c | 3 +++
block/blk-core.c | 23 ++++++++++++++++++++++-
include/linux/blk_types.h | 1 +
3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/block/bio.c b/block/bio.c
index 29cd6cf4da51..4dd9ea0b068b 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -805,6 +805,9 @@ void __bio_add_page(struct bio *bio, struct page *page,

bio->bi_iter.bi_size += len;
bio->bi_vcnt++;
+
+ if (!bio_flagged(bio, BIO_WORKINGSET) && unlikely(PageWorkingset(page)))
+ bio_set_flag(bio, BIO_WORKINGSET);
}
EXPORT_SYMBOL_GPL(__bio_add_page);

diff --git a/block/blk-core.c b/block/blk-core.c
index 5d1fc8e17dd1..5993922d63fb 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -36,6 +36,7 @@
#include <linux/blk-cgroup.h>
#include <linux/debugfs.h>
#include <linux/bpf.h>
+#include <linux/psi.h>

#define CREATE_TRACE_POINTS
#include <trace/events/block.h>
@@ -1127,6 +1128,10 @@ EXPORT_SYMBOL_GPL(direct_make_request);
*/
blk_qc_t submit_bio(struct bio *bio)
{
+ bool workingset_read = false;
+ unsigned long pflags;
+ blk_qc_t ret;
+
/*
* If it's a regular read/write or a barrier with data attached,
* go through the normal accounting stuff before submission.
@@ -1142,6 +1147,8 @@ blk_qc_t submit_bio(struct bio *bio)
if (op_is_write(bio_op(bio))) {
count_vm_events(PGPGOUT, count);
} else {
+ if (bio_flagged(bio, BIO_WORKINGSET))
+ workingset_read = true;
task_io_account_read(bio->bi_iter.bi_size);
count_vm_events(PGPGIN, count);
}
@@ -1156,7 +1163,21 @@ blk_qc_t submit_bio(struct bio *bio)
}
}

- return generic_make_request(bio);
+ /*
+ * If we're reading data that is part of the userspace
+ * workingset, count submission time as memory stall. When the
+ * device is congested, or the submitting cgroup IO-throttled,
+ * submission can be a significant part of overall IO time.
+ */
+ if (workingset_read)
+ psi_memstall_enter(&pflags);
+
+ ret = generic_make_request(bio);
+
+ if (workingset_read)
+ psi_memstall_leave(&pflags);
+
+ return ret;
}
EXPORT_SYMBOL(submit_bio);

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 6a53799c3fe2..2f77e3446760 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -209,6 +209,7 @@ enum {
BIO_BOUNCED, /* bio is a bounce bio */
BIO_USER_MAPPED, /* contains user pages */
BIO_NULL_MAPPED, /* contains invalid user pages */
+ BIO_WORKINGSET, /* contains userspace workingset pages */
BIO_QUIET, /* Make BIO Quiet */
BIO_CHAIN, /* chained bio, ->bi_remaining in effect */
BIO_REFFED, /* bio has elevated ->bi_cnt */
--
2.22.0