[RFC PATCH 4/5] block, mm: Added the necessary plumbing to take ioprio hints down to block layer

From: Jason B. Akers
Date: Wed Oct 29 2014 - 14:36:49 EST


From: Kapil Karkra <kapil.karkra@xxxxxxxxx>

Add the necessary plumbing to carry the ioprio hints down to the block
layer, from where they flow further down into libata. For reads and
direct IO, bio_associate_ioprio() (invoked from blk_throtl_bio()) copies
the ioprio from the current io context into the bio in the submit_bio
context. For lazy writes, 3 bits of the page flags are used to record
the ioprio in every page associated with a particular IO. Since page
flags are scarce, this is enabled only on 64-bit platforms. We take the
ioprio from the current io context and store it into each page in
grab_cache_page_write_begin(). bio_associate_ioprio() then walks
through all pages of the bio and determines the overall best priority
to associate with the bio. The bio carries the io priority further down
the IO stack.

Signed-off-by: Kapil Karkra <kapil.karkra@xxxxxxxxx>
Signed-off-by: Jason B. Akers <jason.b.akers@xxxxxxxxx>
---
block/bio.c | 34 ++++++++++++++++++++++++++++++++++
block/blk-throttle.c | 5 +++++
include/linux/bio.h | 1 +
include/linux/page-flags.h | 24 ++++++++++++++++++++++++
mm/debug.c | 5 +++++
mm/filemap.c | 18 ++++++++++++++++++
6 files changed, 87 insertions(+)

diff --git a/block/bio.c b/block/bio.c
index b93ae04..cc5cc64 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1965,6 +1965,40 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
}
EXPORT_SYMBOL(bioset_create);

+/**
+ * bio_associate_ioprio - pick the best ioprio for a bio and attach it
+ * @bio: bio that is being submitted
+ *
+ * Collects the ioprio advice recorded in the page flags of every page in
+ * @bio, combines it with the ioprio of the submitting task's io_context
+ * via ioprio_best(), and stores the result in @bio. @bio is also
+ * associated with that io_context through bio_associate_ioc().
+ *
+ * Return: 0 on success, or -ENOENT if %current has no io_context, in
+ * which case @bio is left untouched.
+ */
+int bio_associate_ioprio(struct bio *bio)
+{
+	struct io_context *ioc = current->io_context;
+	struct bio_vec bv;
+	struct bvec_iter iter;
+	int page_ioprio = 0;	/* 0 (invalid): no valid page advice seen yet */
+
+	if (!ioc)
+		return -ENOENT;
+
+	/* Fold the advice of every page in the bio into the best one seen. */
+	bio_for_each_segment(bv, bio, iter) {
+		int ioprio = IOPRIO_ADVISE(0, 0, PageGetAdvice(bv.bv_page));
+
+		if (ioprio_advice_valid(ioprio))
+			page_ioprio = ioprio_best(ioprio, page_ioprio);
+	}
+
+	/*
+	 * Associate @bio with @ioc (per the original author's note this
+	 * acquires an active reference on @ioc).
+	 */
+	bio_associate_ioc(bio, ioc);
+
+	/*
+	 * Set the priority exactly once: the better of the submitter's
+	 * ioprio and the page advice. An earlier bio_set_prio() with the
+	 * page advice alone was always overwritten by this call.
+	 */
+	bio_set_prio(bio, ioprio_best(ioc->ioprio, page_ioprio));
+
+	return 0;
+}
+
/**
* bioset_create_nobvec - Create a bio_set without bio_vec mempool
* @pool_size: Number of bio to cache in the mempool
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 9273d09..abc33a5 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1484,6 +1484,11 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
struct blkcg *blkcg;
bool throttled = false;

+ /* associate the best ioprio to the bio */
+ spin_lock_irq(q->queue_lock);
+ bio_associate_ioprio(bio);
+ spin_unlock_irq(q->queue_lock);
+
/* see throtl_charge_bio() */
if (bio->bi_rw & REQ_THROTTLED)
goto out;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 8419319..4747c78 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -470,6 +470,7 @@ extern struct bio_vec *bvec_alloc(gfp_t, int, unsigned long *, mempool_t *);
extern void bvec_free(mempool_t *, struct bio_vec *, unsigned int);
extern unsigned int bvec_nr_vecs(unsigned short idx);

+int bio_associate_ioprio(struct bio *bio);
int bio_associate_current(struct bio *bio);
void bio_disassociate_task(struct bio *bio);

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index e1f5fcd..8811234 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -109,6 +109,11 @@ enum pageflags {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
PG_compound_lock,
#endif
+#ifdef CONFIG_PAGEFLAGS_EXTENDED
+ PG_ioprio_advice_0, /* 3 flag bits store ioprio advice */
+ PG_ioprio_advice_1,
+ PG_ioprio_advice_2,
+#endif
__NR_PAGEFLAGS,

/* Filesystems */
@@ -370,6 +375,25 @@ static inline void ClearPageCompound(struct page *page)

#define PG_head_mask ((1L << PG_head))

+/*
+ * ioprio advice is recorded in three page flag bits:
+ * PG_ioprio_advice_0 (bit 0 of the advice) through
+ * PG_ioprio_advice_2 (bit 2 of the advice).
+ */
+
+/**
+ * PageSetAdvice - record ioprio advice in a page's flags
+ * @page: page to tag
+ * @advice: advice value; only its low three bits are stored
+ *
+ * Every advice bit is explicitly set or cleared, so the stored value
+ * becomes exactly @advice & 7 and replaces any advice recorded earlier
+ * (OR-ing the new bits in would let stale bits accumulate).
+ *
+ * The atomic bitops are used instead of a plain read-modify-write of
+ * page->flags so that concurrent updates of other flag bits are not
+ * lost. The three advice bits are still updated one at a time, not as
+ * a single atomic unit.
+ */
+static inline void PageSetAdvice(struct page *page, unsigned int advice)
+{
+	if (advice & 1)
+		set_bit(PG_ioprio_advice_0, &page->flags);
+	else
+		clear_bit(PG_ioprio_advice_0, &page->flags);
+
+	if (advice & 2)
+		set_bit(PG_ioprio_advice_1, &page->flags);
+	else
+		clear_bit(PG_ioprio_advice_1, &page->flags);
+
+	if (advice & 4)
+		set_bit(PG_ioprio_advice_2, &page->flags);
+	else
+		clear_bit(PG_ioprio_advice_2, &page->flags);
+}
+
+/**
+ * PageGetAdvice - read the ioprio advice stored in a page's flags
+ * @page: page to inspect
+ *
+ * Return: the 3-bit advice value assembled from PG_ioprio_advice_0
+ * (bit 0), PG_ioprio_advice_1 (bit 1) and PG_ioprio_advice_2 (bit 2).
+ */
+static inline int PageGetAdvice(struct page *page)
+{
+	unsigned long flags = page->flags;
+	unsigned int advice = 0;
+
+	if (flags & (1UL << PG_ioprio_advice_0))
+		advice |= 1U << 0;
+	if (flags & (1UL << PG_ioprio_advice_1))
+		advice |= 1U << 1;
+	if (flags & (1UL << PG_ioprio_advice_2))
+		advice |= 1U << 2;
+
+	return advice;
+}
+
#else
/*
* Reduce page flag use as much as possible by overlapping
diff --git a/mm/debug.c b/mm/debug.c
index 5ce45c9..c785b06 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -48,6 +48,11 @@ static const struct trace_print_flags pageflag_names[] = {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
{1UL << PG_compound_lock, "compound_lock" },
#endif
+#ifdef CONFIG_PAGEFLAGS_EXTENDED
+ {1UL << PG_ioprio_advice_0, "ioprio_adv0" },
+ {1UL << PG_ioprio_advice_1, "ioprio_adv1" },
+ {1UL << PG_ioprio_advice_2, "ioprio_adv2" },
+#endif
};

static void dump_flags(unsigned long flags,
diff --git a/mm/filemap.c b/mm/filemap.c
index 14b4642..f82529d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2438,6 +2438,9 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping,
{
struct page *page;
int fgp_flags = FGP_LOCK|FGP_ACCESSED|FGP_WRITE|FGP_CREAT;
+ struct io_context *ioc;
+ int advice;
+ int ioprio;

if (flags & AOP_FLAG_NOFS)
fgp_flags |= FGP_NOFS;
@@ -2448,6 +2451,21 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping,
if (page)
wait_for_stable_page(page);

+ /* store the ioprio into the page flags */
+ if (current && current->io_context) {
+ ioc = current->io_context;
+ advice = PageGetAdvice(page);
+ ioprio = IOPRIO_ADVISE(0, 0, advice);
+ if (ioprio_advice_valid(ioc->ioprio)) {
+ if (ioprio_advice_valid(ioprio))
+ ioprio = ioprio_best(ioprio, ioc->ioprio);
+ else
+ ioprio = ioc->ioprio;
+
+ PageSetAdvice(page, IOPRIO_ADVICE(ioprio));
+ }
+ }
+
return page;
}
EXPORT_SYMBOL(grab_cache_page_write_begin);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/