RFC: [PATCH] Add a sysfs entry queue/ignore_flushes.

From: Anthony DeRobertis
Date: Fri Sep 09 2011 - 16:34:41 EST


I have an Oracle workload that ran quite fast under 2.6.32 due to mdraid
not supporting barriers. 2.6.33 slowed it down very substantially, and
it remains slow in 3.1. (Oracle is running on raw LVM logical volumes,
not filesystems, so ext4 barrier=0 doesn't help).

This patch adds a sysfs attribute to allow flush/FUA to be turned off
(ignored) on a per-block-device basis. I have tested it on both a VM and
real hardware.

I'm not sure that queue_flags is the best place to put the bit; it seems
like it may more logically go in flush_flags, but everything else is in
queue_flags.

I'm not subscribed, but will be looking for replies via list archives.
Feel free to CC me.

diff --git a/block/blk-core.c b/block/blk-core.c
index 90e1ffd..fd4f1c1 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1515,9 +1515,11 @@ static inline void __generic_make_request(struct bio *bio)
/*
* Filter flush bio's early so that make_request based
* drivers without flush support don't have to worry
- * about them.
+ * about them. Also filter if configured to ignore flushes.
*/
- if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) {
+ if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) &&
+ (!q->flush_flags ||
+ test_bit(QUEUE_FLAG_IGNORE_FLUSH, &q->queue_flags))) {
bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);
if (!nr_sectors) {
err = 0;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 0ee17b5..8f19827 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -214,6 +214,7 @@ queue_store_##name(struct request_queue *q, const char *page, size_t count) \

QUEUE_SYSFS_BIT_FNS(nonrot, NONROT, 1);
QUEUE_SYSFS_BIT_FNS(random, ADD_RANDOM, 0);
+QUEUE_SYSFS_BIT_FNS(ignore_flushes, IGNORE_FLUSH, 0);
QUEUE_SYSFS_BIT_FNS(iostats, IO_STAT, 0);
#undef QUEUE_SYSFS_BIT_FNS

@@ -385,6 +386,12 @@ static struct queue_sysfs_entry queue_random_entry = {
.store = queue_store_random,
};

+static struct queue_sysfs_entry queue_ignore_flushes_entry = {
+ .attr = {.name = "ignore_flushes", .mode = S_IRUGO | S_IWUSR },
+ .show = queue_show_ignore_flushes,
+ .store = queue_store_ignore_flushes,
+};
+
static struct attribute *default_attrs[] = {
&queue_requests_entry.attr,
&queue_ra_entry.attr,
@@ -407,6 +414,7 @@ static struct attribute *default_attrs[] = {
&queue_rq_affinity_entry.attr,
&queue_iostats_entry.attr,
&queue_random_entry.attr,
+ &queue_ignore_flushes_entry.attr,
NULL,
};

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 84b15d5..61b39e0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -408,6 +408,7 @@ struct request_queue {
#define QUEUE_FLAG_ADD_RANDOM 16 /* Contributes to random pool */
#define QUEUE_FLAG_SECDISCARD 17 /* supports SECDISCARD */
#define QUEUE_FLAG_SAME_FORCE 18 /* force complete on same CPU */
+#define QUEUE_FLAG_IGNORE_FLUSH 19 /* ignore REQ_FLUSH and REQ_FUA */

#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_STACKABLE) | \


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/