[PATCH 3/4] block: Add seek histograms to the block histograms
From: Divyesh Shah
Date: Thu Apr 15 2010 - 01:45:54 EST
Signed-off-by: Divyesh Shah <dpshah@xxxxxxxxxx>
From: Edward Falk <efalk@xxxxxxxxxx>
---
block/Kconfig | 9 ++++
block/genhd.c | 103 +++++++++++++++++++++++++++++++++++++++++++------
fs/partitions/check.c | 4 ++
include/linux/genhd.h | 14 ++++++-
4 files changed, 117 insertions(+), 13 deletions(-)
diff --git a/block/Kconfig b/block/Kconfig
index b62fe49..5dbc10b 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -126,6 +126,15 @@ config HISTO_TIME_BUCKETS
This option controls how many buckets are used to collect
transfer time statistics.
+config HISTO_SEEK_BUCKETS
+ int "Number of seek buckets in histogram"
+ depends on BLOCK_HISTOGRAM
+ default "20"
+ ---help---
+ This option controls how many buckets are used to collect
+ disk seek statistics. The actual number of buckets is 1 greater
+ than the number specified here as the last bucket is a catch-all one.
+
endif # BLOCK
config BLOCK_COMPAT
diff --git a/block/genhd.c b/block/genhd.c
index 3666cf2..8920994 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -890,6 +890,8 @@ static DEVICE_ATTR(write_request_histo, S_IRUGO | S_IWUSR,
part_write_request_histo_show, part_write_histo_clear);
static DEVICE_ATTR(write_dma_histo, S_IRUGO | S_IWUSR,
part_write_dma_histo_show, part_write_histo_clear);
+static DEVICE_ATTR(seek_histo, S_IRUGO | S_IWUSR,
+ part_seek_histo_show, part_seek_histo_clear);
#endif
#ifdef CONFIG_FAIL_MAKE_REQUEST
static struct device_attribute dev_attr_fail =
@@ -917,6 +919,7 @@ static struct attribute *disk_attrs[] = {
&dev_attr_read_dma_histo.attr,
&dev_attr_write_request_histo.attr,
&dev_attr_write_dma_histo.attr,
+ &dev_attr_seek_histo.attr,
#endif
#ifdef CONFIG_FAIL_MAKE_REQUEST
&dev_attr_fail.attr,
@@ -1304,6 +1307,8 @@ int invalidate_partition(struct gendisk *disk, int partno)
EXPORT_SYMBOL(invalidate_partition);
#ifdef CONFIG_BLOCK_HISTOGRAM
+typedef void (part_histo_reset) (struct disk_stats *, int);
+
/*
* Clear one per-cpu instance of a particular I/O histogram. This should always
* be called between part_stat_lock() and part_stat_unklock() calls.
@@ -1317,23 +1322,27 @@ static inline void __block_part_histogram_reset(struct disk_stats *stats,
memset(&stats->wr_histo, 0, sizeof(stats->wr_histo));
}
+/*
+ * Clear one instance of the seek histogram. The second argument is ignored;
+ * it is only present so that this function matches the part_histo_reset
+ * signature.
+ */
+static inline void __block_part_seek_histogram_reset(struct disk_stats *stats,
+ int dummy)
+{
+ memset(&stats->seek_histo, 0, sizeof(stats->seek_histo));
+}
+
/*
* Clear the I/O histogram for a given partition.
+ *
+ * @part: partition whose statistics are cleared (on SMP, the copy of every
+ * possible CPU)
+ * @reset_fn: callback that clears one disk_stats instance
+ * @direction: passed through unchanged to @reset_fn
*/
-static void block_part_histogram_reset(struct hd_struct *part, int direction)
+static void block_part_histogram_reset(struct hd_struct *part,
+ part_histo_reset *reset_fn, int direction)
{
#ifdef CONFIG_SMP
int i;
part_stat_lock();
- for_each_possible_cpu(i) {
- if (cpu_possible(i))
- __block_part_histogram_reset(per_cpu_ptr(part->dkstats,
- i), direction);
- }
+ for_each_possible_cpu(i)
+ reset_fn(per_cpu_ptr(part->dkstats, i), direction);
#else
part_stat_lock();
- __block_part_histogram_reset(&part.dkstats, direction);
+ /* part is a pointer, so the embedded UP stats must be reached with "->". */
+ reset_fn(&part->dkstats, direction);
#endif
part_stat_unlock();
}
@@ -1342,7 +1351,8 @@ static void block_part_histogram_reset(struct hd_struct *part, int direction)
* Iterate though all partitions of the disk and clear the specified
* (read/write) histogram.
*/
-static int block_disk_histogram_reset(struct hd_struct *part, int direction)
+static int block_disk_histogram_reset(struct hd_struct *part,
+ part_histo_reset *reset_fn, int direction)
{
struct disk_part_iter piter;
struct gendisk *disk = part_to_disk(part);
@@ -1353,11 +1363,16 @@ static int block_disk_histogram_reset(struct hd_struct *part, int direction)
disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY_PART0);
while ((temp = disk_part_iter_next(&piter)))
- block_part_histogram_reset(temp, direction);
+ block_part_histogram_reset(temp, reset_fn, direction);
disk_part_iter_exit(&piter);
return 0;
}
+/*
+ * Seed the seek tracking state of a partition: last_end_sector starts out at
+ * the partition's start sector (start_sect).
+ */
+void init_part_histo_defaults(struct hd_struct *part)
+{
+ part->last_end_sector = part->start_sect;
+}
+
/*
* Map transfer size to histogram bucket. Transfer sizes are exponentially
* increasing. For example: 4,8,16,... sectors.
@@ -1397,6 +1412,15 @@ static inline int stats_time_bucket(int jiffies)
}
/*
+ * Translate a seek distance (in sectors) into a seek histogram index.
+ * Bucket bounds double at each step: 8, 16, 32, ... sectors. Any index
+ * past the configured range is clamped to CONFIG_HISTO_SEEK_BUCKETS.
+ */
+static inline int stats_seek_bucket(sector_t distance)
+{
+ int bucket = fls64(distance >> 3);
+
+ return min(bucket, CONFIG_HISTO_SEEK_BUCKETS);
+}
+
+/*
* Log I/O completion, update histogram.
*
* @part: disk device partition
@@ -1407,11 +1431,20 @@ static inline int stats_time_bucket(int jiffies)
static inline void __block_histogram_completion(int cpu, struct hd_struct *part,
struct request *req, unsigned int req_ms, unsigned int dma_ms)
{
- sector_t sectors = blk_rq_size(req);
+ sector_t sectors = blk_rq_size(req), end_sector = blk_rq_pos(req);
+ sector_t distance, start_sector = end_sector - sectors;
int size_idx = stats_size_bucket(sectors);
int req_time_idx = stats_time_bucket(req_ms);
int dma_time_idx = stats_time_bucket(dma_ms);
+ if (start_sector >= part->last_end_sector)
+ distance = start_sector - part->last_end_sector;
+ else
+ distance = part->last_end_sector - start_sector;
+
+ part_stat_inc(cpu, part, seek_histo[stats_seek_bucket(distance)]);
+ part->last_end_sector = end_sector;
+
if (!rq_data_dir(req))
part_stat_inc(cpu, part,
rd_histo[HISTO_REQUEST][size_idx][req_time_idx]);
@@ -1455,6 +1488,11 @@ static uint64_t histo_stat_read(struct hd_struct *part, int direction,
part_stat_read(part, wr_histo[i][j][k]);
}
+/*
+ * Return the count held in bucket i of part's seek histogram, as reported by
+ * part_stat_read().
+ */
+static uint64_t seek_histo_stat_read(struct hd_struct *part, int i)
+{
+ return part_stat_read(part, seek_histo[i]);
+}
+
/*
* Dumps the specified 'type' of histogram for part to out.
* The result must be less than PAGE_SIZE.
@@ -1508,6 +1546,28 @@ static int dump_histo(struct hd_struct *part, int direction, int type,
}
/*
+ * Dumps the seek histogram for part into page, one "<bound>\t<count>" line
+ * per bucket, with "inf" as the bound of the final catch-all bucket.
+ *
+ * Output is clamped to PAGE_SIZE: scnprintf() returns the number of bytes
+ * actually stored, so page and rem can never step past the end of the buffer
+ * (snprintf() would return the untruncated length instead).
+ *
+ * Returns the number of bytes written, excluding the trailing NUL.
+ */
+static int dump_seek_histo(struct hd_struct *part, char *page)
+{
+ ssize_t rem = PAGE_SIZE;
+ char *optr = page;
+ int i, len;
+
+ for (i = 0; i < CONFIG_HISTO_SEEK_BUCKETS + 1; i++) {
+ if (i < CONFIG_HISTO_SEEK_BUCKETS)
+ len = scnprintf(page, rem, "%lu\t%llu\n",
+ 1UL << (i + 3), seek_histo_stat_read(part, i));
+ else
+ len = scnprintf(page, rem, "inf\t%llu\n",
+ seek_histo_stat_read(part, i));
+ page += len;
+ rem -= len;
+ }
+ return page - optr;
+}
+
+/*
* sysfs show() methods for the four histogram channels.
*/
ssize_t part_read_request_histo_show(struct device *dev,
@@ -1534,6 +1594,12 @@ ssize_t part_write_dma_histo_show(struct device *dev,
return dump_histo(dev_to_part(dev), WRITE, HISTO_DMA, page);
}
+/*
+ * sysfs show() method for the seek_histo attribute; the output is produced by
+ * dump_seek_histo().
+ */
+ssize_t part_seek_histo_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ return dump_seek_histo(dev_to_part(dev), page);
+}
+
/*
* Reinitializes the read histograms to 0.
*/
@@ -1541,7 +1607,8 @@ ssize_t part_read_histo_clear(struct device *dev,
struct device_attribute *attr, const char *page, size_t count)
{
/* Ignore the data, just clear the histogram */
- int retval = block_disk_histogram_reset(dev_to_part(dev), READ);
+ int retval = block_disk_histogram_reset(dev_to_part(dev),
+ __block_part_histogram_reset, READ);
return (retval == 0 ? count : retval);
}
@@ -1551,7 +1618,19 @@ ssize_t part_read_histo_clear(struct device *dev,
ssize_t part_write_histo_clear(struct device *dev,
struct device_attribute *attr, const char *page, size_t count)
{
- int retval = block_disk_histogram_reset(dev_to_part(dev), WRITE);
+ int retval = block_disk_histogram_reset(dev_to_part(dev),
+ __block_part_histogram_reset, WRITE);
+ return (retval == 0 ? count : retval);
+}
+
+/*
+ * Reinitializes the seek histograms to 0.
+ *
+ * sysfs store() method for the seek_histo attribute. The written data is
+ * ignored. Returns count when block_disk_histogram_reset() returns 0,
+ * otherwise returns that function's return value.
+ */
+ssize_t part_seek_histo_clear(struct device *dev,
+ struct device_attribute *attr, const char *page, size_t count)
+{
+ /* The trailing 0 is the direction argument, unused by the seek reset. */
+ int retval = block_disk_histogram_reset(dev_to_part(dev),
+ __block_part_seek_histogram_reset, 0);
return (retval == 0 ? count : retval);
}
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index e0044d4..47e2591 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -309,6 +309,8 @@ static DEVICE_ATTR(write_request_histo, S_IRUGO | S_IWUSR,
part_write_request_histo_show, part_write_histo_clear);
static DEVICE_ATTR(write_dma_histo, S_IRUGO | S_IWUSR,
part_write_dma_histo_show, part_write_histo_clear);
+static DEVICE_ATTR(seek_histo, S_IRUGO | S_IWUSR,
+ part_seek_histo_show, part_seek_histo_clear);
#endif
#ifdef CONFIG_FAIL_MAKE_REQUEST
static struct device_attribute dev_attr_fail =
@@ -328,6 +330,7 @@ static struct attribute *part_attrs[] = {
&dev_attr_read_dma_histo.attr,
&dev_attr_write_request_histo.attr,
&dev_attr_write_dma_histo.attr,
+ &dev_attr_seek_histo.attr,
#endif
#ifdef CONFIG_FAIL_MAKE_REQUEST
&dev_attr_fail.attr,
@@ -436,6 +439,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
p->nr_sects = len;
p->partno = partno;
p->policy = get_disk_ro(disk);
+ init_part_histo_defaults(p);
dname = dev_name(ddev);
if (isdigit(dname[strlen(dname) - 1]))
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 7406533..746b36b 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -101,7 +101,8 @@ struct disk_stats {
* /sys/block/DEV/PART/read_request_histo,
* /sys/block/DEV/PART/write_request_histo,
* /sys/block/DEV/PART/read_dma_histo,
- * /sys/block/DEV/PART/write_dma_histo and the
+ * /sys/block/DEV/PART/write_dma_histo,
+ * /sys/block/DEV/PART/seek_histo and the
* /sys/block/DEV counterparts.
*
* The *request_histo files measure time from when the request is first
@@ -110,6 +111,7 @@ struct disk_stats {
*/
uint64_t rd_histo[2][CONFIG_HISTO_SIZE_BUCKETS][CONFIG_HISTO_TIME_BUCKETS];
uint64_t wr_histo[2][CONFIG_HISTO_SIZE_BUCKETS][CONFIG_HISTO_TIME_BUCKETS];
+ uint64_t seek_histo[CONFIG_HISTO_SEEK_BUCKETS + 1];
#endif
};
@@ -131,6 +133,9 @@ struct hd_struct {
#else
struct disk_stats dkstats;
#endif
+#ifdef CONFIG_BLOCK_HISTOGRAM
+ sector_t last_end_sector;
+#endif
struct rcu_head rcu_head;
};
@@ -399,13 +404,20 @@ extern ssize_t part_write_dma_histo_show(struct device *dev,
struct device_attribute *attr, char *page);
extern ssize_t part_write_dma_histo_show(struct device *dev,
struct device_attribute *attr, char *page);
+extern ssize_t part_seek_histo_show(struct device *dev,
+ struct device_attribute *attr, char *page);
extern ssize_t part_read_histo_clear(struct device *dev,
struct device_attribute *attr, const char *page, size_t count);
extern ssize_t part_write_histo_clear(struct device *dev,
struct device_attribute *attr, const char *page, size_t count);
+extern ssize_t part_seek_histo_clear(struct device *dev,
+ struct device_attribute *attr, const char *page, size_t count);
+
+extern void init_part_histo_defaults(struct hd_struct *part);
#else
static inline void block_histogram_completion(int cpu, struct hd_struct *part,
struct request *req) {}
+static inline void init_part_histo_defaults(struct hd_struct *part) {}
#endif
/* drivers/char/random.c */
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/