Re: CFQ timer precision

From: Jeff Moyer
Date: Mon Nov 16 2015 - 12:23:18 EST


Hi Jan,

Jens Axboe <axboe@xxxxxxxxx> writes:

> On 11/16/2015 08:11 AM, Jan Kara wrote:
>> Hello,
>>
>> lately I was looking into a big performance hit we take when the blkio
>> controller is enabled and the jbd2 thread ends up in a different cgroup
>> than the user process. E.g. dbench4 throughput drops from ~140 MB/s to
>> ~20 MB/s. However artificial dbench4 may be, this kind of drop will
>> likely be clearly visible in real-life workloads as well. With the
>> unified cgroup hierarchy the above cgroup split between jbd2 and user
>> processes is unavoidable once you enable the blkio controller, so IMO
>> we should accommodate that better.

Is group idle enabled? What happens if you set that to 0? What's the
storage?

>> I have a couple of CFQ idling improvements / fixes which I'll post later
>> this week once I complete a round of benchmarking. They improve the
>> throughput to ~40 MB/s, which helps, but clearly there's still plenty of
>> room for improvement. The reason for the performance drop is essentially
>> the idling we do to avoid starvation of CFQ queues. Now when idling in
>> this context, the current default 8 ms idle window is far too large - we
>> start the timer after the final request is completed and thus effectively
>> give the process 8 ms of CPU time to submit the next IO request, which I
>> think is usually far too much.

I think I'd need to see blktrace data to make any real sense of this
problem statement. It's just too vague.

>> The problem is that more fine-grained idling is
>> currently impractical because e.g. SUSE distro kernels have HZ=250 and
>> thus 1 jiffy is 4 ms. Hence my proposal: Do you think it would be OK to
>> convert CFQ to use highres timers and do all the accounting in
>> microseconds? Then we could tune the idle time to be, say, 1 ms, or even
>> autotune it based on the process' think time, both of which I expect
>> would get us much closer to the original throughput (a 4 ms idle window
>> gets us to ~70 MB/s with my patches, disabling idling gets us to the
>> original throughput as expected).
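
Just to spell the granularity problem out: below one tick a
jiffies-based timer can't help, no matter what the tunable is set to.
Roughly (illustration only, not from the patch below; assumes HZ=250):

	/*
	 * With HZ=250 one jiffy is 4 ms and msecs_to_jiffies() rounds up,
	 * so even asking for a 1 ms idle window ends up as a full tick.
	 */
	unsigned long sl = msecs_to_jiffies(1);	/* == 1 jiffy == 4 ms */

	/* expiry granularity is likewise limited to tick boundaries */
	mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
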
>
> Converting to a non-jiffies timer base should be quite fine. We didn't
> have hrtimers when CFQ was written :-)
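
For the timer itself, the conversion would presumably look something
like the sketch below - assuming idle_slice_timer becomes a struct
hrtimer and the existing handler logic is reused
(cfq_idle_slice_timer_fn is a made-up name):

	/*
	 * Sketch only: arm the idle window with an hrtimer instead of a
	 * jiffies-based timer_list.
	 */
	static enum hrtimer_restart cfq_idle_slice_timer_fn(struct hrtimer *timer)
	{
		struct cfq_data *cfqd = container_of(timer, struct cfq_data,
						     idle_slice_timer);

		/* the existing cfq_idle_slice_timer() body would run here */
		return HRTIMER_NORESTART;
	}

	/* in cfq_init_queue(): */
	hrtimer_init(&cfqd->idle_slice_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	cfqd->idle_slice_timer.function = cfq_idle_slice_timer_fn;

	/* in cfq_arm_slice_timer(), with sl now in nanoseconds: */
	hrtimer_start(&cfqd->idle_slice_timer, ns_to_ktime(sl), HRTIMER_MODE_REL);

CLOCK_MONOTONIC matches the ktime_get_ns() accounting the patch below
already uses.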

I actually have a patch that allows setting slices at the usec
granularity, but didn't go so far as to use hrtimers. I have no idea
why I wrote it, or whether it works. ;-) I've attached it in case it's
useful to you, Jan.
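
As for autotuning off the think time: the per-context ttime mean is
already tracked, so once everything is in nanoseconds something like
the following might be enough to experiment with (sketch only;
cfq_autotuned_idle() is a hypothetical helper and the slack/floor
values are arbitrary):

	static u64 cfq_autotuned_idle(struct cfq_data *cfqd, struct cfq_io_cq *cic)
	{
		u64 sl = cfqd->cfq_slice_idle;

		/* idle just a bit longer than the observed think time */
		if (cic && sample_valid(cic->ttime.ttime_samples))
			sl = min_t(u64, sl,
				   cic->ttime.ttime_mean + 100 * NSEC_PER_USEC);

		return max_t(u64, sl, 50 * NSEC_PER_USEC);	/* arbitrary floor */
	}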

Cheers,
Jeff

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 5da8e6e..fc50fa77 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -10,7 +10,7 @@
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
-#include <linux/jiffies.h>
+#include <linux/ktime.h>
#include <linux/rbtree.h>
#include <linux/ioprio.h>
#include <linux/blktrace_api.h>
@@ -22,23 +22,23 @@
*/
/* max queue in one round of service */
static const int cfq_quantum = 8;
-static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
+static const int cfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 };
/* maximum backwards seek, in KiB */
static const int cfq_back_max = 16 * 1024;
/* penalty of a backwards seek */
static const int cfq_back_penalty = 2;
-static const int cfq_slice_sync = HZ / 10;
-static int cfq_slice_async = HZ / 25;
+static const int cfq_slice_sync = NSEC_PER_SEC / 10;
+static int cfq_slice_async = NSEC_PER_SEC / 25;
static const int cfq_slice_async_rq = 2;
-static int cfq_slice_idle = HZ / 125;
-static int cfq_group_idle = HZ / 125;
-static const int cfq_target_latency = HZ * 3/10; /* 300 ms */
+static int cfq_slice_idle = NSEC_PER_SEC / 125;
+static int cfq_group_idle = NSEC_PER_SEC / 125;
+static const int cfq_target_latency = NSEC_PER_SEC / 10 * 3; /* 300 ms */
static const int cfq_hist_divisor = 4;

/*
* offset from end of service tree
*/
-#define CFQ_IDLE_DELAY (HZ / 5)
+#define CFQ_IDLE_DELAY (NSEC_PER_SEC / 5)

/*
* below this threshold, we consider thinktime immediate
@@ -89,7 +89,7 @@ struct cfq_rb_root {
struct cfq_ttime ttime;
};
#define CFQ_RB_ROOT (struct cfq_rb_root) { .rb = RB_ROOT, \
- .ttime = {.last_end_request = jiffies,},}
+ .ttime = {.last_end_request = ktime_get_ns(),},}

/*
* Per process-grouping structure
@@ -104,7 +104,7 @@ struct cfq_queue {
/* service_tree member */
struct rb_node rb_node;
/* service_tree key */
- unsigned long rb_key;
+ u64 rb_key;
/* prio tree member */
struct rb_node p_node;
/* prio tree root we belong to, if any */
@@ -125,8 +125,8 @@ struct cfq_queue {
unsigned int allocated_slice;
unsigned int slice_dispatch;
/* time when first request from queue completed and slice started. */
- unsigned long slice_start;
- unsigned long slice_end;
+ u64 slice_start;
+ u64 slice_end;
long slice_resid;

/* pending priority requests */
@@ -1046,11 +1046,12 @@ static inline void
cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
unsigned slice = cfq_scaled_cfqq_slice(cfqd, cfqq);
+ u64 now = ktime_get_ns();

- cfqq->slice_start = jiffies;
- cfqq->slice_end = jiffies + slice;
+ cfqq->slice_start = now;
+ cfqq->slice_end = now + slice;
cfqq->allocated_slice = slice;
- cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies);
+ cfq_log_cfqq(cfqd, cfqq, "set_slice=%llu", cfqq->slice_end - now);
}

/*
@@ -1062,7 +1063,7 @@ static inline bool cfq_slice_used(struct cfq_queue *cfqq)
{
if (cfq_cfqq_slice_new(cfqq))
return false;
- if (time_before(jiffies, cfqq->slice_end))
+ if (ktime_get_ns() < cfqq->slice_end)
return false;

return true;
@@ -1426,27 +1427,27 @@ static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq,
unsigned int *unaccounted_time)
{
unsigned int slice_used;
+ u64 now = ktime_get_ns();

/*
* Queue got expired before even a single request completed or
* got expired immediately after first request completion.
*/
- if (!cfqq->slice_start || cfqq->slice_start == jiffies) {
+ if (!cfqq->slice_start || cfqq->slice_start == now) {
/*
* Also charge the seek time incurred to the group, otherwise
* if there are mutiple queues in the group, each can dispatch
* a single request on seeky media and cause lots of seek time
* and group will never know it.
*/
- slice_used = max_t(unsigned, (jiffies - cfqq->dispatch_start),
- 1);
+ slice_used = max_t(unsigned, (now - cfqq->dispatch_start), 1);
} else {
- slice_used = jiffies - cfqq->slice_start;
+ slice_used = now - cfqq->slice_start;
if (slice_used > cfqq->allocated_slice) {
*unaccounted_time = slice_used - cfqq->allocated_slice;
slice_used = cfqq->allocated_slice;
}
- if (time_after(cfqq->slice_start, cfqq->dispatch_start))
+ if (cfqq->slice_start > cfqq->dispatch_start)
*unaccounted_time += cfqq->slice_start -
cfqq->dispatch_start;
}
@@ -1462,6 +1463,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg)
- cfqg->service_tree_idle.count;
unsigned int vfr;
+ u64 now = ktime_get_ns();

BUG_ON(nr_sync < 0);
used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl);
@@ -1483,9 +1485,8 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
cfq_group_service_tree_add(st, cfqg);

/* This group is being expired. Save the context */
- if (time_after(cfqd->workload_expires, jiffies)) {
- cfqg->saved_wl_slice = cfqd->workload_expires
- - jiffies;
+ if (cfqd->workload_expires > now) {
+ cfqg->saved_wl_slice = cfqd->workload_expires - now;
cfqg->saved_wl_type = cfqd->serving_wl_type;
cfqg->saved_wl_class = cfqd->serving_wl_class;
} else
@@ -1517,7 +1518,7 @@ static void cfq_init_cfqg_base(struct cfq_group *cfqg)
*st = CFQ_RB_ROOT;
RB_CLEAR_NODE(&cfqg->rb_node);

- cfqg->ttime.last_end_request = jiffies;
+ cfqg->ttime.last_end_request = ktime_get_ns();
}

#ifdef CONFIG_CFQ_GROUP_IOSCHED
@@ -2024,10 +2025,11 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
{
struct rb_node **p, *parent;
struct cfq_queue *__cfqq;
- unsigned long rb_key;
+ u64 rb_key;
struct cfq_rb_root *st;
int left;
int new_cfqq = 1;
+ u64 now = ktime_get_ns();

st = st_for(cfqq->cfqg, cfqq_class(cfqq), cfqq_type(cfqq));
if (cfq_class_idle(cfqq)) {
@@ -2037,7 +2039,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
__cfqq = rb_entry(parent, struct cfq_queue, rb_node);
rb_key += __cfqq->rb_key;
} else
- rb_key += jiffies;
+ rb_key += now;
} else if (!add_front) {
/*
* Get our rb key offset. Subtract any residual slice
@@ -2045,13 +2047,13 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
* count indicates slice overrun, and this should position
* the next service time further away in the tree.
*/
- rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies;
+ rb_key = cfq_slice_offset(cfqd, cfqq) + now;
rb_key -= cfqq->slice_resid;
cfqq->slice_resid = 0;
} else {
- rb_key = -HZ;
+ rb_key = -NSEC_PER_SEC;
__cfqq = cfq_rb_first(st);
- rb_key += __cfqq ? __cfqq->rb_key : jiffies;
+ rb_key += __cfqq ? __cfqq->rb_key : now;
}

if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
@@ -2077,7 +2079,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
/*
* sort by key, that represents service time.
*/
- if (time_before(rb_key, __cfqq->rb_key))
+ if (rb_key < __cfqq->rb_key)
p = &parent->rb_left;
else {
p = &parent->rb_right;
@@ -2377,7 +2379,7 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
* reposition in fifo if next is older than rq
*/
if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
- time_before(next->fifo_time, rq->fifo_time) &&
+ next->fifo_time < rq->fifo_time &&
cfqq == RQ_CFQQ(next)) {
list_move(&rq->queuelist, &next->queuelist);
rq->fifo_time = next->fifo_time;
@@ -2438,7 +2440,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
cfqd->serving_wl_class, cfqd->serving_wl_type);
cfqg_stats_update_avg_queue_size(cfqq->cfqg);
cfqq->slice_start = 0;
- cfqq->dispatch_start = jiffies;
+ cfqq->dispatch_start = ktime_get_ns();
cfqq->allocated_slice = 0;
cfqq->slice_end = 0;
cfqq->slice_dispatch = 0;
@@ -2487,7 +2489,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
if (cfq_cfqq_slice_new(cfqq))
cfqq->slice_resid = cfq_scaled_cfqq_slice(cfqd, cfqq);
else
- cfqq->slice_resid = cfqq->slice_end - jiffies;
+ cfqq->slice_resid = cfqq->slice_end - ktime_get_ns();
cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid);
}

@@ -2722,6 +2724,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
struct cfq_queue *cfqq = cfqd->active_queue;
struct cfq_io_cq *cic;
unsigned long sl, group_idle = 0;
+ u64 now = ktime_get_ns();

/*
* SSD device without seek penalty, disable idling. But only do so
@@ -2764,7 +2767,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
* time slice.
*/
if (sample_valid(cic->ttime.ttime_samples) &&
- (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) {
+ (cfqq->slice_end - now < cic->ttime.ttime_mean)) {
cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu",
cic->ttime.ttime_mean);
return;
@@ -2781,7 +2784,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
else
sl = cfqd->cfq_slice_idle;

- mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
+ mod_timer(&cfqd->idle_slice_timer, jiffies + nsecs_to_jiffies(sl));
cfqg_stats_set_start_idle_time(cfqq->cfqg);
cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu group_idle: %d", sl,
group_idle ? 1 : 0);
@@ -2824,7 +2827,7 @@ static struct request *cfq_check_fifo(struct cfq_queue *cfqq)
return NULL;

rq = rq_entry_fifo(cfqq->fifo.next);
- if (time_before(jiffies, rq->fifo_time))
+ if (ktime_get_ns() < rq->fifo_time)
rq = NULL;

cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq);
@@ -2909,7 +2912,7 @@ static enum wl_type_t cfq_choose_wl_type(struct cfq_data *cfqd,
/* select the one with lowest rb_key */
queue = cfq_rb_first(st_for(cfqg, wl_class, i));
if (queue &&
- (!key_valid || time_before(queue->rb_key, lowest_key))) {
+ (!key_valid || queue->rb_key < lowest_key)) {
lowest_key = queue->rb_key;
cur_best = i;
key_valid = true;
@@ -2927,6 +2930,7 @@ choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg)
struct cfq_rb_root *st;
unsigned group_slice;
enum wl_class_t original_class = cfqd->serving_wl_class;
+ u64 now = ktime_get_ns();

/* Choose next priority. RT > BE > IDLE */
if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
@@ -2935,7 +2939,7 @@ choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg)
cfqd->serving_wl_class = BE_WORKLOAD;
else {
cfqd->serving_wl_class = IDLE_WORKLOAD;
- cfqd->workload_expires = jiffies + 1;
+ cfqd->workload_expires = now + jiffies_to_nsecs(1);
return;
}

@@ -2953,7 +2957,7 @@ choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg)
/*
* check workload expiration, and that we still have other queues ready
*/
- if (count && !time_after(jiffies, cfqd->workload_expires))
+ if (count && !(now > cfqd->workload_expires))
return;

new_workload:
@@ -2999,7 +3003,7 @@ new_workload:

slice = max_t(unsigned, slice, CFQ_MIN_TT);
cfq_log(cfqd, "workload slice:%d", slice);
- cfqd->workload_expires = jiffies + slice;
+ cfqd->workload_expires = now + slice;
}

static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
@@ -3017,16 +3021,17 @@ static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
static void cfq_choose_cfqg(struct cfq_data *cfqd)
{
struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd);
+ u64 now = ktime_get_ns();

cfqd->serving_group = cfqg;

/* Restore the workload type data */
if (cfqg->saved_wl_slice) {
- cfqd->workload_expires = jiffies + cfqg->saved_wl_slice;
+ cfqd->workload_expires = now + cfqg->saved_wl_slice;
cfqd->serving_wl_type = cfqg->saved_wl_type;
cfqd->serving_wl_class = cfqg->saved_wl_class;
} else
- cfqd->workload_expires = jiffies - 1;
+ cfqd->workload_expires = now - 1;

choose_wl_class_and_type(cfqd, cfqg);
}
@@ -3038,6 +3043,7 @@ static void cfq_choose_cfqg(struct cfq_data *cfqd)
static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
{
struct cfq_queue *cfqq, *new_cfqq = NULL;
+ u64 now = ktime_get_ns();

cfqq = cfqd->active_queue;
if (!cfqq)
@@ -3109,7 +3115,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
**/
if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) &&
(cfq_cfqq_slice_new(cfqq) ||
- (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) {
+ (cfqq->slice_end - now > now - cfqq->slice_start))) {
cfq_clear_cfqq_deep(cfqq);
cfq_clear_cfqq_idle_window(cfqq);
}
@@ -3187,11 +3193,12 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
static inline bool cfq_slice_used_soon(struct cfq_data *cfqd,
struct cfq_queue *cfqq)
{
+ u64 now = ktime_get_ns();
+
/* the queue hasn't finished any request, can't estimate */
if (cfq_cfqq_slice_new(cfqq))
return true;
- if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched,
- cfqq->slice_end))
+ if (now + cfqd->cfq_slice_idle * cfqq->dispatched > cfqq->slice_end)
return true;

return false;
@@ -3266,7 +3273,7 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
* based on the last sync IO we serviced
*/
if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) {
- unsigned long last_sync = jiffies - cfqd->last_delayed_sync;
+ unsigned long last_sync = ktime_get_ns() - cfqd->last_delayed_sync;
unsigned int depth;

depth = last_sync / cfqd->cfq_slice[1];
@@ -3352,7 +3359,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) &&
cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
cfq_class_idle(cfqq))) {
- cfqq->slice_end = jiffies + 1;
+ cfqq->slice_end = ktime_get_ns() + 1;
cfq_slice_expired(cfqd, 0);
}

@@ -3430,7 +3437,7 @@ static void cfq_init_icq(struct io_cq *icq)
{
struct cfq_io_cq *cic = icq_to_cic(icq);

- cic->ttime.last_end_request = jiffies;
+ cic->ttime.last_end_request = ktime_get_ns();
}

static void cfq_exit_icq(struct io_cq *icq)
@@ -3694,7 +3701,7 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
static void
__cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle)
{
- unsigned long elapsed = jiffies - ttime->last_end_request;
+ unsigned long elapsed = ktime_get_ns() - ttime->last_end_request;
elapsed = min(elapsed, 2UL * slice_idle);

ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8;
@@ -3947,7 +3954,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
cfq_log_cfqq(cfqd, cfqq, "insert_request");
cfq_init_prio_data(cfqq, RQ_CIC(rq));

- rq->fifo_time = jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)];
+ rq->fifo_time = ktime_get_ns() + cfqd->cfq_fifo_expire[rq_is_sync(rq)];
list_add_tail(&rq->queuelist, &cfqq->fifo);
cfq_add_rq_rb(rq);
cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group,
@@ -3995,6 +4002,7 @@ static void cfq_update_hw_tag(struct cfq_data *cfqd)
static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
struct cfq_io_cq *cic = cfqd->active_cic;
+ u64 now = ktime_get_ns();

/* If the queue already has requests, don't wait */
if (!RB_EMPTY_ROOT(&cfqq->sort_list))
@@ -4013,7 +4021,7 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)

/* if slice left is less than think time, wait busy */
if (cic && sample_valid(cic->ttime.ttime_samples)
- && (cfqq->slice_end - jiffies < cic->ttime.ttime_mean))
+ && (cfqq->slice_end - now < cic->ttime.ttime_mean))
return true;

/*
@@ -4023,7 +4031,7 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
* case where think time is less than a jiffy, mark the queue wait
* busy if only 1 jiffy is left in the slice.
*/
- if (cfqq->slice_end - jiffies == 1)
+ if (cfqq->slice_end - now <= jiffies_to_nsecs(1))
return true;

return false;
@@ -4034,9 +4042,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
struct cfq_queue *cfqq = RQ_CFQQ(rq);
struct cfq_data *cfqd = cfqq->cfqd;
const int sync = rq_is_sync(rq);
- unsigned long now;
+ u64 now = ktime_get_ns();

- now = jiffies;
cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d",
!!(rq->cmd_flags & REQ_NOIDLE));

@@ -4064,7 +4071,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
cfqq_type(cfqq));

st->ttime.last_end_request = now;
- if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now))
+ if (!time_after(rq->start_time + nsecs_to_jiffies(cfqd->cfq_fifo_expire[1]), jiffies))
cfqd->last_delayed_sync = now;
}

@@ -4092,7 +4099,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
unsigned long extend_sl = cfqd->cfq_slice_idle;
if (!cfqd->cfq_slice_idle)
extend_sl = cfqd->cfq_group_idle;
- cfqq->slice_end = jiffies + extend_sl;
+ cfqq->slice_end = now + extend_sl;
cfq_mark_cfqq_wait_busy(cfqq);
cfq_log_cfqq(cfqd, cfqq, "will busy wait");
}
@@ -4468,7 +4475,7 @@ static int cfq_init_queue(struct request_queue *q, struct elevator_type *e)
* we optimistically start assuming sync ops weren't delayed in last
* second, in order to have larger depth for async operations.
*/
- cfqd->last_delayed_sync = jiffies - HZ;
+ cfqd->last_delayed_sync = ktime_get_ns() - NSEC_PER_SEC;
return 0;

out_free:
@@ -4501,7 +4508,7 @@ static ssize_t __FUNC(struct elevator_queue *e, char *page) \
struct cfq_data *cfqd = e->elevator_data; \
unsigned int __data = __VAR; \
if (__CONV) \
- __data = jiffies_to_msecs(__data); \
+ __data = __data / NSEC_PER_MSEC; \
return cfq_var_show(__data, (page)); \
}
SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0);
@@ -4518,6 +4525,20 @@ SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0);
SHOW_FUNCTION(cfq_target_latency_show, cfqd->cfq_target_latency, 1);
#undef SHOW_FUNCTION

+#define USEC_SHOW_FUNCTION(__FUNC, __VAR) \
+static ssize_t __FUNC(struct elevator_queue *e, char *page) \
+{ \
+ struct cfq_data *cfqd = e->elevator_data; \
+ unsigned int __data = __VAR; \
+ __data = __data / NSEC_PER_USEC; \
+ return cfq_var_show(__data, (page)); \
+}
+USEC_SHOW_FUNCTION(cfq_slice_idle_us_show, cfqd->cfq_slice_idle);
+USEC_SHOW_FUNCTION(cfq_group_idle_us_show, cfqd->cfq_group_idle);
+USEC_SHOW_FUNCTION(cfq_slice_sync_us_show, cfqd->cfq_slice[1]);
+USEC_SHOW_FUNCTION(cfq_slice_async_us_show, cfqd->cfq_slice[0]);
+#undef USEC_SHOW_FUNCTION
+
#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
{ \
@@ -4529,7 +4550,7 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)
else if (__data > (MAX)) \
__data = (MAX); \
if (__CONV) \
- *(__PTR) = msecs_to_jiffies(__data); \
+ *(__PTR) = __data * NSEC_PER_MSEC; \
else \
*(__PTR) = __data; \
return ret; \
@@ -4552,6 +4573,25 @@ STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0);
STORE_FUNCTION(cfq_target_latency_store, &cfqd->cfq_target_latency, 1, UINT_MAX, 1);
#undef STORE_FUNCTION

+#define USEC_STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \
+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
+{ \
+ struct cfq_data *cfqd = e->elevator_data; \
+ unsigned int __data; \
+ int ret = cfq_var_store(&__data, (page), count); \
+ if (__data < (MIN)) \
+ __data = (MIN); \
+ else if (__data > (MAX)) \
+ __data = (MAX); \
+ *(__PTR) = __data * NSEC_PER_USEC; \
+ return ret; \
+}
+USEC_STORE_FUNCTION(cfq_slice_idle_us_store, &cfqd->cfq_slice_idle, 0, UINT_MAX);
+USEC_STORE_FUNCTION(cfq_group_idle_us_store, &cfqd->cfq_group_idle, 0, UINT_MAX);
+USEC_STORE_FUNCTION(cfq_slice_sync_us_store, &cfqd->cfq_slice[1], 1, UINT_MAX);
+USEC_STORE_FUNCTION(cfq_slice_async_us_store, &cfqd->cfq_slice[0], 1, UINT_MAX);
+#undef USEC_STORE_FUNCTION
+
#define CFQ_ATTR(name) \
__ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store)

@@ -4562,10 +4602,14 @@ static struct elv_fs_entry cfq_attrs[] = {
CFQ_ATTR(back_seek_max),
CFQ_ATTR(back_seek_penalty),
CFQ_ATTR(slice_sync),
+ CFQ_ATTR(slice_sync_us),
CFQ_ATTR(slice_async),
+ CFQ_ATTR(slice_async_us),
CFQ_ATTR(slice_async_rq),
CFQ_ATTR(slice_idle),
+ CFQ_ATTR(slice_idle_us),
CFQ_ATTR(group_idle),
+ CFQ_ATTR(group_idle_us),
CFQ_ATTR(low_latency),
CFQ_ATTR(target_latency),
__ATTR_NULL
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/