[RFC v5 14/23] clockevents: decouple ->max_delta_ns from ->max_delta_ticks

From: Nicolai Stange
Date: Sat Sep 03 2016 - 21:32:08 EST


Before converting the given delta from ns to cycles by means of the
mult/shift pair, clockevents_program_event() enforces it to be less or
equal than ->max_delta_ns. Simplified, this reads as

delta = min(delta, dev->max_delta_ns);
clc = (delta * dev->mult) >> dev->shift;

A device's ->max_delta_ns is chosen such that
1.) The multiplication does not overflow 64 bits.
2.) clc fits into an unsigned long.
3.) clc <= the ->max_delta_ticks allowed by hardware.

These constraints are responsible for making ->max_delta_ns depend tightly
on ->max_delta_ticks and the device's frequency, i.e. its mult/shift pair.
As it stands, any update to ->mult would require a corresponding change of
->max_delta_ns as well and these two had to get consumed in the clockevent
programming path atomically. This would have a negative performance impact
as soon as we start adjusting ->mult from a different CPU with upcoming
changes making the clockevents core NTP correction aware: some kind of
locking would have been needed in the event programming path.

In order to circumvent this necessity, ->max_delta_ns could be made small
enough to cover the whole range of possible adjustments to ->mult,
eliminating the need to update it at all.

The timekeeping core never adjusts its mono clock's inverse frequency by
more than 11% (c.f. timekeeping_adjust()). This means that a clockevent
device's adjusted mult will never grow beyond 1 / (1-11%) = 112.4% of its
initial value. Thus, reducing ->max_delta_ns unconditionally to
1/112.4% = 89% would do the trick. (Actually, setting it to 8/9 = 88.89%
thereof allows for mult increases of up to 12.5% = 1/8 which is good for
binary arithmetic.)

However, such a decrease of ->max_delta_ns could become problematic for
devices whose ->max_delta_ticks is small, i.e. those for whom item 3.)
prevails.

In order to work around this, I chose to make ->max_delta_ns completely
independent of ->max_delta_ticks. It is set to 8/9 of the maximum value
satisfying conditions 1.) and 2.) for the given ->mult. Item 3.) is made
explicit by introducing an extra min(clc, ->max_delta_ticks) after the ns
to cycles conversion in clockevents_program_event(). This way, the maximum
programmable delta is reduced only for those devices which have got a large
->max_delta_ticks anyway. This comes at the cost of an extra min() in the
event programming path though.

So,
- In the case of CLOCK_EVT_FEAT_NO_ADJUST not being enabled, set
->max_delta_ns to 8/9 of the maximum possible value such that items 1.)
and 2.) hold for the given ->mult. OTOH, if CLOCK_EVT_FEAT_NO_ADJUST
is not set, set ->max_delta_ns to the full such value.

- Add a

clc = min(clc, dev->max_delta_ticks)

after the ns to cycle conversion in clockevents_program_event().

- Move the ->max_delta_ticks member into struct clock_event_device's
first 64 bytes in order to optimize for cacheline usage.

- Finally, preserve the semantics of /proc/timer_list by letting it display
the minimum of ->max_delta_ns and ->max_delta_ticks converted to ns at
the 'max_delta_ns' field.

Signed-off-by: Nicolai Stange <nicstange@xxxxxxxxx>
---
include/linux/clockchips.h | 4 ++--
kernel/time/clockevents.c | 50 +++++++++++++++++++++++++++++++++++++++++++++-
kernel/time/timer_list.c | 6 +++++-
3 files changed, 56 insertions(+), 4 deletions(-)

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 9a25185..be5f222 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -81,6 +81,7 @@ enum clock_event_state {
* @next_event: local storage for the next event in oneshot mode
* @max_delta_ns: maximum delta value in ns
* @min_delta_ns: minimum delta value in ns
+ * @max_delta_ticks: maximum delta value in ticks
* @mult: nanosecond to cycles multiplier
* @shift: nanoseconds to cycles divisor (power of two)
* @state_use_accessors:current state of the device, assigned by the core code
@@ -93,7 +94,6 @@ enum clock_event_state {
* @tick_resume: resume clkevt device
* @broadcast: function to broadcast events
* @min_delta_ticks: minimum delta value in ticks stored for reconfiguration
- * @max_delta_ticks: maximum delta value in ticks stored for reconfiguration
* @name: ptr to clock event name
* @rating: variable to rate clock event devices
* @irq: IRQ number (only for non CPU local devices)
@@ -109,6 +109,7 @@ struct clock_event_device {
ktime_t next_event;
u64 max_delta_ns;
u64 min_delta_ns;
+ unsigned long max_delta_ticks;
u32 mult;
u32 shift;
enum clock_event_state state_use_accessors;
@@ -125,7 +126,6 @@ struct clock_event_device {
void (*suspend)(struct clock_event_device *);
void (*resume)(struct clock_event_device *);
unsigned long min_delta_ticks;
- unsigned long max_delta_ticks;

const char *name;
int rating;
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index f352f54..472fcc2 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -17,6 +17,7 @@
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/device.h>
+#include <asm/bitsperlong.h>

#include "tick-internal.h"

@@ -336,6 +337,9 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
delta = max(delta, (int64_t) dev->min_delta_ns);

clc = ((unsigned long long) delta * dev->mult) >> dev->shift;
+
+ clc = min_t(unsigned long, clc, dev->max_delta_ticks);
+
rc = dev->set_next_event((unsigned long) clc, dev);

return (rc && force) ? clockevents_program_min_delta(dev) : rc;
@@ -444,15 +448,59 @@ EXPORT_SYMBOL_GPL(clockevents_unbind_device);

static void __clockevents_update_bounds(struct clock_event_device *dev)
{
+ u64 max_delta_ns;
+
if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
return;

/*
+ * max_delta_ns is <= the maximum value such that the ns to
+ * cycles conversion in clockevents_program_event() doesn't
+ * overflow. It follows that is has to fulfill the following
+ * constraints:
+ * 1.) mult * max_delta_ns <= ULLONG_MAX
+ * 2.) (mult * max_delta_ns) >> shift <= ULONG_MAX
+ * On 32 bit archs, 2.) is stricter because shift <= 32 always
+ * holds, otherwise 1.) is.
+ *
+ * If CLOCK_EVT_FEAT_NO_ADJUST is not set, the actual
+ * ->max_delta_ns is set to a value equal to 8/9 of the
+ * maximum one possible. This gives some room for increasing
+ * mult by up to 12.5% which is just enough to compensate for
+ * the up to 11% mono clock frequency adjustments made by the
+ * timekeeping core, c.f. timekeeping_adjust().
+ *
+ */
+#if BITS_PER_LONG == 32
+ /*
+ * Set the lower BITS_PER_LONG + dev->shift bits.
+ * Note: dev->shift can be 32, so avoid undefined behaviour.
+ * The ^= below is really a |= here. However the former is the
+ * common idiom and recognizable by gcc.
+ */
+ max_delta_ns = (u64)1 << (BITS_PER_LONG + dev->shift - 1);
+ max_delta_ns ^= (dev->max_delta_ns - 1);
+#else
+ max_delta_ns = U64_MAX;
+#endif
+ if (unlikely(!dev->mult)) {
+ dev->mult = 1;
+ WARN_ON(1);
+ }
+ do_div(max_delta_ns, dev->mult);
+ dev->max_delta_ns = max_delta_ns;
+ if (!(dev->features & CLOCK_EVT_FEAT_NO_ADJUST)) {
+ /* reduce by 1/9 */
+ do_div(max_delta_ns, 9);
+ ++max_delta_ns; /* round up */
+ dev->max_delta_ns -= max_delta_ns;
+ }
+
+ /*
* cev_delta2ns() never returns values less than 1us and thus,
* we'll never program any ced with anything less.
*/
dev->min_delta_ns = cev_delta2ns(dev->min_delta_ticks, dev, false);
- dev->max_delta_ns = cev_delta2ns(dev->max_delta_ticks, dev, true);
}

/**
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index ba7d8b2..ac20d4c 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -206,6 +206,10 @@ static void
print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
{
struct clock_event_device *dev = td->evtdev;
+ u64 max_delta_ns;
+
+ max_delta_ns = clockevent_delta2ns(dev->max_delta_ticks, dev);
+ max_delta_ns = min(max_delta_ns, dev->max_delta_ns);

SEQ_printf(m, "Tick Device: mode: %d\n", td->mode);
if (cpu < 0)
@@ -220,7 +224,7 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
}
SEQ_printf(m, "%s\n", dev->name);
SEQ_printf(m, " max_delta_ns: %llu\n",
- (unsigned long long) dev->max_delta_ns);
+ (unsigned long long) max_delta_ns);
SEQ_printf(m, " min_delta_ns: %llu\n",
(unsigned long long) dev->min_delta_ns);
SEQ_printf(m, " mult: %u\n", dev->mult);
--
2.9.3