Re: [patch 1/4] hrtimer: Handle remaining time proper for TIME_LOW_RES

From: Helge Deller
Date: Sat Jan 16 2016 - 13:37:43 EST



Hi Thomas,

* Thomas Gleixner <tglx@xxxxxxxxxxxxx>:
> If CONFIG_TIME_LOW_RES is enabled we add a jiffie to the relative timeout to
> prevent short sleeps, but we do not account for that in interfaces which
> retrieve the remaining time.
>
> Helge observed that timerfd can return a remaining time larger than the
> relative timeout. That's not expected and breaks userland test programs.
>
> Store the information that the timer was armed relative and provide functions
> to adjust the remaining time. To avoid bloating the hrtimer struct make state
> a u8, which as a bonus results in better code on x86 at least.
>
> Reported-by: Helge Deller <deller@xxxxxx>
> Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>

Great!
You can add for this series:
Tested-by: Helge Deller <deller@xxxxxx>

Any chance it can get backported to v4.4 ?

Thanks,
Helge

> ---
> include/linux/hrtimer.h | 34 ++++++++++++++++++++++++++---
> kernel/time/hrtimer.c | 55 +++++++++++++++++++++++++++++++----------------
> kernel/time/timer_list.c | 2 -
> 3 files changed, 69 insertions(+), 22 deletions(-)
>
> --- a/include/linux/hrtimer.h
> +++ b/include/linux/hrtimer.h
> @@ -87,7 +87,8 @@ enum hrtimer_restart {
> * @function: timer expiry callback function
> * @base: pointer to the timer base (per cpu and per clock)
> * @state: state information (See bit values above)
> - * @start_pid: timer statistics field to store the pid of the task which
> + * @is_rel: Set if the timer was armed relative
> + * @start_pid: timer statistics field to store the pid of the task which
> * started the timer
> * @start_site: timer statistics field to store the site where the timer
> * was started
> @@ -101,7 +102,8 @@ struct hrtimer {
> ktime_t _softexpires;
> enum hrtimer_restart (*function)(struct hrtimer *);
> struct hrtimer_clock_base *base;
> - unsigned long state;
> + u8 state;
> + u8 is_rel;
> #ifdef CONFIG_TIMER_STATS
> int start_pid;
> void *start_site;
> @@ -321,6 +323,27 @@ static inline void clock_was_set_delayed
>
> #endif
>
> +static inline ktime_t
> +__hrtimer_expires_remaining_adjusted(const struct hrtimer *timer, ktime_t now)
> +{
> + ktime_t rem = ktime_sub(timer->node.expires, now);
> +
> + /*
> + * Adjust relative timers for the extra we added in
> + * hrtimer_start_range_ns() to prevent short timeouts.
> + */
> + if (IS_ENABLED(CONFIG_TIME_LOW_RES) && timer->is_rel)
> + rem.tv64 -= hrtimer_resolution;
> + return rem;
> +}
> +
> +static inline ktime_t
> +hrtimer_expires_remaining_adjusted(const struct hrtimer *timer)
> +{
> + return __hrtimer_expires_remaining_adjusted(timer,
> + timer->base->get_time());
> +}
> +
> extern void clock_was_set(void);
> #ifdef CONFIG_TIMERFD
> extern void timerfd_clock_was_set(void);
> @@ -390,7 +413,12 @@ static inline void hrtimer_restart(struc
> }
>
> /* Query timers: */
> -extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
> +extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust);
> +
> +static inline ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
> +{
> + return __hrtimer_get_remaining(timer, false);
> +}
>
> extern u64 hrtimer_get_next_event(void);
>
> --- a/kernel/time/hrtimer.c
> +++ b/kernel/time/hrtimer.c
> @@ -897,10 +897,10 @@ static int enqueue_hrtimer(struct hrtime
> */
> static void __remove_hrtimer(struct hrtimer *timer,
> struct hrtimer_clock_base *base,
> - unsigned long newstate, int reprogram)
> + u8 newstate, int reprogram)
> {
> struct hrtimer_cpu_base *cpu_base = base->cpu_base;
> - unsigned int state = timer->state;
> + u8 state = timer->state;
>
> timer->state = newstate;
> if (!(state & HRTIMER_STATE_ENQUEUED))
> @@ -930,7 +930,7 @@ static inline int
> remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart)
> {
> if (hrtimer_is_queued(timer)) {
> - unsigned long state = timer->state;
> + u8 state = timer->state;
> int reprogram;
>
> /*
> @@ -954,6 +954,22 @@ remove_hrtimer(struct hrtimer *timer, st
> return 0;
> }
>
> +static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim,
> + const enum hrtimer_mode mode)
> +{
> +#ifdef CONFIG_TIME_LOW_RES
> + /*
> + * CONFIG_TIME_LOW_RES indicates that the system has no way to return
> + * granular time values. For relative timers we add hrtimer_resolution
> + * (i.e. one jiffie) to prevent short timeouts.
> + */
> + timer->is_rel = mode & HRTIMER_MODE_REL;
> + if (timer->is_rel)
> + tim = ktime_add_safe(tim, ktime_set(0, hrtimer_resolution));
> +#endif
> + return tim;
> +}
> +
> /**
> * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
> * @timer: the timer to be added
> @@ -974,19 +990,10 @@ void hrtimer_start_range_ns(struct hrtim
> /* Remove an active timer from the queue: */
> remove_hrtimer(timer, base, true);
>
> - if (mode & HRTIMER_MODE_REL) {
> + if (mode & HRTIMER_MODE_REL)
> tim = ktime_add_safe(tim, base->get_time());
> - /*
> - * CONFIG_TIME_LOW_RES is a temporary way for architectures
> - * to signal that they simply return xtime in
> - * do_gettimeoffset(). In this case we want to round up by
> - * resolution when starting a relative timer, to avoid short
> - * timeouts. This will go away with the GTOD framework.
> - */
> -#ifdef CONFIG_TIME_LOW_RES
> - tim = ktime_add_safe(tim, ktime_set(0, hrtimer_resolution));
> -#endif
> - }
> +
> + tim = hrtimer_update_lowres(timer, tim, mode);
>
> hrtimer_set_expires_range_ns(timer, tim, delta_ns);
>
> @@ -1074,19 +1081,23 @@ EXPORT_SYMBOL_GPL(hrtimer_cancel);
> /**
> * hrtimer_get_remaining - get remaining time for the timer
> * @timer: the timer to read
> + * @adjust: adjust relative timers when CONFIG_TIME_LOW_RES=y
> */
> -ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
> +ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust)
> {
> unsigned long flags;
> ktime_t rem;
>
> lock_hrtimer_base(timer, &flags);
> - rem = hrtimer_expires_remaining(timer);
> + if (IS_ENABLED(CONFIG_TIME_LOW_RES) && adjust)
> + rem = hrtimer_expires_remaining_adjusted(timer);
> + else
> + rem = hrtimer_expires_remaining(timer);
> unlock_hrtimer_base(timer, &flags);
>
> return rem;
> }
> -EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
> +EXPORT_SYMBOL_GPL(__hrtimer_get_remaining);
>
> #ifdef CONFIG_NO_HZ_COMMON
> /**
> @@ -1220,6 +1231,14 @@ static void __run_hrtimer(struct hrtimer
> fn = timer->function;
>
> /*
> + * Clear the 'is relative' flag for the TIME_LOW_RES case. If the
> + * timer is restarted with a period then it becomes an absolute
> + * timer. If its not restarted it does not matter.
> + */
> + if (IS_ENABLED(CONFIG_TIME_LOW_RES))
> + timer->is_rel = false;
> +
> + /*
> * Because we run timers from hardirq context, there is no chance
> * they get migrated to another cpu, therefore its safe to unlock
> * the timer base.
> --- a/kernel/time/timer_list.c
> +++ b/kernel/time/timer_list.c
> @@ -69,7 +69,7 @@ print_timer(struct seq_file *m, struct h
> print_name_offset(m, taddr);
> SEQ_printf(m, ", ");
> print_name_offset(m, timer->function);
> - SEQ_printf(m, ", S:%02lx", timer->state);
> + SEQ_printf(m, ", S:%02x", timer->state);
> #ifdef CONFIG_TIMER_STATS
> SEQ_printf(m, ", ");
> print_name_offset(m, timer->start_site);
>
>