Re: [PATCH 8/9] hrtimer: Allow hrtimer::function() to free the timer

From: Kirill Tkhai
Date: Fri Jun 05 2015 - 05:08:32 EST

Next message: Peter Zijlstra: "Re: [RFC][PATCH 4/4] time: Do leapsecond adjustment in gettime fastpaths"
Previous message: Dan Carpenter: "Re: [PATCH v3 7/8] staging:lustre: style cleanups for LNet headers"
In reply to: Kirill Tkhai: "Re: [PATCH 8/9] hrtimer: Allow hrtimer::function() to free the timer"
Next in thread: Peter Zijlstra: "Re: [PATCH 8/9] hrtimer: Allow hrtimer::function() to free the timer"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

This message is too late, /me going to see new series :)

05.06.2015, 12:02, "Kirill Tkhai" <tkhai@xxxxxxxxx>:
> В Чт, 04/06/2015 в 12:49 +0200, Peter Zijlstra пишет:
> On Thu, Jun 04, 2015 at 12:07:03PM +0300, Kirill Tkhai wrote:
>>>> --- a/include/linux/hrtimer.h
>>>> +++ b/include/linux/hrtimer.h
>>>> @@ -391,11 +391,25 @@ extern u64 hrtimer_get_next_event(void);
>>>>    * A timer is active, when it is enqueued into the rbtree or the
>>>>    * callback function is running or it's in the state of being migrated
>>>>    * to another cpu.
>>>> + *
>>>> + * See __run_hrtimer().
>>>>    */
>>>> -static inline int hrtimer_active(const struct hrtimer *timer)
>>>> +static inline bool hrtimer_active(const struct hrtimer *timer)
>>>>   {
>>>> - return timer->state != HRTIMER_STATE_INACTIVE ||
>>>> - timer->base->running == timer;
>>>> + if (timer->state != HRTIMER_STATE_INACTIVE)
>>>> + return true;
>>>> +
>>>> + smp_rmb(); /* C matches A */
>>>> +
>>>> + if (timer->base->running == timer)
>>>> + return true;
>>>> +
>>>> + smp_rmb(); /* D matches B */
>>>> +
>>>> + if (timer->state != HRTIMER_STATE_INACTIVE)
>>>> + return true;
>>>> +
>>>> + return false;
>>> This races with two sequential timer handlers. hrtimer_active()
>>> is preemptible everywhere, and no guarantees that all three "if"
>>> conditions check the same timer tick.
>> Indeed.
>>> How about transformation of hrtimer_bases.lock: raw_spinlock_t --> seqlock_t?
>> Ingo will like that because it means we already need to touch cpu_base.
>>
>> But I think there's a problem there on timer migration, the timer can
>> migrate between bases while we do the seq read loop and then you can get
>> false positives on the different seqcount numbers.
>>
>> We could of course do something like the below, but hrtimer_is_active()
>> is turning into quite the monster.
>>
>> Needs more comments at the very least, its fully of trickery.
>
> Yeah, it's safe for now, but it may happen difficulties with a support
> in the future, because barrier logic is not easy to review. But it seems
> we may simplify it a little bit. Please, see the comments below.
>> ---
>> --- a/include/linux/hrtimer.h
>> +++ b/include/linux/hrtimer.h
>> @@ -59,7 +59,9 @@ enum hrtimer_restart {
>>    * mean touching the timer after the callback, this makes it impossible to free
>>    * the timer from the callback function.
>>    *
>> - * Therefore we track the callback state in timer->base->running == timer.
>> + * Therefore we track the callback state in:
>> + *
>> + * timer->base->cpu_base->running == timer
>>    *
>>    * On SMP it is possible to have a "callback function running and enqueued"
>>    * status. It happens for example when a posix timer expired and the callback
>> @@ -144,7 +146,6 @@ struct hrtimer_clock_base {
>>           struct timerqueue_head active;
>>           ktime_t (*get_time)(void);
>>           ktime_t offset;
>> - struct hrtimer *running;
>>   } __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN)));
>>
>>   enum hrtimer_base_type {
>> @@ -159,6 +160,8 @@ enum hrtimer_base_type {
>>    * struct hrtimer_cpu_base - the per cpu clock bases
>>    * @lock: lock protecting the base and associated clock bases
>>    * and timers
>> + * @seq: seqcount around __run_hrtimer
>> + * @running: pointer to the currently running hrtimer
>>    * @cpu: cpu number
>>    * @active_bases: Bitfield to mark bases with active timers
>>    * @clock_was_set_seq: Sequence counter of clock was set events
>> @@ -180,6 +183,8 @@ enum hrtimer_base_type {
>>    */
>>   struct hrtimer_cpu_base {
>>           raw_spinlock_t lock;
>> + seqcount_t seq;
>> + struct hrtimer *running;
>>           unsigned int cpu;
>>           unsigned int active_bases;
>>           unsigned int clock_was_set_seq;
>> @@ -394,8 +399,24 @@ extern u64 hrtimer_get_next_event(void);
>>    */
>>   static inline int hrtimer_active(const struct hrtimer *timer)
>>   {
>> - return timer->state != HRTIMER_STATE_INACTIVE ||
>> - timer->base->running == timer;
>> + struct hrtimer_cpu_base *cpu_base;
>> + unsigned int seq;
>> + bool active;
>> +
>> + do {
>> + active = false;
>> + cpu_base = READ_ONCE(timer->base->cpu_base);
>> + seqcount_lockdep_reader_access(&cpu_base->seq);
>> + seq = raw_read_seqcount(&cpu_base->seq);
>> +
>> + if (timer->state != HRTIMER_STATE_INACTIVE ||
>> +    cpu_base->running == timer)
>> + active = true;
>> +
>> + } while (read_seqcount_retry(&cpu_base->seq, seq) ||
>> + cpu_base != READ_ONCE(timer->base->cpu_base));
>> +
>> + return active;
>>   }
>
> This may race with migrate_hrtimer_list(), so it needs write seqcounter
> too.
>>   /*
>> @@ -412,7 +433,7 @@ static inline int hrtimer_is_queued(stru
>>    */
>>   static inline int hrtimer_callback_running(struct hrtimer *timer)
>>   {
>> - return timer->base->running == timer;
>> + return timer->base->cpu_base->running == timer;
>>   }
>>
>>   /* Forward a hrtimer so it expires after now: */
>> --- a/kernel/time/hrtimer.c
>> +++ b/kernel/time/hrtimer.c
>> @@ -67,6 +67,7 @@
>>   DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
>>   {
>>           .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
>> + .seq = SEQCNT_ZERO(hrtimer_bases.seq),
>>           .clock_base =
>>           {
>>                   {
>> @@ -113,9 +114,15 @@ static inline int hrtimer_clockid_to_bas
>>   /*
>>    * We require the migration_base for lock_hrtimer_base()/switch_hrtimer_base()
>>    * such that hrtimer_callback_running() can unconditionally dereference
>> - * timer->base.
>> + * timer->base->cpu_base
>>    */
>> -static struct hrtimer_clock_base migration_base;
>> +static struct hrtimer_cpu_base migration_cpu_base = {
>> + .seq = SEQCNT_ZERO(migration_cpu_base),
>> +};
>> +
>> +static struct hrtimer_clock_base migration_base {
>> + .cpu_base = &migration_cpu_base,
>> +};
>>
>>   /*
>>    * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
>> @@ -1118,10 +1125,16 @@ static void __run_hrtimer(struct hrtimer
>>           enum hrtimer_restart (*fn)(struct hrtimer *);
>>           int restart;
>>
>> - WARN_ON(!irqs_disabled());
>> + lockdep_assert_held(&cpu_base->lock);
>>
>>           debug_deactivate(timer);
>> - base->running = timer;
>> + cpu_base->running = timer;
>
> My suggestion is do not use seqcounters for long parts of code, and implement
> short primitives for changing timer state and cpu_base running timer. Something
> like this:
>
> static inline void hrtimer_set_state(struct hrtimer *timer, unsigned long state)
> {
>         struct hrtimer_cpu_base *cpu_base = timer->base->cpu_base;
>
>         lockdep_assert_held(&cpu_base->lock);
>
>         write_seqcount_begin(&cpu_base->seq);
>         timer->state = state;
>         write_seqcount_end(&cpu_base->seq);
> }
>
> static inline void cpu_base_set_running(struct hrtimer_cpu_base *cpu_base,
>                                         struct hrtimer *timer)
> {
>         lockdep_assert_held(&cpu_base->lock);
>
>         write_seqcount_begin(&cpu_base->seq);
>         cpu_base->running = timer;
>         write_seqcount_end(&cpu_base->seq);
> }
>
> Implemented this, we may less think about right barrier order, because
> all changes are being made under seqcount.
>
> static inline int hrtimer_active(const struct hrtimer *timer)
> {
>         struct hrtimer_cpu_base *cpu_base;
>         struct hrtimer_clock_base *base;
>         unsigned int seq;
>         bool active = false;
>
>         do {
>                 base = READ_ONCE(timer->base);
>                 if (base == &migration_base) {
>                         active = true;
>                         break;
>                 }
>
>                 cpu_base = base->cpu_base;
>                 seqcount_lockdep_reader_access(&cpu_base->seq);
>                 seq = raw_read_seqcount(&cpu_base->seq);
>
>                 if (timer->state != HRTIMER_STATE_INACTIVE ||
>                     cpu_base->running == timer) {
>                         active = true;
>                         break;
>                 }
>         } while (read_seqcount_retry(&cpu_base->seq, seq) ||
>                  READ_ONCE(timer->base) != base);
>
>         return active;
> }
>> +
>> + /*
>> + * separate the ->running assignment from the ->state assignment
>> + */
>> + write_seqcount_begin(&cpu_base->seq);
>> +
>>           __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
>>           timer_stats_account_hrtimer(timer);
>>           fn = timer->function;
>> @@ -1150,8 +1163,13 @@ static void __run_hrtimer(struct hrtimer
>>               !(timer->state & HRTIMER_STATE_ENQUEUED))
>>                   enqueue_hrtimer(timer, base);
>>
>> - WARN_ON_ONCE(base->running != timer);
>> - base->running = NULL;
>> + /*
>> + * separate the ->running assignment from the ->state assignment
>> + */
>> + write_seqcount_end(&cpu_base->seq);
>> +
>> + WARN_ON_ONCE(cpu_base->running != timer);
>> + cpu_base->running = NULL;
>>   }
>>
>>   static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

Next message: Peter Zijlstra: "Re: [RFC][PATCH 4/4] time: Do leapsecond adjustment in gettime fastpaths"
Previous message: Dan Carpenter: "Re: [PATCH v3 7/8] staging:lustre: style cleanups for LNet headers"
In reply to: Kirill Tkhai: "Re: [PATCH 8/9] hrtimer: Allow hrtimer::function() to free the timer"
Next in thread: Peter Zijlstra: "Re: [PATCH 8/9] hrtimer: Allow hrtimer::function() to free the timer"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]