Re: [PATCH v5 1/6] timekeeper: introduce extended clocksource reading callback

From: Thomas Gleixner
Date: Mon Sep 25 2017 - 18:00:41 EST


On Wed, 30 Aug 2017, Denis Plotnikov wrote:

> The callback provides extended information about just read
> clocksource value.
>
> It's going to be used in cases when detailed system information
> needed for further time related values calculation, e.g in KVM
> masterclock settings calculation.
>
> Signed-off-by: Denis Plotnikov <dplotnikov@xxxxxxxxxxxxx>
> ---
> include/linux/clocksource.h | 11 ++++++++++-
> 1 file changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
> index a78cb18..786a522 100644
> --- a/include/linux/clocksource.h
> +++ b/include/linux/clocksource.h
> @@ -48,7 +48,14 @@ struct module;
> * 400-499: Perfect
> * The ideal clocksource. A must-use where
> * available.
> - * @read: returns a cycle value, passes clocksource as argument
> + * @read: returns a cycle value (might be not quite cycles:
> + * see pvclock) passes clocksource as argument
> + * @read_with_stamp: saves cycles value (got from timekeeper) and cycles
> + * stamp (got from hardware counter value and used by
> + * timekeeper to calculate the cycles value) to
> + * corresponding input pointers return true if cycles
> + * stamp holds real cycles and false if it holds some
> + * time derivative value

Neither the changelog nor this comment make any sense. Not to talk about
the actual TSC side implementation which does the same as tsc_read() - it
actually uses tsc_read() - but stores the same value twice and
unconditionally returns true.

I have no idea why you need this extra voodoo function if you can achieve
the same thing with a simple property bit in clocksource->flags.

Neither do I understand the rest of the magic you introduce in the snapshot
code:

> + if (clock->read_with_stamp)
> + systime_snapshot->cycles_valid =
> + clock->read_with_stamp(
> + clock, &now, &systime_snapshot->cycles);
> + else {
> + systime_snapshot->cycles_valid = false;
> + now = clock->read(clock);
> + systime_snapshot->cycles = now;
> + }

The only difference between the two code pathes is the effect on
systime_snapshot->cycles_valid. The explanation of that bit is not making
much sense either.

+ * @cycles_valid: The flag is true when @cycles contain actual
+ * number of cycles instead some cycle derivative

Why the heck would cycles be something different than what clock->read()
returns?

What you really want to convey is the information whether

now = tk_clock_read(&tk->tkr_mono);

is a value which you can use for your pvclock correlation or not.

Unless I'm missing something important all of this can be achieved with a
minimal and actually understandable patch. See below.

Thanks,

tglx

8<------------------
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1045,7 +1045,8 @@ static struct clocksource clocksource_ts
.read = read_tsc,
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
- CLOCK_SOURCE_MUST_VERIFY,
+ CLOCK_SOURCE_MUST_VERIFY |
+ CLOCK_SOURCE_VALID_FOR_PVCLOCK_UPDATE,
.archdata = { .vclock_mode = VCLOCK_TSC },
.resume = tsc_resume,
.mark_unstable = tsc_cs_mark_unstable,
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -118,7 +118,9 @@ struct clocksource {
#define CLOCK_SOURCE_VALID_FOR_HRES 0x20
#define CLOCK_SOURCE_UNSTABLE 0x40
#define CLOCK_SOURCE_SUSPEND_NONSTOP 0x80
-#define CLOCK_SOURCE_RESELECT 0x100
+#define CLOCK_SOURCE_VALID_FOR_PVCLOCK_UPDATE 0x100
+
+#define CLOCK_SOURCE_RESELECT 0x200

/* simplify initialization of mask field */
#define CLOCKSOURCE_MASK(bits) GENMASK_ULL((bits) - 1, 0)
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -285,6 +285,8 @@ extern void ktime_get_raw_and_real_ts64(
* @raw: Monotonic raw system time
* @clock_was_set_seq: The sequence number of clock was set events
* @cs_was_changed_seq: The sequence number of clocksource change events
+ * @valid_for_pvclock_update: @cycles is from a clocksource which
+ * can be used for pvclock updates
*/
struct system_time_snapshot {
u64 cycles;
@@ -292,6 +294,7 @@ struct system_time_snapshot {
ktime_t raw;
unsigned int clock_was_set_seq;
u8 cs_was_changed_seq;
+ bool valid_for_pvclock_update;
};

/*
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -948,7 +948,7 @@ time64_t __ktime_get_real_seconds(void)
void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
{
struct timekeeper *tk = &tk_core.timekeeper;
- unsigned long seq;
+ unsigned long seq, clk_flags;
ktime_t base_raw;
ktime_t base_real;
u64 nsec_raw;
@@ -960,6 +960,7 @@ void ktime_get_snapshot(struct system_ti
do {
seq = read_seqcount_begin(&tk_core.seq);
now = tk_clock_read(&tk->tkr_mono);
+ clk_flags = tk->tkr_mono.clock->flags;
systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
base_real = ktime_add(tk->tkr_mono.base,
@@ -970,6 +971,8 @@ void ktime_get_snapshot(struct system_ti
} while (read_seqcount_retry(&tk_core.seq, seq));

systime_snapshot->cycles = now;
+ systime_snapshot->valid_for_pvclock_update =
+ clk_flags & CLOCK_SOURCE_VALID_FOR_PVCLOCK_UPDATE,
systime_snapshot->real = ktime_add_ns(base_real, nsec_real);
systime_snapshot->raw = ktime_add_ns(base_raw, nsec_raw);
}