Re: [PATCH] KVM: x86: fix kvmclock breakage from timers branch merge

From: Paolo Bonzini
Date: Thu Sep 04 2014 - 13:14:42 EST


Il 04/09/2014 18:00, Chris J Arges ha scritto:
> Uptime:
> 15:58:02 up 1:00, 1 user, load average: 0.59, 0.60, 0.31
>
> Here is the output:
>
> ./x86-run x86/kvmclock_test.flat -smp 2 --append "10000000 `date +%s`"
> qemu-system-x86_64 -enable-kvm -device pc-testdev -device
> isa-debug-exit,iobase=0xf4,iosize=0x4 -display none -serial stdio
> -device pci-testdev -kernel x86/kvmclock_test.flat -smp 2 --append
> 10000000 1409846210
> enabling apic
> enabling apic
> kvm-clock: cpu 0, msr 0x:44d4c0
> kvm-clock: cpu 0, msr 0x:44d4c0
> Wallclock test, threshold 5
> Seconds get from host: 1409846210
> Seconds get from kvmclock: 2819688866
> Offset: 1409842656

With kvm/queue this would have been roughly -3600, now it's
host_wallclock-3600. So the patch hasn't fixed the -3600 part for you.

Can you try applying this patch on top of 3.16? This is my backport of
Thomas's patch. If this works for you, we "only" have to find out how
to compute boot_ns and nsec_base in the new timekeeping world order of
3.17...

Thomas, do you have any ideas? Every time a VM is started, the kvmclock
starts at the boot time of the host, instead of the current wallclock time.

Paolo

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d38abc81db65..70de23f1de51 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1020,9 +1020,8 @@ struct pvclock_gtod_data {
u32 shift;
} clock;

- /* open coded 'struct timespec' */
- u64 monotonic_time_snsec;
- time_t monotonic_time_sec;
+ u64 boot_ns;
+ u64 nsec_base;
};

static struct pvclock_gtod_data pvclock_gtod_data;
@@ -1030,6 +1029,12 @@ static struct pvclock_gtod_data pvclock_gtod_data;
static void update_pvclock_gtod(struct timekeeper *tk)
{
struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
+ u64 boot_ns;
+
+ boot_ns = timespec_to_ns(&tk->total_sleep_time)
+ + tk->wall_to_monotonic.tv_sec * (u64)NSEC_PER_SEC
+ + tk->wall_to_monotonic.tv_nsec
+ + tk->xtime_sec * (u64)NSEC_PER_SEC;

write_seqcount_begin(&vdata->seq);

@@ -1040,17 +1044,8 @@ static void update_pvclock_gtod(struct timekeeper *tk)
vdata->clock.mult = tk->mult;
vdata->clock.shift = tk->shift;

- vdata->monotonic_time_sec = tk->xtime_sec
- + tk->wall_to_monotonic.tv_sec;
- vdata->monotonic_time_snsec = tk->xtime_nsec
- + (tk->wall_to_monotonic.tv_nsec
- << tk->shift);
- while (vdata->monotonic_time_snsec >=
- (((u64)NSEC_PER_SEC) << tk->shift)) {
- vdata->monotonic_time_snsec -=
- ((u64)NSEC_PER_SEC) << tk->shift;
- vdata->monotonic_time_sec++;
- }
+ vdata->boot_ns = boot_ns;
+ vdata->nsec_base = tk->xtime_nsec;

write_seqcount_end(&vdata->seq);
}
@@ -1414,23 +1409,22 @@ static inline u64 vgettsc(cycle_t *cycle_now)
return v * gtod->clock.mult;
}

-static int do_monotonic(struct timespec *ts, cycle_t *cycle_now)
+static int do_monotonic_boot(s64 *t, cycle_t *cycle_now)
{
+ struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
unsigned long seq;
- u64 ns;
int mode;
- struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+ u64 ns;

- ts->tv_nsec = 0;
do {
seq = read_seqcount_begin(&gtod->seq);
mode = gtod->clock.vclock_mode;
- ts->tv_sec = gtod->monotonic_time_sec;
- ns = gtod->monotonic_time_snsec;
+ ns = gtod->nsec_base;
ns += vgettsc(cycle_now);
ns >>= gtod->clock.shift;
+ ns += gtod->boot_ns;
} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
- timespec_add_ns(ts, ns);
+ *t = ns;

return mode;
}
@@ -1438,19 +1432,11 @@ static int do_monotonic(struct timespec *ts, cycle_t *cycle_now)
/* returns true if host is using tsc clocksource */
static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
{
- struct timespec ts;
-
/* checked again under seqlock below */
if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
return false;

- if (do_monotonic(&ts, cycle_now) != VCLOCK_TSC)
- return false;
-
- monotonic_to_bootbased(&ts);
- *kernel_ns = timespec_to_ns(&ts);
-
- return true;
+ return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC;
}
#endif



> My observation is that the kvmclock value seems to be positively biased
> by the boot_ns value.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/