Re: [patch 3/3] PTP: add kvm PTP driver

From: Radim Krcmar
Date: Fri Jan 13 2017 - 10:57:06 EST


2017-01-13 10:01-0200, Marcelo Tosatti:
> Add a driver with gettime method returning hosts realtime clock.
> This allows Chrony to synchronize host and guest clocks with
> high precision (see results below).
>
> chronyc> sources
> MS Name/IP address Stratum Poll Reach LastRx Last sample
> ===============================================================================
> #* PHC0 0 3 377 6 +4ns[ +4ns] +/- 3ns
>
> To configure Chronyd to use PHC refclock, add the
> following line to its configuration file:
>
> refclock PHC /dev/ptpX poll 3 dpoll -2 offset 0
>
> Where /dev/ptpX is the kvmclock PTP clock.
>
>
> ---
> drivers/ptp/Kconfig | 12 +++
> drivers/ptp/Makefile | 1
> drivers/ptp/ptp_kvm.c | 180 ++++++++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 193 insertions(+)
>
> Index: kvm-ptpdriver/drivers/ptp/Kconfig
> ===================================================================
> --- kvm-ptpdriver.orig/drivers/ptp/Kconfig 2017-01-13 09:17:31.724568567 -0200
> +++ kvm-ptpdriver/drivers/ptp/Kconfig 2017-01-13 09:55:33.344208894 -0200
> @@ -90,4 +90,16 @@
> To compile this driver as a module, choose M here: the module
> will be called ptp_pch.
>
> +config PTP_1588_CLOCK_KVM
> + tristate "KVM virtual PTP clock"
> + depends on PTP_1588_CLOCK
> + depends on KVM_GUEST
> + default y
> + help
> + This driver adds support for using kvm infrastructure as a PTP
> + clock. This clock is only useful if you are using KVM guests.
> +
> + To compile this driver as a module, choose M here: the module
> + will be called ptp_kvm.
> +
> endmenu
> Index: kvm-ptpdriver/drivers/ptp/ptp_kvm.c
> ===================================================================
> --- /dev/null 1970-01-01 00:00:00.000000000 +0000
> +++ kvm-ptpdriver/drivers/ptp/ptp_kvm.c 2017-01-13 09:57:55.013440645 -0200
> @@ -0,0 +1,180 @@
> +/*
> + * Virtual PTP 1588 clock for use with KVM guests
> + *
> + * Copyright (C) 2017 Red Hat Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + */
> +#include <linux/device.h>
> +#include <linux/err.h>
> +#include <linux/init.h>
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <uapi/linux/kvm_para.h>
> +#include <asm/kvm_para.h>
> +#include <asm/pvclock.h>
> +#include <uapi/asm/kvm_para.h>
> +
> +#include <linux/ptp_clock_kernel.h>
> +
> +struct kvm_ptp_clock {
> + struct ptp_clock *ptp_clock;
> + struct ptp_clock_info caps;
> +};
> +
> +DEFINE_SPINLOCK(kvm_ptp_lock);
> +
> +static struct pvclock_vsyscall_time_info *hv_clock;
> +
> +/*
> + * PTP clock operations
> + */
> +
> +static int ptp_kvm_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
> +{
> + return -EOPNOTSUPP;
> +}
> +
> +static int ptp_kvm_adjtime(struct ptp_clock_info *ptp, s64 delta)
> +{
> + return -EOPNOTSUPP;
> +}
> +
> +static struct kvm_clock_offset clock_off;
> +static phys_addr_t clock_off_gpa;
> +
> +static int ptp_kvm_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
> +{
> + unsigned long ret;
> + struct timespec64 tspec;
> + u64 delta;
> + cycle_t offset;
> + unsigned version;
> + int cpu;
> + struct pvclock_vcpu_time_info *src;
> +
> + preempt_disable_notrace();
> + cpu = smp_processor_id();
> + src = &hv_clock[cpu].pvti;
> +
> + spin_lock(&kvm_ptp_lock);
> +
> + do {
> + /*
> + * We are measuring the delay between
> + * kvm_hypercall and rdtsc using TSC,
> + * and converting that delta to
> + * tsc_to_system_mul and tsc_shift
> + * So any changes to tsc_to_system_mul
> + * and tsc_shift in this region
> + * invalidate the measurement.
> + */

This assumes that the host uses kvmclock, but the guest can be using
just TSC and even have kvmclock disabled. This code should at least
check that kvmclock enabled.

(If we made kvmclock mandatory, then we could fix pvclock_wall_clock
interface, because it is already defined to provide real time based on
kvmclock ...)

> + version = pvclock_read_begin(src);
> +
> + ret = kvm_hypercall2(KVM_HC_CLOCK_OFFSET,
> + clock_off_gpa,
> + KVM_CLOCK_OFFSET_WALLCLOCK);
> + if (ret != 0) {
> + pr_err("clock offset hypercall ret %lu\n", ret);
> + spin_unlock(&kvm_ptp_lock);
> + preempt_enable_notrace();
> + return -EOPNOTSUPP;
> + }
> +
> + tspec.tv_sec = clock_off.sec;
> + tspec.tv_nsec = clock_off.nsec;
> +
> + delta = rdtsc_ordered() - clock_off.tsc;
> +
> + offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
> + src->tsc_shift);
> +
> + } while (pvclock_read_retry(src, version));
> +
> + preempt_enable_notrace();
> +
> + tspec.tv_nsec = tspec.tv_nsec + offset;
> +
> + spin_unlock(&kvm_ptp_lock);
> +
> + if (tspec.tv_nsec >= NSEC_PER_SEC) {
> + u64 secs = tspec.tv_nsec;
> +
> + tspec.tv_nsec = do_div(secs, NSEC_PER_SEC);
> + tspec.tv_sec += secs;
> + }
> +
> + memcpy(ts, &tspec, sizeof(struct timespec64));

But the whole idea is of improving the time by reading tsc a bit later
is just weird ... why is it better to provide

tsc + x, time + tsc_delta_to_time(x)

than just

tsc, time

?

Because we'll always be quering the time at tsc + y, where y >> x, and
we'd likely have other problems if shifting the time base by few
thousand cycles made a difference.

> +
> + return 0;
> +}
> +
> +static int ptp_kvm_settime(struct ptp_clock_info *ptp,
> + const struct timespec64 *ts)
> +{
> + return -EOPNOTSUPP;
> +}
> +
> +static int ptp_kvm_enable(struct ptp_clock_info *ptp,
> + struct ptp_clock_request *rq, int on)
> +{
> + return -EOPNOTSUPP;
> +}
> +
> +static struct ptp_clock_info ptp_kvm_caps = {
> + .owner = THIS_MODULE,
> + .name = "KVM virtual PTP",
> + .max_adj = 0,
> + .n_ext_ts = 0,
> + .n_pins = 0,
> + .pps = 0,
> + .adjfreq = ptp_kvm_adjfreq,
> + .adjtime = ptp_kvm_adjtime,
> + .gettime64 = ptp_kvm_gettime,
> + .settime64 = ptp_kvm_settime,
> + .enable = ptp_kvm_enable,
> +};
> +
> +/* module operations */
> +
> +static struct kvm_ptp_clock kvm_ptp_clock;
> +
> +static void __exit ptp_kvm_exit(void)
> +{
> + ptp_clock_unregister(kvm_ptp_clock.ptp_clock);
> +}
> +
> +static int __init ptp_kvm_init(void)
> +{
> + if (!kvm_para_available())
> + return -ENODEV;
> +
> + kvm_ptp_clock.caps = ptp_kvm_caps;
> +
> + kvm_ptp_clock.ptp_clock = ptp_clock_register(&kvm_ptp_clock.caps, NULL);

It is a shame that the infrastructure uses polling when the guest could
be notified on every host real time change, but this should be good
enough.

> +
> + if (IS_ERR(kvm_ptp_clock.ptp_clock))
> + return PTR_ERR(kvm_ptp_clock.ptp_clock);
> +
> + clock_off_gpa = slow_virt_to_phys(&clock_off);
> +
> + hv_clock = pvclock_pvti_cpu0_va();

Would safer to assign required globals before the registration -- races
could get ugly.

Thanks.

> +
> + return 0;
> +}
> +
> +module_init(ptp_kvm_init);
> +module_exit(ptp_kvm_exit);
> +
> +MODULE_AUTHOR("Marcelo Tosatti <mtosatti@xxxxxxxxxx>");
> +MODULE_DESCRIPTION("PTP clock using KVMCLOCK");
> +MODULE_LICENSE("GPL");
> Index: kvm-ptpdriver/drivers/ptp/Makefile
> ===================================================================
> --- kvm-ptpdriver.orig/drivers/ptp/Makefile 2017-01-13 09:17:31.724568567 -0200
> +++ kvm-ptpdriver/drivers/ptp/Makefile 2017-01-13 09:17:58.997609570 -0200
> @@ -6,3 +6,4 @@
> obj-$(CONFIG_PTP_1588_CLOCK) += ptp.o
> obj-$(CONFIG_PTP_1588_CLOCK_IXP46X) += ptp_ixp46x.o
> obj-$(CONFIG_PTP_1588_CLOCK_PCH) += ptp_pch.o
> +obj-$(CONFIG_PTP_1588_CLOCK_KVM) += ptp_kvm.o
>
>