Re: [PATCH] ptp: Add vDSO-style vmclock support

From: Paolo Abeni
Date: Thu Jul 25 2024 - 07:21:27 EST


Hi,

Just a bunch of 'nits below

On 7/24/24 19:16, David Woodhouse wrote:
diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c
new file mode 100644
index 000000000000..9c508c21c062
--- /dev/null
+++ b/drivers/ptp/ptp_vmclock.c

[...]

+/*
+ * Multiply a 64-bit count by a 64-bit tick 'period' in units of seconds >> 64
+ * and add the fractional second part of the reference time.
+ *
+ * The result is a 128-bit value, the top 64 bits of which are seconds, and
+ * the low 64 bits are (seconds >> 64).
+ *
+ * If __int128 isn't available, perform the calculation 32 bits at a time to
+ * avoid overflow.
+ */
+static inline uint64_t mul_u64_u64_shr_add_u64(uint64_t *res_hi, uint64_t delta,
+ uint64_t period, uint8_t shift,
+ uint64_t frac_sec)

Please, no 'inline' in \.c files

+{
+ unsigned __int128 res = (unsigned __int128)delta * period;
+
+ res >>= shift;
+ res += frac_sec;
+ *res_hi = res >> 64;
+ return (uint64_t)res;
+}
+
+static inline bool tai_adjust(struct vmclock_abi *clk, uint64_t *sec)
+{

Same here

+ if (likely(clk->time_type == VMCLOCK_TIME_UTC))
+ return true;
+
+ if (clk->time_type == VMCLOCK_TIME_TAI &&
+ (clk->flags & VMCLOCK_FLAG_TAI_OFFSET_VALID)) {
+ if (sec)
+ *sec += clk->tai_offset_sec;
+ return true;
+ }
+ return false;
+}
+
+static int vmclock_get_crosststamp(struct vmclock_state *st,
+ struct ptp_system_timestamp *sts,
+ struct system_counterval_t *system_counter,
+ struct timespec64 *tspec)
+{
+ ktime_t deadline = ktime_add(ktime_get(), VMCLOCK_MAX_WAIT);
+ struct system_time_snapshot systime_snapshot;
+ uint64_t cycle, delta, seq, frac_sec;
+
+#ifdef CONFIG_X86
+ /*
+ * We'd expect the hypervisor to know this and to report the clock
+ * status as VMCLOCK_STATUS_UNRELIABLE. But be paranoid.
+ */
+ if (check_tsc_unstable())
+ return -EINVAL;
+#endif
+
+ while (1) {
+ seq = st->clk->seq_count & ~1ULL;
+ virt_rmb();

Please document which other barrier pair witht this one

+
+ if (st->clk->clock_status == VMCLOCK_STATUS_UNRELIABLE)
+ return -EINVAL;
+
+ /*
+ * When invoked for gettimex64(), fill in the pre/post system
+ * times. The simple case is when system time is based on the
+ * same counter as st->cs_id, in which case all three times
+ * will be derived from the *same* counter value.
+ *
+ * If the system isn't using the same counter, then the value
+ * from ktime_get_snapshot() will still be used as pre_ts, and
+ * ptp_read_system_postts() is called to populate postts after
+ * calling get_cycles().
+ *
+ * The conversion to timespec64 happens further down, outside
+ * the seq_count loop.
+ */
+ if (sts) {
+ ktime_get_snapshot(&systime_snapshot);
+ if (systime_snapshot.cs_id == st->cs_id) {
+ cycle = systime_snapshot.cycles;
+ } else {
+ cycle = get_cycles();
+ ptp_read_system_postts(sts);
+ }
+ } else
+ cycle = get_cycles();

Please use the brackets even for the else case

[...]
+static int ptp_vmclock_get_time_fn(ktime_t *device_time,
+ struct system_counterval_t *system_counter,
+ void *ctx)
+{
+ struct vmclock_state *st = ctx;
+ struct timespec64 tspec;
+ int ret;
+
+#ifdef SUPPORT_KVMCLOCK
+ if (READ_ONCE(st->sys_cs_id) == CSID_X86_KVM_CLK)
+ ret = vmclock_get_crosststamp_kvmclock(st, NULL, system_counter,
+ &tspec);
+ else
+#endif
+ ret = vmclock_get_crosststamp(st, NULL, system_counter, &tspec);
+
+ if (!ret)
+ *device_time = timespec64_to_ktime(tspec);
+
+ return ret;
+}
+
+

Please, don't add 2 consecutive blank lines.

Cheers,

Paolo