Re: 'perf test tsc' failing, bisected to "sched/clock: Provide better clock continuity"
From: Arnaldo Carvalho de Melo
Date: Mon Mar 20 2017 - 09:22:48 EST
Em Thu, Mar 16, 2017 at 04:22:31PM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Thu, Mar 16, 2017 at 04:21:38PM -0300, Arnaldo Carvalho de Melo escreveu:
> > Didn't apply to perf/urgent:
> >
> > [acme@jouet linux]$ patch -p1 < /wb/1.patch
> > patching file arch/x86/events/core.c
> > Hunk #1 succeeded at 2243 (offset -1 lines).
> > Hunk #2 succeeded at 2251 (offset -1 lines).
> > Hunk #3 succeeded at 2265 (offset -1 lines).
> > Hunk #4 succeeded at 2273 (offset -1 lines).
> > patching file arch/x86/include/asm/timer.h
> > patching file arch/x86/kernel/tsc.c
> > Hunk #1 FAILED at 328.
> perf/core almost super clean:
> [acme@felicio linux]$ patch -p1 < /wb/1.patch
> patching file arch/x86/events/core.c
> patching file arch/x86/include/asm/timer.h
> patching file arch/x86/kernel/tsc.c
> patching file include/linux/sched/clock.h
> patching file kernel/sched/clock.c
> Hunk #3 succeeded at 154 with fuzz 2 (offset -7 lines).
> Hunk #4 succeeded at 231 (offset -7 lines).
> Hunk #5 succeeded at 317 (offset -7 lines).
> [acme@felicio linux]$
>
> Building...
Thanks, the test is happy now, see below, please stick my:
Reported-and-Tested-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
[acme@felicio linux]$ uname -a
Linux felicio.ghostprotocols.net 4.11.0-rc2+ #30 SMP Mon Mar 20 09:47:16 BRT 2017 x86_64 x86_64 x86_64 GNU/Linux
[acme@felicio ~]$ perf test tsc
54: Convert perf time to TSC : Ok
[acme@felicio ~]$ perf test -v tsc
54: Convert perf time to TSC :
--- start ---
test child forked, pid 10096
mmap size 528384B
1st event perf time 791386772903 tsc 2539203231623
rdtsc time 791386773842 tsc 2539203234547
2nd event perf time 791386774062 tsc 2539203235229
test child finished with 0
---- end ----
Convert perf time to TSC: Ok
[acme@felicio ~]$
Patch as applied here, i.e. after those fuzzies applying on
tip/perf/core:
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 349d4d17aa7f..ed7e4a10c293 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2244,6 +2244,7 @@ void arch_perf_update_userpage(struct perf_event *event,
struct perf_event_mmap_page *userpg, u64 now)
{
struct cyc2ns_data *data;
+ u64 offset;
userpg->cap_user_time = 0;
userpg->cap_user_time_zero = 0;
@@ -2251,11 +2252,13 @@ void arch_perf_update_userpage(struct perf_event *event,
!!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
userpg->pmc_width = x86_pmu.cntval_bits;
- if (!sched_clock_stable())
+ if (!using_native_sched_clock() || !sched_clock_stable())
return;
data = cyc2ns_read_begin();
+ offset = data->cyc2ns_offset + __sched_clock_offset;
+
/*
* Internal timekeeping for enabled/running/stopped times
* is always in the local_clock domain.
@@ -2263,7 +2266,7 @@ void arch_perf_update_userpage(struct perf_event *event,
userpg->cap_user_time = 1;
userpg->time_mult = data->cyc2ns_mul;
userpg->time_shift = data->cyc2ns_shift;
- userpg->time_offset = data->cyc2ns_offset - now;
+ userpg->time_offset = offset - now;
/*
* cap_user_time_zero doesn't make sense when we're using a different
@@ -2271,7 +2274,7 @@ void arch_perf_update_userpage(struct perf_event *event,
*/
if (!event->attr.use_clockid) {
userpg->cap_user_time_zero = 1;
- userpg->time_zero = data->cyc2ns_offset;
+ userpg->time_zero = offset;
}
cyc2ns_read_end(data);
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index a04eabd43d06..27e9f9d769b8 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -12,6 +12,8 @@
extern int no_timer_check;
+extern bool using_native_sched_clock(void);
+
/*
* We use the full linear equation: f(x) = a + b*x, in order to allow
* a continuous function in the face of dynamic freq changes.
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 4f7a9833d8e5..de38a141a52a 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -328,7 +328,7 @@ unsigned long long sched_clock(void)
return paravirt_sched_clock();
}
-static inline bool using_native_sched_clock(void)
+bool using_native_sched_clock(void)
{
return pv_time_ops.sched_clock == native_sched_clock;
}
@@ -336,7 +336,7 @@ static inline bool using_native_sched_clock(void)
unsigned long long
sched_clock(void) __attribute__((alias("native_sched_clock")));
-static inline bool using_native_sched_clock(void) { return true; }
+bool using_native_sched_clock(void) { return true; }
#endif
int check_tsc_unstable(void)
diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h
index 4a68c6791207..34fe92ce1ebd 100644
--- a/include/linux/sched/clock.h
+++ b/include/linux/sched/clock.h
@@ -54,15 +54,16 @@ static inline u64 local_clock(void)
}
#else
extern void sched_clock_init_late(void);
-/*
- * Architectures can set this to 1 if they have specified
- * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
- * but then during bootup it turns out that sched_clock()
- * is reliable after all:
- */
extern int sched_clock_stable(void);
extern void clear_sched_clock_stable(void);
+/*
+ * When sched_clock_stable(), __sched_clock_offset provides the offset
+ * between local_clock() and sched_clock().
+ */
+extern u64 __sched_clock_offset;
+
+
extern void sched_clock_tick(void);
extern void sched_clock_idle_sleep_event(void);
extern void sched_clock_idle_wakeup_event(u64 delta_ns);
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index a08795e21628..5448d188b8d3 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -96,10 +96,10 @@ void sched_clock_init(void)
static int __sched_clock_stable_early = 1;
/*
- * We want: ktime_get_ns() + gtod_offset == sched_clock() + raw_offset
+ * We want: ktime_get_ns() + __gtod_offset == sched_clock() + __sched_clock_offset
*/
-static __read_mostly u64 raw_offset;
-static __read_mostly u64 gtod_offset;
+__read_mostly u64 __sched_clock_offset;
+static __read_mostly u64 __gtod_offset;
struct sched_clock_data {
u64 tick_raw;
@@ -131,11 +131,11 @@ static void __set_sched_clock_stable(void)
/*
* Attempt to make the (initial) unstable->stable transition continuous.
*/
- raw_offset = (scd->tick_gtod + gtod_offset) - (scd->tick_raw);
+ __sched_clock_offset = (scd->tick_gtod + __gtod_offset) - (scd->tick_raw);
printk(KERN_INFO "sched_clock: Marking stable (%lld, %lld)->(%lld, %lld)\n",
- scd->tick_gtod, gtod_offset,
- scd->tick_raw, raw_offset);
+ scd->tick_gtod, __gtod_offset,
+ scd->tick_raw, __sched_clock_offset);
static_branch_enable(&__sched_clock_stable);
tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
@@ -154,11 +154,11 @@ static void __clear_sched_clock_stable(struct work_struct *work)
*
* Still do what we can.
*/
- gtod_offset = (scd->tick_raw + raw_offset) - (scd->tick_gtod);
+ __gtod_offset = (scd->tick_raw + __sched_clock_offset) - (scd->tick_gtod);
printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n",
- scd->tick_gtod, gtod_offset,
- scd->tick_raw, raw_offset);
+ scd->tick_gtod, __gtod_offset,
+ scd->tick_raw, __sched_clock_offset);
static_branch_disable(&__sched_clock_stable);
tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
@@ -231,7 +231,7 @@ static u64 sched_clock_local(struct sched_clock_data *scd)
* scd->tick_gtod + TICK_NSEC);
*/
- clock = scd->tick_gtod + gtod_offset + delta;
+ clock = scd->tick_gtod + __gtod_offset + delta;
min_clock = wrap_max(scd->tick_gtod, old_clock);
max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC);
@@ -317,7 +317,7 @@ u64 sched_clock_cpu(int cpu)
u64 clock;
if (sched_clock_stable())
- return sched_clock() + raw_offset;
+ return sched_clock() + __sched_clock_offset;
if (unlikely(!sched_clock_running))
return 0ull;