[Patch 2.6.26] Fix printk timestamps for good [was: [Bug 10648] CONFIG_PRINTK_TIME broken on git HEAD ?]
From: Martin Knoblauch
Date: Mon Jul 14 2008 - 05:59:01 EST
----- Original Message ----
> From: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
> To: Martin Knoblauch <spamtrap@xxxxxxxxxxxx>
> Sent: Tuesday, June 10, 2008 10:08:39 AM
> Subject: Re: [Bug 10648] CONFIG_PRINTK_TIME broken on git HEAD ?
>
> On Tue, 2008-06-10 at 01:03 -0700, Martin Knoblauch wrote:
> > ----- Original Message ----
> >
> > > From: Peter Zijlstra
> > > To: Martin Knoblauch
> > > Sent: Monday, June 9, 2008 5:18:05 PM
> > > Subject: Re: [Bug 10648] CONFIG_PRINTK_TIME broken on git HEAD ?
> > >
> > > On Mon, 2008-06-09 at 08:13 -0700, Martin Knoblauch wrote:
> > > > Hi Peter,
> > > >
> > > > could you please send me that patch as an attachment. My mail agent
> > > > mangles the bloody thing beyond recognition ....
> > >
> > > here goes
> > Hi Peter,
> >
> > the patch is for 2.6.26, correct? First I assumed it was the backport
> > to 2.6.25, but apparently that does not have kernel/sched_clock.c. In
> > any case, applying to 2.6.26-rc5 works fine and the result is great.
> > Timestamps are monotonic and correspond to the first number in
> > "/proc/uptime" much better than ever before. Great work. If this goes
> > into 2.6.26, I will [try to] prepare a patch against 2.6.25.
>
> Hmm, the patch might be a tad too big for .26 at this point, but I'll see
> what I can do to make .26 a better kernel ;-)
>
> Thanks for testing!
Hi Peter,
now that 2.6.26 is released, you may want to push the patch to fix the printk timestamps for good. I have rebased your patch onto 2.6.26 and it works as expected: printk timestamps are now monotonic and track "uptime" to within a millisecond. This is IMO also material for 2.6.26-stable. Fixing 2.6.25 seems more involved, and I do not personally see the urgency.
Cheers
Martin
Tested-by: Martin Knoblauch <spamtrap@xxxxxxxxxxxx>
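
For reference, this is roughly how the CONFIG_PRINTK_TIME prefix is derived from cpu_clock(). A minimal userspace sketch, not the actual kernel code; fake_cpu_clock_ns() is a made-up stand-in for the in-kernel clock:

#include <stdio.h>

/* Hypothetical stand-in for the kernel's cpu_clock(cpu): ns since boot. */
static unsigned long long fake_cpu_clock_ns(void)
{
	return 12345678901234ULL;	/* ~12345.678901 seconds */
}

int main(void)
{
	unsigned long long t = fake_cpu_clock_ns();
	unsigned long nanosec_rem = (unsigned long)(t % 1000000000ULL);

	t /= 1000000000ULL;	/* the kernel uses do_div() for this */

	/*
	 * The "[seconds.microseconds]" prefix that printk prepends when
	 * CONFIG_PRINTK_TIME is set; feeding it a monotonic cpu_clock()
	 * is what keeps these prefixes monotonic.
	 */
	printf("[%5lu.%06lu] example message\n",
	       (unsigned long)t, nanosec_rem / 1000);
	return 0;
}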
diff -urp linux-2.6.26-orig/kernel/sched.c linux-2.6.26-sched-clock/kernel/sched.c
--- linux-2.6.26-orig/kernel/sched.c 2008-07-13 23:51:29.000000000 +0200
+++ linux-2.6.26-sched-clock/kernel/sched.c 2008-07-14 10:25:15.000000000 +0200
@@ -754,8 +754,6 @@ const_debug unsigned int sysctl_sched_nr
*/
unsigned int sysctl_sched_rt_period = 1000000;
-static __read_mostly int scheduler_running;
-
/*
* part of the period that we allow rt tasks to run in us.
* default: 0.95s
@@ -775,82 +773,6 @@ static inline u64 global_rt_runtime(void
return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
}
-unsigned long long time_sync_thresh = 100000;
-
-static DEFINE_PER_CPU(unsigned long long, time_offset);
-static DEFINE_PER_CPU(unsigned long long, prev_cpu_time);
-
-/*
- * Global lock which we take every now and then to synchronize
- * the CPUs time. This method is not warp-safe, but it's good
- * enough to synchronize slowly diverging time sources and thus
- * it's good enough for tracing:
- */
-static DEFINE_SPINLOCK(time_sync_lock);
-static unsigned long long prev_global_time;
-
-static unsigned long long __sync_cpu_clock(unsigned long long time, int cpu)
-{
- /*
- * We want this inlined, to not get tracer function calls
- * in this critical section:
- */
- spin_acquire(&time_sync_lock.dep_map, 0, 0, _THIS_IP_);
- __raw_spin_lock(&time_sync_lock.raw_lock);
-
- if (time < prev_global_time) {
- per_cpu(time_offset, cpu) += prev_global_time - time;
- time = prev_global_time;
- } else {
- prev_global_time = time;
- }
-
- __raw_spin_unlock(&time_sync_lock.raw_lock);
- spin_release(&time_sync_lock.dep_map, 1, _THIS_IP_);
-
- return time;
-}
-
-static unsigned long long __cpu_clock(int cpu)
-{
- unsigned long long now;
-
- /*
- * Only call sched_clock() if the scheduler has already been
- * initialized (some code might call cpu_clock() very early):
- */
- if (unlikely(!scheduler_running))
- return 0;
-
- now = sched_clock_cpu(cpu);
-
- return now;
-}
-
-/*
- * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
- * clock constructed from sched_clock():
- */
-unsigned long long cpu_clock(int cpu)
-{
- unsigned long long prev_cpu_time, time, delta_time;
- unsigned long flags;
-
- local_irq_save(flags);
- prev_cpu_time = per_cpu(prev_cpu_time, cpu);
- time = __cpu_clock(cpu) + per_cpu(time_offset, cpu);
- delta_time = time-prev_cpu_time;
-
- if (unlikely(delta_time > time_sync_thresh)) {
- time = __sync_cpu_clock(time, cpu);
- per_cpu(prev_cpu_time, cpu) = time;
- }
- local_irq_restore(flags);
-
- return time;
-}
-EXPORT_SYMBOL_GPL(cpu_clock);
-
#ifndef prepare_arch_switch
# define prepare_arch_switch(next) do { } while (0)
#endif
@@ -7858,8 +7780,6 @@ void __init sched_init(void)
* During early bootup we pretend to be a normal task:
*/
current->sched_class = &fair_sched_class;
-
- scheduler_running = 1;
}
#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
diff -urp linux-2.6.26-orig/kernel/sched_clock.c linux-2.6.26-sched-clock/kernel/sched_clock.c
--- linux-2.6.26-orig/kernel/sched_clock.c 2008-07-13 23:51:29.000000000 +0200
+++ linux-2.6.26-sched-clock/kernel/sched_clock.c 2008-07-14 10:25:15.000000000 +0200
@@ -244,3 +244,15 @@ unsigned long long __attribute__((weak))
{
return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
}
+
+unsigned long long cpu_clock(int cpu)
+{
+ unsigned long long clock;
+ unsigned long flags;
+
+ raw_local_irq_save(flags);
+ clock = sched_clock_cpu(cpu);
+ raw_local_irq_restore(flags);
+
+ return clock;
+}
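
To sanity-check the result, compare the prefix of a fresh kernel message against the first field of /proc/uptime. A minimal sketch for the uptime side (the message side is just reading the kernel log):

#include <stdio.h>

/*
 * Print the first field of /proc/uptime (seconds since boot), for
 * eyeballing against the [seconds.microseconds] printk prefix.
 */
int main(void)
{
	double uptime;
	FILE *f = fopen("/proc/uptime", "r");

	if (!f) {
		perror("/proc/uptime");
		return 1;
	}
	if (fscanf(f, "%lf", &uptime) != 1) {
		fclose(f);
		return 1;
	}
	fclose(f);
	printf("uptime: %.3f s\n", uptime);
	return 0;
}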