Re: [RFC] timekeeping: Use cached readouts for monotonic and raw clocks in suspend

From: joelaf
Date: Sun Nov 20 2016 - 22:09:26 EST


Hi Thomas,

On 11/20/2016 05:24 AM, Thomas Gleixner wrote:
On Sat, 19 Nov 2016, Joel Fernandes wrote:

I am planning to add boot clock as a trace clock that can account suspend time
during tracing, however ktime_get_with_offset throws a warning as the
clocksource is attempted to be accessed in suspend.

ktime_get_with_offset() cannot be used as trace clock at all because it can
life lock in NMI context. That's why we have ktime_get_mono_fast().

But ktime_get_mono_fast doesn't account the suspend time, only boot clock (accessed with ktime_get_with_offset) does and while tracing it is useful for the trace clock to account suspended time for my usecase.

Instead, would it be ok to introduce a fast boot clock derived from fast monotonic clock to address the NMI live lock issues you mentioned? Below is an untested patch just to show the idea. Let me know your suggestions and Thanks,

Joel
----------8<--------------
From 78c4f89e6f39cdd32e91883f2d2a80c7d97e34cf Mon Sep 17 00:00:00 2001
From: Joel Fernandes <joelaf@xxxxxxxxxx>
Date: Sun, 20 Nov 2016 18:58:28 -0800
Subject: [RFC] timekeeping: Add a fast boot clock derived from fast
monotonic clock

Signed-off-by: Joel Fernandes <joelaf@xxxxxxxxxx>
---
kernel/time/timekeeping.c | 36 ++++++++++++++++++++++++++++++------
1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 37dec7e..41afa1e 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -55,6 +55,12 @@ static struct timekeeper shadow_timekeeper;
*/
struct tk_fast {
seqcount_t seq;
+
+ /*
+ * first dimension is based on lower seq bit,
+ * second dimension is for offset type (real, boot, tai)
+ */
+ ktime_t offsets[2][3];
struct tk_read_base base[2];
};

@@ -350,14 +356,20 @@ static void update_fast_timekeeper(struct tk_read_base *tkr, struct tk_fast *tkf
/* Force readers off to base[1] */
raw_write_seqcount_latch(&tkf->seq);

- /* Update base[0] */
+ /* Update base[0] and offsets*/
memcpy(base, tkr, sizeof(*base));
+ tkf->offsets[0][TK_OFFS_REAL] = tk_core.timekeeper.offs_real;
+ tkf->offsets[0][TK_OFFS_BOOT] = tk_core.timekeeper.offs_boot;
+ tkf->offsets[0][TK_OFFS_TAI] = tk_core.timekeeper.offs_tai;

/* Force readers back to base[0] */
raw_write_seqcount_latch(&tkf->seq);

- /* Update base[1] */
+ /* Update base[1] and offsets*/
memcpy(base + 1, base, sizeof(*base));
+ tkf->offsets[1][TK_OFFS_REAL] = tk_core.timekeeper.offs_real;
+ tkf->offsets[1][TK_OFFS_BOOT] = tk_core.timekeeper.offs_boot;
+ tkf->offsets[1][TK_OFFS_TAI] = tk_core.timekeeper.offs_tai;
}

/**
@@ -392,16 +404,23 @@ static void update_fast_timekeeper(struct tk_read_base *tkr, struct tk_fast *tkf
* of the following timestamps. Callers need to be aware of that and
* deal with it.
*/
-static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
+static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf, int offset)
{
struct tk_read_base *tkr;
unsigned int seq;
u64 now;
+ ktime_t *off;

do {
seq = raw_read_seqcount_latch(&tkf->seq);
tkr = tkf->base + (seq & 0x01);
- now = ktime_to_ns(tkr->base);
+
+ if (offset >= 0) {
+ off = tkf->offsets[seq & 0x01];
+ now = ktime_to_ns(ktime_add(tkr->base, off[offset]));
+ } else {
+ now = ktime_to_ns(tkr->base);
+ }

now += timekeeping_delta_to_ns(tkr,
clocksource_delta(
@@ -415,16 +434,21 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)

u64 ktime_get_mono_fast_ns(void)
{
- return __ktime_get_fast_ns(&tk_fast_mono);
+ return __ktime_get_fast_ns(&tk_fast_mono, -1);
}
EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);

u64 ktime_get_raw_fast_ns(void)
{
- return __ktime_get_fast_ns(&tk_fast_raw);
+ return __ktime_get_fast_ns(&tk_fast_raw, -1);
}
EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);

+u64 ktime_get_boot_fast_ns(void)
+{
+ return __ktime_get_fast_ns(&tk_fast_mono, TK_OFFS_BOOT);
+}
+
/* Suspend-time cycles value for halted fast timekeeper. */
static cycle_t cycles_at_suspend;

--
2.8.0.rc3.226.g39d4020