[RFC PATCH v2 2/8] cpuidle: record the overhead of idle entry

From: Aubrey Li
Date: Sat Sep 30 2017 - 03:21:39 EST


Record the overhead of the idle entry path, in microseconds.

Signed-off-by: Aubrey Li <aubrey.li@xxxxxxxxxxxxxxx>
---
drivers/cpuidle/cpuidle.c | 33 +++++++++++++++++++++++++++++++++
include/linux/cpuidle.h | 14 ++++++++++++++
kernel/sched/idle.c | 8 +++++++-
3 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 60bb64f..4066308 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -302,6 +302,39 @@ void cpuidle_reflect(struct cpuidle_device *dev, int index)
cpuidle_curr_governor->reflect(dev, index);
}

+/* cpuidle_entry_start - store local_clock() as idle entry start; no-op without a cpuidle device */
+void cpuidle_entry_start(void)
+{
+ struct cpuidle_device *dev = cpuidle_get_device();
+
+ if (dev)
+ dev->idle_stat.entry_start = local_clock();
+}
+
+/*
+ * cpuidle_entry_end - record idle entry end and fold the elapsed time,
+ * in microseconds, into a running average (each new sample weighted 1/8)
+ */
+void cpuidle_entry_end(void)
+{
+ struct cpuidle_device *dev = cpuidle_get_device();
+ u64 overhead;
+ s64 diff;
+
+ if (dev) {
+ dev->idle_stat.entry_end = local_clock();
+ overhead = div_u64(dev->idle_stat.entry_end -
+ dev->idle_stat.entry_start, NSEC_PER_USEC);
+ diff = overhead - dev->idle_stat.overhead;
+ dev->idle_stat.overhead += diff >> 3;
+ /*
+ * floor, not a cap: never let the averaged overhead be 0, use 1us
+ */
+ if (dev->idle_stat.overhead == 0)
+ dev->idle_stat.overhead = 1;
+ }
+}
+
/**
* cpuidle_install_idle_handler - installs the cpuidle idle loop handler
*/
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index fc1e5d7..cad9b71 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -72,6 +72,15 @@ struct cpuidle_device_kobj;
struct cpuidle_state_kobj;
struct cpuidle_driver_kobj;

+struct cpuidle_stat {
+ u64 entry_start; /* nanosecond */
+ u64 entry_end; /* nanosecond */
+ u64 overhead; /* microsecond, running average of idle entry cost */
+ unsigned int predicted_us; /* microsecond */
+ bool predicted; /* ever predicted? */
+ bool fast_idle; /* fast idle? */
+};
+
struct cpuidle_device {
unsigned int registered:1;
unsigned int enabled:1;
@@ -89,6 +98,7 @@ struct cpuidle_device {
cpumask_t coupled_cpus;
struct cpuidle_coupled *coupled;
#endif
+ struct cpuidle_stat idle_stat;
};

DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
@@ -131,6 +141,8 @@ extern bool cpuidle_not_available(struct cpuidle_driver *drv,

extern int cpuidle_select(struct cpuidle_driver *drv,
struct cpuidle_device *dev);
+extern void cpuidle_entry_start(void);
+extern void cpuidle_entry_end(void);
extern int cpuidle_enter(struct cpuidle_driver *drv,
struct cpuidle_device *dev, int index);
extern void cpuidle_reflect(struct cpuidle_device *dev, int index);
@@ -164,6 +176,8 @@ static inline bool cpuidle_not_available(struct cpuidle_driver *drv,
static inline int cpuidle_select(struct cpuidle_driver *drv,
struct cpuidle_device *dev)
{return -ENODEV; }
+static inline void cpuidle_entry_start(void) { }
+static inline void cpuidle_entry_end(void) { }
static inline int cpuidle_enter(struct cpuidle_driver *drv,
struct cpuidle_device *dev, int index)
{return -ENODEV; }
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 6c23e30..0951dac 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -210,6 +210,12 @@ static void cpuidle_idle_call(void)
static void do_idle(void)
{
/*
+ * we record idle entry overhead now, so any deferrable items
+ * in idle entry path need to be placed between cpuidle_entry_start()
+ * and cpuidle_entry_end()
+ */
+ cpuidle_entry_start();
+ /*
* If the arch has a polling bit, we maintain an invariant:
*
* Our polling bit is clear if we're not scheduled (i.e. if rq->curr !=
@@ -217,10 +223,10 @@ static void do_idle(void)
* then setting need_resched is guaranteed to cause the CPU to
* reschedule.
*/
-
__current_set_polling();
quiet_vmstat();
tick_nohz_idle_enter();
+ cpuidle_entry_end();

while (!need_resched()) {
check_pgt_cache();
--
2.7.4