[PATCH 2/2] x86/idle: use dynamic halt poll
From: root
Date: Thu Jun 22 2017 - 07:23:07 EST
From: Yang Zhang <yang.zhang.wz@xxxxxxxxx>
use dynamic poll to reduce the cost when the event does not occur during
poll. The idea is similar to the current dynamic halt poll inside KVM:
Before entering idle, we will record the time. After wakeup from idle
(normally, this is in the interrupt handler), we will record the time too.
Then we will check whether we need to grow/shrink the poll time depending
on how long the CPU stayed inside the idle state.
There are two new sysctl to change poll time dynamically:
poll_shrink, poll_grow
Signed-off-by: Yang Zhang <yang.zhang.wz@xxxxxxxxx>
---
Documentation/sysctl/kernel.txt | 14 ++++++++
arch/x86/include/asm/processor.h | 6 ++++
arch/x86/kernel/apic/apic.c | 6 ++++
arch/x86/kernel/apic/vector.c | 1 +
arch/x86/kernel/cpu/mcheck/mce_amd.c | 2 ++
arch/x86/kernel/cpu/mcheck/therm_throt.c | 2 ++
arch/x86/kernel/cpu/mcheck/threshold.c | 2 ++
arch/x86/kernel/irq.c | 5 +++
arch/x86/kernel/irq_work.c | 2 ++
arch/x86/kernel/process.c | 59 ++++++++++++++++++++++++++++++++
arch/x86/kernel/smp.c | 6 ++++
include/linux/kernel.h | 2 ++
kernel/sysctl.c | 14 ++++++++
13 files changed, 121 insertions(+)
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 4e71bfe..76043b4 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -63,6 +63,8 @@ show up in /proc/sys/kernel:
- perf_event_max_stack
- perf_event_max_contexts_per_stack
- pid_max
+- poll_grow [ X86 only ]
+- poll_shrink [ X86 only ]
- poll_threshold_ns [ X86 only ]
- powersave-nap [ PPC only ]
- printk
@@ -703,6 +705,18 @@ kernel tries to allocate a number starting from this one.
==============================================================
+poll_grow: (X86 only)
+
+This parameter is the multiplier used in grow_poll_ns() to increase the
+poll time. By default, the value is 2.
+
+==============================================================
+poll_shrink: (X86 only)
+
+This parameter is the divisor used in shrink_poll_ns() to reduce the
+poll time. By default, the value is 2.
+
+==============================================================
poll_threshold_ns: (X86 only)
This parameter used to control the max wait time to poll before going
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 3cada99..cf952ed 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -931,4 +931,10 @@ static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
void stop_this_cpu(void *dummy);
void df_debug(struct pt_regs *regs, long error_code);
+#ifdef CONFIG_HYPERVISOR_GUEST
+extern void check_poll(void);
+#else
+static inline void check_poll(void) {}
+#endif
+
#endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 2d75faf..37b16b6 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -962,6 +962,7 @@ __visible void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
* interrupt lock, which is the WrongThing (tm) to do.
*/
entering_ack_irq();
+ check_poll();
local_apic_timer_interrupt();
exiting_irq();
@@ -981,6 +982,7 @@ __visible void __irq_entry smp_trace_apic_timer_interrupt(struct pt_regs *regs)
* interrupt lock, which is the WrongThing (tm) to do.
*/
entering_ack_irq();
+ check_poll();
trace_local_timer_entry(LOCAL_TIMER_VECTOR);
local_apic_timer_interrupt();
trace_local_timer_exit(LOCAL_TIMER_VECTOR);
@@ -1863,6 +1865,7 @@ static void __smp_spurious_interrupt(u8 vector)
__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs)
{
entering_irq();
+ check_poll();
__smp_spurious_interrupt(~regs->orig_ax);
exiting_irq();
}
@@ -1872,6 +1875,7 @@ __visible void __irq_entry smp_trace_spurious_interrupt(struct pt_regs *regs)
u8 vector = ~regs->orig_ax;
entering_irq();
+ check_poll();
trace_spurious_apic_entry(vector);
__smp_spurious_interrupt(vector);
trace_spurious_apic_exit(vector);
@@ -1921,6 +1925,7 @@ static void __smp_error_interrupt(struct pt_regs *regs)
__visible void __irq_entry smp_error_interrupt(struct pt_regs *regs)
{
entering_irq();
+ check_poll();
__smp_error_interrupt(regs);
exiting_irq();
}
@@ -1928,6 +1933,7 @@ __visible void __irq_entry smp_error_interrupt(struct pt_regs *regs)
__visible void __irq_entry smp_trace_error_interrupt(struct pt_regs *regs)
{
entering_irq();
+ check_poll();
trace_error_apic_entry(ERROR_APIC_VECTOR);
__smp_error_interrupt(regs);
trace_error_apic_exit(ERROR_APIC_VECTOR);
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index f3557a1..77fc6ed 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -564,6 +564,7 @@ asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void)
unsigned vector, me;
entering_ack_irq();
+ check_poll();
/* Prevent vectors vanishing under us */
raw_spin_lock(&vector_lock);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 6e4a047..7f984d6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -819,6 +819,7 @@ static inline void __smp_deferred_error_interrupt(void)
asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(void)
{
entering_irq();
+ check_poll();
__smp_deferred_error_interrupt();
exiting_ack_irq();
}
@@ -826,6 +827,7 @@ asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(void)
asmlinkage __visible void __irq_entry smp_trace_deferred_error_interrupt(void)
{
entering_irq();
+ check_poll();
trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
__smp_deferred_error_interrupt();
trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index d7cc190..d420b42 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -400,6 +400,7 @@ static inline void __smp_thermal_interrupt(void)
smp_thermal_interrupt(struct pt_regs *regs)
{
entering_irq();
+ check_poll();
__smp_thermal_interrupt();
exiting_ack_irq();
}
@@ -408,6 +409,7 @@ static inline void __smp_thermal_interrupt(void)
smp_trace_thermal_interrupt(struct pt_regs *regs)
{
entering_irq();
+ check_poll();
trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
__smp_thermal_interrupt();
trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index bb0e75ee..77858ba 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -26,6 +26,7 @@ static inline void __smp_threshold_interrupt(void)
asmlinkage __visible void __irq_entry smp_threshold_interrupt(void)
{
entering_irq();
+ check_poll();
__smp_threshold_interrupt();
exiting_ack_irq();
}
@@ -33,6 +34,7 @@ asmlinkage __visible void __irq_entry smp_threshold_interrupt(void)
asmlinkage __visible void __irq_entry smp_trace_threshold_interrupt(void)
{
entering_irq();
+ check_poll();
trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
__smp_threshold_interrupt();
trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index f34fe74..65ff260 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -230,6 +230,7 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
entering_irq();
+ check_poll();
/* entering_irq() tells RCU that we're not quiescent. Check it. */
RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU");
@@ -269,6 +270,7 @@ __visible void __irq_entry smp_x86_platform_ipi(struct pt_regs *regs)
struct pt_regs *old_regs = set_irq_regs(regs);
entering_ack_irq();
+ check_poll();
__smp_x86_platform_ipi();
exiting_irq();
set_irq_regs(old_regs);
@@ -295,6 +297,7 @@ __visible void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
struct pt_regs *old_regs = set_irq_regs(regs);
entering_ack_irq();
+ check_poll();
inc_irq_stat(kvm_posted_intr_ipis);
exiting_irq();
set_irq_regs(old_regs);
@@ -308,6 +311,7 @@ __visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs)
struct pt_regs *old_regs = set_irq_regs(regs);
entering_ack_irq();
+ check_poll();
inc_irq_stat(kvm_posted_intr_wakeup_ipis);
kvm_posted_intr_wakeup_handler();
exiting_irq();
@@ -320,6 +324,7 @@ __visible void __irq_entry smp_trace_x86_platform_ipi(struct pt_regs *regs)
struct pt_regs *old_regs = set_irq_regs(regs);
entering_ack_irq();
+ check_poll();
trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR);
__smp_x86_platform_ipi();
trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR);
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
index 2754878..2c4b6cd 100644
--- a/arch/x86/kernel/irq_work.c
+++ b/arch/x86/kernel/irq_work.c
@@ -20,6 +20,7 @@ static inline void __smp_irq_work_interrupt(void)
__visible void __irq_entry smp_irq_work_interrupt(struct pt_regs *regs)
{
ipi_entering_ack_irq();
+ check_poll();
__smp_irq_work_interrupt();
exiting_irq();
}
@@ -27,6 +28,7 @@ __visible void __irq_entry smp_irq_work_interrupt(struct pt_regs *regs)
__visible void __irq_entry smp_trace_irq_work_interrupt(struct pt_regs *regs)
{
ipi_entering_ack_irq();
+ check_poll();
trace_irq_work_entry(IRQ_WORK_VECTOR);
__smp_irq_work_interrupt();
trace_irq_work_exit(IRQ_WORK_VECTOR);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 6361783..e5238a8 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -41,6 +41,10 @@
#ifdef CONFIG_HYPERVISOR_GUEST
unsigned long poll_threshold_ns;
+unsigned int poll_shrink = 2;
+unsigned int poll_grow = 2;
+DEFINE_PER_CPU(unsigned long, poll_begin_ns);
+DEFINE_PER_CPU(unsigned long, poll_ns);
#endif
/*
@@ -318,6 +322,57 @@ static inline void play_dead(void)
#endif
#ifdef CONFIG_HYPERVISOR_GUEST
+/*
+ * Widen the per-cpu poll window: start from a 10us base when polling was
+ * off, otherwise multiply by 'grow', clamped to 'max'.
+ */
+static unsigned int grow_poll_ns(unsigned int old, unsigned int grow,
+			unsigned int max)
+{
+	unsigned int val;
+
+	/* 10us as base poll duration */
+	if (old == 0 && grow)
+		return 10000;
+
+	/* Guard against unsigned wrap-around before applying the cap. */
+	if (grow && old > max / grow)
+		return max;
+
+	val = old * grow;
+	if (val > max)
+		val = max;
+
+	return val;
+}
+
+/*
+ * Narrow the per-cpu poll window by dividing it by 'shrink'.
+ * A shrink divisor of zero means: disable polling entirely.
+ */
+static unsigned int shrink_poll_ns(unsigned int old, unsigned int shrink)
+{
+	return shrink ? old / shrink : 0;
+}
+
+/*
+ * Called on wakeup from halt (from interrupt entry paths): compare how long
+ * the CPU actually stayed idle against poll_threshold_ns and grow/shrink the
+ * per-cpu poll window accordingly.
+ */
+void check_poll(void)
+{
+	unsigned int val, poll_duration;
+	unsigned long begin_ns, now_ns;
+
+	if (!poll_threshold_ns)
+		return;
+
+	begin_ns = this_cpu_read(poll_begin_ns);
+	/* Not from halt state */
+	if (!begin_ns)
+		return;
+
+	now_ns = ktime_to_ns(ktime_get());
+	poll_duration = this_cpu_read(poll_ns);
+	/*
+	 * Default: keep the current window. Without this, 'val' is used
+	 * uninitialized when neither branch below fires (e.g. poll_duration
+	 * already at the threshold but the halt was short).
+	 */
+	val = poll_duration;
+
+	if (poll_duration && now_ns - begin_ns > poll_threshold_ns)
+		val = shrink_poll_ns(poll_duration, poll_shrink);
+	else if (poll_duration < poll_threshold_ns &&
+		now_ns - begin_ns < poll_threshold_ns)
+		val = grow_poll_ns(poll_duration, poll_grow, poll_threshold_ns);
+
+	this_cpu_write(poll_ns, val);
+	this_cpu_write(poll_begin_ns, 0);
+}
+
void arch_cpu_idle_poll(void)
{
ktime_t start, cur, stop;
@@ -359,6 +414,10 @@ void arch_cpu_idle(void)
void __cpuidle default_idle(void)
{
trace_cpu_idle_rcuidle(1, smp_processor_id());
+#ifdef CONFIG_HYPERVISOR_GUEST
+ if (poll_threshold_ns)
+ this_cpu_write(poll_begin_ns, ktime_to_ns(ktime_get()));
+#endif
safe_halt();
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
}
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index d798c0d..81a3961 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -265,6 +265,7 @@ static inline void __smp_reschedule_interrupt(void)
__visible void __irq_entry smp_reschedule_interrupt(struct pt_regs *regs)
{
ack_APIC_irq();
+ check_poll();
__smp_reschedule_interrupt();
/*
* KVM uses this interrupt to force a cpu out of guest mode
@@ -280,6 +281,7 @@ __visible void __irq_entry smp_trace_reschedule_interrupt(struct pt_regs *regs)
* to nest.
*/
ipi_entering_ack_irq();
+ check_poll();
trace_reschedule_entry(RESCHEDULE_VECTOR);
__smp_reschedule_interrupt();
trace_reschedule_exit(RESCHEDULE_VECTOR);
@@ -298,6 +300,7 @@ static inline void __smp_call_function_interrupt(void)
__visible void __irq_entry smp_call_function_interrupt(struct pt_regs *regs)
{
ipi_entering_ack_irq();
+ check_poll();
__smp_call_function_interrupt();
exiting_irq();
}
@@ -306,6 +309,7 @@ __visible void __irq_entry smp_call_function_interrupt(struct pt_regs *regs)
smp_trace_call_function_interrupt(struct pt_regs *regs)
{
ipi_entering_ack_irq();
+ check_poll();
trace_call_function_entry(CALL_FUNCTION_VECTOR);
__smp_call_function_interrupt();
trace_call_function_exit(CALL_FUNCTION_VECTOR);
@@ -322,6 +326,7 @@ static inline void __smp_call_function_single_interrupt(void)
smp_call_function_single_interrupt(struct pt_regs *regs)
{
ipi_entering_ack_irq();
+ check_poll();
__smp_call_function_single_interrupt();
exiting_irq();
}
@@ -330,6 +335,7 @@ static inline void __smp_call_function_single_interrupt(void)
smp_trace_call_function_single_interrupt(struct pt_regs *regs)
{
ipi_entering_ack_irq();
+ check_poll();
trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
__smp_call_function_single_interrupt();
trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR);
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 04cf774..e901b26 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -462,6 +462,8 @@ extern __scanf(2, 0)
extern bool crash_kexec_post_notifiers;
#ifdef CONFIG_HYPERVISOR_GUEST
extern unsigned long poll_threshold_ns;
+extern unsigned int poll_shrink;
+extern unsigned int poll_grow;
#endif
/*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9174d57..82776eb 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1211,6 +1211,20 @@ static int sysrq_sysctl_handler(struct ctl_table *table, int write,
.mode = 0644,
.proc_handler = proc_dointvec,
},
+	{
+		/* Named "poll_grow" to match Documentation/sysctl/kernel.txt */
+		.procname = "poll_grow",
+		.data = &poll_grow,
+		.maxlen = sizeof(unsigned int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec,
+	},
+	{
+		/* Named "poll_shrink" to match Documentation/sysctl/kernel.txt */
+		.procname = "poll_shrink",
+		.data = &poll_shrink,
+		.maxlen = sizeof(unsigned int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec,
+	},
#endif
{ }
};
--
1.8.3.1