[PATCH v4 3/5] x86/umwait: Add sysfs interface to control umwait C0.2 state
From: Fenghua Yu
Date: Fri Jun 07 2019 - 18:14:38 EST
C0.2 state in umwait and tpause instructions can be enabled or disabled
on a processor through IA32_UMWAIT_CONTROL MSR register.
By default, C0.2 is enabled and the user wait instructions result in
lower power consumption with slower wakeup time.
But in real time systems which require faster wakeup time although power
savings could be smaller, the administrator needs to disable C0.2 and all
C0.2 requests from user applications revert to C0.1.
A sysfs interface "/sys/devices/system/cpu/umwait_control/enable_c02" is
created to allow the administrator to control C0.2 state during run time.
Signed-off-by: Fenghua Yu <fenghua.yu@xxxxxxxxx>
Reviewed-by: Ashok Raj <ashok.raj@xxxxxxxxx>
Reviewed-by: Tony Luck <tony.luck@xxxxxxxxx>
---
arch/x86/power/umwait.c | 89 ++++++++++++++++++++++++++++++++++++++++-
1 file changed, 88 insertions(+), 1 deletion(-)
diff --git a/arch/x86/power/umwait.c b/arch/x86/power/umwait.c
index 23151e77c138..9c176f3e59b6 100644
--- a/arch/x86/power/umwait.c
+++ b/arch/x86/power/umwait.c
@@ -11,10 +11,18 @@
*/
static u32 umwait_control_cached = 100000;
+/*
+ * Serialize access to umwait_control_cached and IA32_UMWAIT_CONTROL MSR
+ * to guarantee all CPUs have the same MSR value.
+ */
+static DEFINE_MUTEX(umwait_lock);
+
/* Set up IA32_UMWAIT_CONTROL MSR on CPU using the current global setting. */
static int umwait_cpu_online(unsigned int cpu)
{
+ mutex_lock(&umwait_lock);
wrmsr(MSR_IA32_UMWAIT_CONTROL, umwait_control_cached, 0);
+ mutex_unlock(&umwait_lock);
return 0;
}
@@ -30,6 +38,7 @@ static int umwait_cpu_online(unsigned int cpu)
*/
static void umwait_syscore_resume(void)
{
+ /* No need to lock because only BP is running now. */
wrmsr(MSR_IA32_UMWAIT_CONTROL, umwait_control_cached, 0);
}
@@ -37,17 +46,95 @@ static struct syscore_ops umwait_syscore_ops = {
.resume = umwait_syscore_resume,
};
+static void umwait_control_msr_update(void *unused)
+{
+ wrmsr(MSR_IA32_UMWAIT_CONTROL, umwait_control_cached, 0);
+}
+
+static u32 get_umwait_control_c02(void)
+{
+ return umwait_control_cached & MSR_IA32_UMWAIT_CONTROL_C02;
+}
+
+static u32 get_umwait_control_max_time(void)
+{
+ return umwait_control_cached & MSR_IA32_UMWAIT_CONTROL_MAX_TIME;
+}
+
+static ssize_t
+enable_c02_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ /*
+ * When bit 0 in IA32_UMWAIT_CONTROL MSR is 1, C0.2 is disabled.
+ * Otherwise, C0.2 is enabled. Show the opposite of bit 0.
+ */
+ return sprintf(buf, "%d\n", !(bool)get_umwait_control_c02());
+}
+
+static ssize_t enable_c02_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ u32 umwait_control_c02;
+ bool c02_enabled;
+ int ret;
+
+ ret = kstrtobool(buf, &c02_enabled);
+ if (ret)
+ return ret;
+
+ mutex_lock(&umwait_lock);
+
+ /*
+ * The value of bit 0 in IA32_UMWAIT_CONTROL MSR is opposite of
+ * c02_enabled.
+ */
+ umwait_control_c02 = (u32)!c02_enabled;
+ if (umwait_control_c02 == get_umwait_control_c02())
+ goto out_unlock;
+
+ umwait_control_cached = umwait_control_c02 | get_umwait_control_max_time();
+ /* Enable/disable C0.2 state on all CPUs */
+ on_each_cpu(umwait_control_msr_update, NULL, 1);
+
+out_unlock:
+ mutex_unlock(&umwait_lock);
+
+ return count;
+}
+static DEVICE_ATTR_RW(enable_c02);
+
+static struct attribute *umwait_attrs[] = {
+ &dev_attr_enable_c02.attr,
+ NULL
+};
+
+static struct attribute_group umwait_attr_group = {
+ .attrs = umwait_attrs,
+ .name = "umwait_control",
+};
+
static int __init umwait_init(void)
{
+ struct device *dev;
int ret;
if (!boot_cpu_has(X86_FEATURE_WAITPKG))
return -ENODEV;
+ /* Add umwait control interface. */
+ dev = cpu_subsys.dev_root;
+ ret = sysfs_create_group(&dev->kobj, &umwait_attr_group);
+ if (ret)
+ return ret;
+
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "umwait/intel:online",
umwait_cpu_online, NULL);
- if (ret < 0)
+ if (ret < 0) {
+ sysfs_remove_group(&dev->kobj, &umwait_attr_group);
+
return ret;
+ }
register_syscore_ops(&umwait_syscore_ops);
--
2.19.1