Re: [RFC 1/2] x86/bugs: Disable coresched on hardware that does not need it

From: Alexander Graf
Date: Mon Nov 16 2020 - 10:38:09 EST




On 13.11.20 16:55, Joel Fernandes wrote:


+static enum coresched_cmds coresched_cmd __ro_after_init = CORE_SCHED_SECURE;
+
+static int __init coresched_parse_cmdline(char *arg)
+{
+ if (!strcmp(arg, "off"))
+ coresched_cmd = CORE_SCHED_OFF;
+ else if (!strcmp(arg, "on"))
+ coresched_cmd = CORE_SCHED_ON;
+ else if (!strcmp(arg, "secure"))
+ coresched_cmd = CORE_SCHED_SECURE;
+ else
+ pr_crit("Unsupported coresched=%s, defaulting to secure.\n",
+ arg);
+
+ return 0;


Instead of calling the matching function over and over again, can we just
configure a static branch (see below) based on the command line setting
here? Or do we not know about the bugs yet?
[...]
+static bool __coresched_supported(void)
+{
+ /* coreched=off command line option. */
+ if (coresched_cmd_off())
+ return false;
+
+ /*
+ * Some arch may not need coresched, example some x86 may not need
+ * coresched if coresched=secure option is passed (=secure is default).
+ */
+ return arch_allow_core_sched();
+}
+
void sched_core_get(void)
{
+ if (!__coresched_supported())
+ return;

I would expect core scheduling to be basically an option that you set once
and never flip. This sounds like a prefect use case for a static branch to
me?

Something ike so then? If Ok, let me know if I can add your Reviewed-by tag.

thanks!

---8<-----------------------

From: "Joel Fernandes (Google)" <joel@xxxxxxxxxxxxxxxxx>
Subject: [PATCH] sched: Add a coresched command line option

Some hardware such as certain AMD variants don't have cross-HT MDS/L1TF
issues. Detect this and don't enable core scheduling as it can
needlessly slow those device down.

However, some users may want core scheduling even if the hardware is
secure. To support them, add a coresched= option which defaults to
'secure' and can be overridden to 'on' if the user wants to enable
coresched even if the HW is not vulnerable. 'off' would disable
core scheduling in any case.

Also add a sched_debug entry to indicate if core scheduling is turned on
or not.

Signed-off-by: Joel Fernandes (Google) <joel@xxxxxxxxxxxxxxxxx>
---
arch/x86/kernel/cpu/bugs.c | 19 +++++++++++++++++++
include/linux/cpu.h | 1 +
include/linux/sched/smt.h | 4 ++++
kernel/cpu.c | 38 ++++++++++++++++++++++++++++++++++++++
kernel/sched/core.c | 6 ++++++
kernel/sched/debug.c | 4 ++++
6 files changed, 72 insertions(+)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index dece79e4d1e9..7607c9cd7f0f 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -43,6 +43,7 @@ static void __init mds_select_mitigation(void);
static void __init mds_print_mitigation(void);
static void __init taa_select_mitigation(void);
static void __init srbds_select_mitigation(void);
+static void __init coresched_select(void);

/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
u64 x86_spec_ctrl_base;
@@ -103,6 +104,9 @@ void __init check_bugs(void)
if (boot_cpu_has(X86_FEATURE_STIBP))
x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;

+ /* Update whether core-scheduling is needed. */
+ coresched_select();
+
/* Select the proper CPU mitigations before patching alternatives: */
spectre_v1_select_mitigation();
spectre_v2_select_mitigation();
@@ -1808,4 +1812,19 @@ ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *
{
return cpu_show_common(dev, attr, buf, X86_BUG_SRBDS);
}
+
+/*
+ * When coresched=secure command line option is passed (default), disable core
+ * scheduling if CPU does not have MDS/L1TF vulnerability.
+ */
+static void __init coresched_select(void)
+{
+#ifdef CONFIG_SCHED_CORE
+ if (!coresched_cmd_secure())

Make this a positive branch instead please :).

/*
* Disable core scheduling on non-MDS, non-L1TF systems
* when coresched=secure (default)
*/
if (coresched_cmd_secure() &&
!boot_cpu_has_bug(X86_BUG_MDS) &&
!boot_cpu_has_bug(X86_BUG_L1TF))
static_branch_disable(&sched_coresched_supported);

+ return;
+ if (!boot_cpu_has_bug(X86_BUG_MDS) && !boot_cpu_has_bug(X86_BUG_L1TF))
+ static_branch_disable(&sched_coresched_supported);
+#endif
+}
+
#endif
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index d6428aaf67e7..d1f1e64316d6 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -228,4 +228,5 @@ static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0;
extern bool cpu_mitigations_off(void);
extern bool cpu_mitigations_auto_nosmt(void);

+extern bool coresched_cmd_secure(void);
#endif /* _LINUX_CPU_H_ */
diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h
index 59d3736c454c..561064eb3268 100644
--- a/include/linux/sched/smt.h
+++ b/include/linux/sched/smt.h
@@ -17,4 +17,8 @@ static inline bool sched_smt_active(void) { return false; }

void arch_smt_update(void);

+#ifdef CONFIG_SCHED_CORE
+extern struct static_key_true sched_coresched_supported;
+#endif
+
#endif
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 6ff2578ecf17..b1cdfc7616b4 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2552,3 +2552,41 @@ bool cpu_mitigations_auto_nosmt(void)
return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
}
EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
+
+/*
+ * These are used for a global "coresched=" cmdline option for controlling
+ * core scheduling. Note that core sched may be needed for usecases other
+ * than security as well.
+ */
+enum coresched_cmds {
+ CORE_SCHED_OFF,
+ CORE_SCHED_SECURE,
+ CORE_SCHED_ON,
+};
+
+static enum coresched_cmds coresched_cmd __ro_after_init = CORE_SCHED_SECURE;
+
+static int __init coresched_parse_cmdline(char *arg)
+{
+ if (!strcmp(arg, "off"))
+ coresched_cmd = CORE_SCHED_OFF;
+ else if (!strcmp(arg, "on"))
+ coresched_cmd = CORE_SCHED_ON;
+ else if (!strcmp(arg, "secure"))
+ coresched_cmd = CORE_SCHED_SECURE;
+ else
+ pr_crit("Unsupported coresched=%s, defaulting to secure.\n",
+ arg);
+
+ if (coresched_cmd == CORE_SCHED_OFF)
+ static_branch_disable(&sched_coresched_supported);
+ return 0;
+}
+early_param("coresched", coresched_parse_cmdline);
+
+/* coresched=secure */
+bool coresched_cmd_secure(void)
+{
+ return coresched_cmd == CORE_SCHED_SECURE;
+}
+EXPORT_SYMBOL_GPL(coresched_cmd_secure);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5ed26b469ed6..959fddf7d8de 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -333,8 +333,12 @@ static void __sched_core_disable(void)
printk("core sched disabled\n");
}

+DEFINE_STATIC_KEY_TRUE(sched_coresched_supported);
+
void sched_core_get(void)
{
+ if (!static_branch_likely(&sched_coresched_supported))
+ return;
mutex_lock(&sched_core_mutex);
if (!sched_core_count++)
__sched_core_enable();
@@ -343,6 +347,8 @@ void sched_core_get(void)

void sched_core_put(void)
{
+ if (!static_branch_likely(&sched_coresched_supported))
+ return;
mutex_lock(&sched_core_mutex);
if (!--sched_core_count)
__sched_core_disable();
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 88bf45267672..935b68be18cd 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -782,6 +782,10 @@ static void sched_debug_header(struct seq_file *m)
"sysctl_sched_tunable_scaling",
sysctl_sched_tunable_scaling,
sched_tunable_scaling_names[sysctl_sched_tunable_scaling]);
+#ifdef CONFIG_SCHED_CORE
+ SEQ_printf(m, " .%-40s: %d\n", "core_sched_enabled",
+ !!static_branch_likely(&__sched_core_enabled));
+#endif
SEQ_printf(m, "\n");
}



[...]

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a36f08d74e09..8de377dc8592 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -698,6 +698,15 @@
/proc/<pid>/coredump_filter.
See also Documentation/filesystems/proc.rst.

+ coresched=

It would be nice to list the possible arguments here too.

+ [SCHED_CORE] Enable/disable the core scheduling feature.
+ A value of 'on' keeps coresched on always. A value of

This reads as if coresched=on means that all your tasks are core scheduled. I'd prefer if you could clarify the option a bit to mean that this *plus* tagging gets your core scheduling.


Alex

+ 'off' keeps coresched off always. A value of 'secure'
+ keeps it on only if the system has vulnerabilities. Defaults
+ to 'secure'. A user who has a non-security usecase that needs
+ core scheduling, such as improving performance of VMs by
+ tagging vCPU should pass 'on' to force it on.
+
coresight_cpu_debug.enable
[ARM,ARM64]
Format: <bool>





Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879