[RFC PATCH v2 2/5] kernel: allow to configure PREEMPT_NONE, PREEMPT_VOLUNTARY on kernel command line

From: Michal Hocko
Date: Fri Oct 09 2020 - 08:30:14 EST


From: Michal Hocko <mhocko@xxxxxxxx>

Many people are still relying on pre-built distribution kernels and so
distributions have to provide multiple kernel flavors to offer different
preemption models. Most of them are providing PREEMPT_NONE for typical
server deployments and PREEMPT_VOLUNTARY for desktop users.

Having two different kernel binaries differing only by the preemption
mode seems rather wasteful and inflexible. Especially when the difference
between PREEMPT_NONE and PREEMPT_VOLUNTARY is really minimal. Both only
allow explicit scheduling points while running in the kernel and it is
only might_sleep which adds additional preemption points for
PREEMPT_VOLUNTARY.

Add a kernel command line parameter preempt_mode=[none, voluntary] which
allows overriding the default compile time preemption mode
(CONFIG_PREEMPT_NONE resp. CONFIG_PREEMPT_VOLUNTARY). Voluntary preempt
mode is guarded by a jump label to prevent any potential runtime overhead.

Add an explicit include of jump_label.h to gpio/consumer.h to make sure
all its consumers will get static_branch_likely() as kernel.h cannot
include it directly.

Signed-off-by: Michal Hocko <mhocko@xxxxxxxx>
---
.../admin-guide/kernel-parameters.txt | 5 ++++
include/linux/gpio/consumer.h | 1 +
include/linux/kernel.h | 13 ++++++--
kernel/sched/core.c | 30 +++++++++++++++++++
4 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a1068742a6df..96bb74faeb50 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3873,6 +3873,11 @@
Format: {"off"}
Disable Hardware Transactional Memory

+ preempt_mode={none,voluntary}
+ Set the preemption mode.
+ none - equivalent to CONFIG_PREEMPT_NONE
+ voluntary - equivalent to CONFIG_PREEMPT_VOLUNTARY
+
print-fatal-signals=
[KNL] debug: print fatal signals

diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 901aab89d025..d64e6dda5755 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -3,6 +3,7 @@
#define __LINUX_GPIO_CONSUMER_H

#include <linux/bits.h>
+#include <linux/jump_label.h>
#include <linux/bug.h>
#include <linux/compiler_types.h>
#include <linux/err.h>
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index c25b8e41c0ea..d2d37bd5ecd5 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -14,6 +14,7 @@
#include <linux/typecheck.h>
#include <linux/printk.h>
#include <linux/build_bug.h>
+#include <linux/jump_label_type.h>
#include <asm/byteorder.h>
#include <asm/div64.h>
#include <uapi/linux/kernel.h>
@@ -192,11 +193,19 @@ struct completion;
struct pt_regs;
struct user;

+#ifndef CONFIG_PREEMPTION
#ifdef CONFIG_PREEMPT_VOLUNTARY
+DECLARE_STATIC_KEY_TRUE(preempt_voluntary_key);
+#else
+DECLARE_STATIC_KEY_FALSE(preempt_voluntary_key);
+#endif
+
extern int _cond_resched(void);
-# define might_resched() _cond_resched()
+# define might_resched() \
+ do { if (static_branch_likely(&preempt_voluntary_key)) _cond_resched(); } while (0)
#else
-# define might_resched() do { } while (0)
+# define might_resched() \
+ do { } while (0)
#endif

#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2d95dc3f4644..07d37d862637 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -43,6 +43,14 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_update_nr_running_tp);

DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);

+#ifdef CONFIG_PREEMPT_VOLUNTARY
+DEFINE_STATIC_KEY_TRUE(preempt_voluntary_key);
+#else
+/* PREEMPT_NONE vs PREEMPT_VOLUNTARY */
+DEFINE_STATIC_KEY_FALSE(preempt_voluntary_key);
+#endif
+EXPORT_SYMBOL(preempt_voluntary_key);
+
#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_JUMP_LABEL)
/*
* Debugging: various feature bits
@@ -8482,3 +8490,25 @@ void call_trace_sched_update_nr_running(struct rq *rq, int count)
{
trace_sched_update_nr_running_tp(rq, count);
}
+
+#ifndef CONFIG_PREEMPTION
+static int __init setup_non_preempt_mode(char *str)
+{
+ if (!strcmp(str, "none")) {
+ if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) {
+ static_branch_disable(&preempt_voluntary_key);
+ pr_info("Switching to PREEMPT_NONE mode.");
+ }
+ } else if (!strcmp(str, "voluntary")) {
+ if (!IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) {
+ static_branch_enable(&preempt_voluntary_key);
+ pr_info("Switching to PREEMPT_VOLUNTARY mode.");
+ }
+ } else {
+ pr_warn("Unsupported preempt mode %s\n", str);
+ return 1;
+ }
+ return 0;
+}
+__setup("preempt_mode=", setup_non_preempt_mode);
+#endif
--
2.28.0