[RFC 1/3] cpuidle: add poll_source API

From: Stefan Hajnoczi
Date: Tue Jul 13 2021 - 12:19:26 EST


Introduce an API for adding cpuidle poll callbacks:

struct poll_source_ops {
void (*start)(struct poll_source *src);
void (*stop)(struct poll_source *src);
void (*poll)(struct poll_source *src);
};

int poll_source_register(struct poll_source *src);
int poll_source_unregister(struct poll_source *src);

When cpuidle enters the poll state it invokes ->start() and then invokes
->poll() repeatedly from the busy wait loop. Finally ->stop() is invoked
when the busy wait loop finishes.

The ->poll() function should check for activity and cause
TIF_NEED_RESCHED to be set in order to stop the busy wait loop.

This API is intended to be used by drivers that can cheaply poll for
events. Participating in cpuidle polling allows them to avoid interrupt
latencies during periods where the CPU is going to poll anyway.

Note that each poll_source is bound to a particular CPU. The API is
mainly intended to by used by drivers that have multiple queues with irq
affinity.

Signed-off-by: Stefan Hajnoczi <stefanha@xxxxxxxxxx>
---
drivers/cpuidle/Makefile | 1 +
include/linux/poll_source.h | 53 +++++++++++++++++++
drivers/cpuidle/poll_source.c | 99 +++++++++++++++++++++++++++++++++++
drivers/cpuidle/poll_state.c | 6 +++
4 files changed, 159 insertions(+)
create mode 100644 include/linux/poll_source.h
create mode 100644 drivers/cpuidle/poll_source.c

diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile
index 26bbc5e74123..994f72d6fe95 100644
--- a/drivers/cpuidle/Makefile
+++ b/drivers/cpuidle/Makefile
@@ -7,6 +7,7 @@ obj-y += cpuidle.o driver.o governor.o sysfs.o governors/
obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o
obj-$(CONFIG_DT_IDLE_STATES) += dt_idle_states.o
obj-$(CONFIG_ARCH_HAS_CPU_RELAX) += poll_state.o
+obj-$(CONFIG_ARCH_HAS_CPU_RELAX) += poll_source.o
obj-$(CONFIG_HALTPOLL_CPUIDLE) += cpuidle-haltpoll.o

##################################################################################
diff --git a/include/linux/poll_source.h b/include/linux/poll_source.h
new file mode 100644
index 000000000000..ccfb424e170b
--- /dev/null
+++ b/include/linux/poll_source.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * poll_source.h - cpuidle busy waiting API
+ */
+#ifndef __LINUX_POLLSOURCE_H__
+#define __LINUX_POLLSOURCE_H__
+
+#include <linux/list.h>
+
+struct poll_source;
+
+struct poll_source_ops {
+ void (*start)(struct poll_source *src);
+ void (*stop)(struct poll_source *src);
+ void (*poll)(struct poll_source *src);
+};
+
+struct poll_source {
+ const struct poll_source_ops *ops;
+ struct list_head node;
+ int cpu;
+};
+
+/**
+ * poll_source_register - Add a poll_source for a CPU
+ */
+#if defined(CONFIG_CPU_IDLE) && defined(CONFIG_ARCH_HAS_CPU_RELAX)
+int poll_source_register(struct poll_source *src);
+#else
+static inline int poll_source_register(struct poll_source *src)
+{
+ return 0;
+}
+#endif
+
+/**
+ * poll_source_unregister - Remove a previously registered poll_source
+ */
+#if defined(CONFIG_CPU_IDLE) && defined(CONFIG_ARCH_HAS_CPU_RELAX)
+int poll_source_unregister(struct poll_source *src);
+#else
+static inline int poll_source_unregister(struct poll_source *src)
+{
+ return 0;
+}
+#endif
+
+/* Used by the cpuidle driver */
+void poll_source_start(void);
+void poll_source_run_once(void);
+void poll_source_stop(void);
+
+#endif /* __LINUX_POLLSOURCE_H__ */
diff --git a/drivers/cpuidle/poll_source.c b/drivers/cpuidle/poll_source.c
new file mode 100644
index 000000000000..46100e5a71e4
--- /dev/null
+++ b/drivers/cpuidle/poll_source.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * poll_source.c - cpuidle busy waiting API
+ */
+
+#include <linux/lockdep.h>
+#include <linux/percpu.h>
+#include <linux/poll_source.h>
+
+/* The per-cpu list of registered poll sources */
+DEFINE_PER_CPU(struct list_head, poll_source_list);
+
+/* Called from idle task with TIF_POLLING_NRFLAG set and irqs enabled */
+void poll_source_start(void)
+{
+ struct poll_source *src;
+
+ list_for_each_entry(src, this_cpu_ptr(&poll_source_list), node)
+ src->ops->start(src);
+}
+
+/* Called from idle task with TIF_POLLING_NRFLAG set and irqs enabled */
+void poll_source_run_once(void)
+{
+ struct poll_source *src;
+
+ list_for_each_entry(src, this_cpu_ptr(&poll_source_list), node)
+ src->ops->poll(src);
+}
+
+/* Called from idle task with TIF_POLLING_NRFLAG set and irqs enabled */
+void poll_source_stop(void)
+{
+ struct poll_source *src;
+
+ list_for_each_entry(src, this_cpu_ptr(&poll_source_list), node)
+ src->ops->stop(src);
+}
+
+static void poll_source_register_this_cpu(void *opaque)
+{
+ struct poll_source *src = opaque;
+
+ lockdep_assert_irqs_disabled();
+
+ list_add_tail(&src->node, this_cpu_ptr(&poll_source_list));
+}
+
+int poll_source_register(struct poll_source *src)
+{
+ if (!list_empty(&src->node))
+ return -EBUSY;
+
+ /*
+ * There is no race with src->cpu iterating over poll_source_list
+ * because smp_call_function_single() just sets TIF_NEED_RESCHED
+ * instead of sending an IPI during idle.
+ */
+ /* TODO but what happens if the flag isn't set yet when smp_call_function_single() is invoked? */
+ return smp_call_function_single(src->cpu,
+ poll_source_register_this_cpu,
+ src,
+ 1);
+}
+EXPORT_SYMBOL_GPL(poll_source_register);
+
+static void poll_source_unregister_this_cpu(void *opaque)
+{
+ struct poll_source *src = opaque;
+
+ lockdep_assert_irqs_disabled();
+
+ /*
+ * See comment in poll_source_register() about why this does not race
+ * with the idle CPU iterating over poll_source_list.
+ */
+ list_del_init(&src->node);
+}
+
+int poll_source_unregister(struct poll_source *src)
+{
+ return smp_call_function_single(src->cpu,
+ poll_source_unregister_this_cpu,
+ src,
+ 1);
+}
+EXPORT_SYMBOL_GPL(poll_source_unregister);
+
+/* TODO what happens when a CPU goes offline? */
+static int __init poll_source_init(void)
+{
+ int i;
+
+ for_each_possible_cpu(i)
+ INIT_LIST_HEAD(&per_cpu(poll_source_list, i));
+
+ return 0;
+}
+core_initcall(poll_source_init);
diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c
index f7e83613ae94..aa26870034ac 100644
--- a/drivers/cpuidle/poll_state.c
+++ b/drivers/cpuidle/poll_state.c
@@ -7,6 +7,7 @@
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/sched/idle.h>
+#include <linux/poll_source.h>

#define POLL_IDLE_RELAX_COUNT 200

@@ -22,9 +23,12 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev,
unsigned int loop_count = 0;
u64 limit;

+ poll_source_start();
+
limit = cpuidle_poll_time(drv, dev);

while (!need_resched()) {
+ poll_source_run_once();
cpu_relax();
if (loop_count++ < POLL_IDLE_RELAX_COUNT)
continue;
@@ -35,6 +39,8 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev,
break;
}
}
+
+ poll_source_stop();
}
current_clr_polling();

--
2.31.1