Re: [RFC] cpuidle : Add support for pseudo-cpuidle driver
From: Fontenot, Nathan
Date: Thu Aug 20 2020 - 15:33:16 EST
On 7/23/2020 1:13 AM, Abhishek Goel wrote:
> This option adds support for a testing cpuidle driver, which allows
> user to define custom idle states with their respective latencies and
> residencies. This is useful for testing the behaviour of governors on
> customized set of idle states.
>
> This can be used as of now by hard-coding the customized set of cpuidle
> states in the driver. Will add the capability of this driver to be used
> as a module in subsequent patches.
>
> Original idea and discussion for this patch can be found at:
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flkml.org%2Flkml%2F2019%2F12%2F17%2F655&data=02%7C01%7CNathan.Fontenot%40amd.com%7C25e5be70d73e4027242208d82ed0096b%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637310818339452963&sdata=mO%2BOhH%2BoDiYJJDHUfF%2BFVtgzBAO8H%2FcaiqWACJOxtmE%3D&reserved=0
>
> Signed-off-by: Abhishek Goel <huntbag@xxxxxxxxxxxxxxxxxx>
> ---
A couple of thoughts after getting a chance to play around with this on AMD
systems. Once I added C-states for AMD systems and moved the driver (see below)
everything works as expected. One issue I do see is that boot is really slow
on the system I'm using. I haven't had a chance to look into why but will
let you know if it's related to the driver.
For this to work on x86 I believe you'll need to have the driver
live in drivers/idle instead of drivers/cpuidle. With ACPI you can only register
one idle driver and the first one to register is set as the driver. Moving
this to drivers/idle allows the pseudo-cpuidle driver to register before the
acpi-idle driver. Introducing a boot option to enable the pseudo-idle driver
would also help by allowing you to build the driver into the kernel and allow
you to boot into the default idle driver.
When selecting the C-state tables to use (sim_type=X) I think you could
use the existing acpi routines to read the C-states directly from the
ACPI tables and then just update the .idle routine pointer. This should
allow the driver to work on any x86 system without having to update
the driver code.
-Nathan
> drivers/cpuidle/Kconfig | 9 ++
> drivers/cpuidle/Makefile | 1 +
> drivers/cpuidle/cpuidle-test.c | 276 +++++++++++++++++++++++++++++++++
> 3 files changed, 286 insertions(+)
> create mode 100644 drivers/cpuidle/cpuidle-test.c
>
> diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
> index c0aeedd66f02..1d73153a0e35 100644
> --- a/drivers/cpuidle/Kconfig
> +++ b/drivers/cpuidle/Kconfig
> @@ -71,6 +71,15 @@ config HALTPOLL_CPUIDLE
> before halting in the guest (more efficient than polling in the
> host via halt_poll_ns for some scenarios).
>
> +config TEST_CPUIDLE
> + tristate "cpuidle test driver"
> + default m
> + help
> + This option enables a testing cpuidle driver, which allows the user
> + to define custom idle states with their respective latencies and
> + residencies. This is useful for testing the behaviour of governors
> + on different sets of idle states.
> +
> endif
>
> config ARCH_NEEDS_CPU_IDLE_COUPLED
> diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile
> index f07800cbb43f..68ea7dc257b5 100644
> --- a/drivers/cpuidle/Makefile
> +++ b/drivers/cpuidle/Makefile
> @@ -8,6 +8,7 @@ obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o
> obj-$(CONFIG_DT_IDLE_STATES) += dt_idle_states.o
> obj-$(CONFIG_ARCH_HAS_CPU_RELAX) += poll_state.o
> obj-$(CONFIG_HALTPOLL_CPUIDLE) += cpuidle-haltpoll.o
> +obj-$(CONFIG_TEST_CPUIDLE) += cpuidle-test.o
>
> ##################################################################################
> # ARM SoC drivers
> diff --git a/drivers/cpuidle/cpuidle-test.c b/drivers/cpuidle/cpuidle-test.c
> new file mode 100644
> index 000000000000..399729440569
> --- /dev/null
> +++ b/drivers/cpuidle/cpuidle-test.c
> @@ -0,0 +1,276 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * cpuidle-test - Test driver for cpuidle.
> + *
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/init.h>
> +#include <linux/moduleparam.h>
> +#include <linux/cpuidle.h>
> +#include <linux/cpu.h>
> +#include <linux/module.h>
> +#include <linux/sched/idle.h>
> +#include <linux/sched/clock.h>
> +#include <linux/sched/idle.h>
> +#include <linux/string.h>
> +#include <linux/time64.h>
> +
> +#define CPUIDLE_STATE_MAX 10 /* capacity of the custom cpuidle_states[] table; NOTE(review): <linux/cpuidle.h> may already define this name - confirm */
> +#define MAX_PARAM_LENGTH 100 /* buffer size of each string module parameter */
> +
> +static unsigned int nr_states = 4; /* loop bound used when parsing the parameters and when copying states into the driver */
> +static unsigned int sim_type = 1; /* 1: cpuidle_states_ppc, 2: cpuidle_states_intel, anything else: custom table built from the strings below */
> +static char name[MAX_PARAM_LENGTH]; /* comma-separated state names for the custom table */
> +static char latency_us[MAX_PARAM_LENGTH]; /* comma-separated exit latencies for the custom table */
> +static char residency_us[MAX_PARAM_LENGTH]; /* comma-separated target residencies for the custom table */
> +
> +
> +module_param(nr_states, uint, 0644);
> +module_param(sim_type, uint, 0644);
> +module_param_string(name, name, MAX_PARAM_LENGTH, 0644);
> +module_param_string(latency_us, latency_us, MAX_PARAM_LENGTH, 0644);
> +module_param_string(residency_us, residency_us, MAX_PARAM_LENGTH, 0644);
> +
> +static struct cpuidle_driver test_cpuidle_driver = { /* states[] and state_count are filled in by cpuidle_driver_init() */
> + .name = "test_cpuidle",
> + .owner = THIS_MODULE,
> +};
> +
> +static struct cpuidle_state *cpuidle_state_table __read_mostly; /* table selected by add_cpuidle_states() */
> +
> +static struct cpuidle_device __percpu *test_cpuidle_devices; /* per-CPU devices, allocated in test_cpuidle_init() */
> +static enum cpuhp_state test_hp_idlestate; /* hotplug state id saved by test_cpuidle_init(); non-zero means the callbacks must be removed */
> +
> +
> +/*
> + * idle_loop - common ->enter() callback for every simulated idle state.
> + * @dev:   cpuidle device of the CPU going idle (unused)
> + * @drv:   driver whose states[] table supplies the exit latency
> + * @index: index of the idle state being entered
> + *
> + * Polls with interrupts enabled until a reschedule is requested, then
> + * busy-waits for the state's exit latency to emulate the wake-up cost of a
> + * real idle state.
> + *
> + * Return: @index, i.e. the state that was entered.
> + */
> +static int __cpuidle idle_loop(struct cpuidle_device *dev,
> +			       struct cpuidle_driver *drv,
> +			       int index)
> +{
> +	u64 exit_latency_ns;
> +	u64 time_start;
> +
> +	local_irq_enable();
> +	if (!current_set_polling_and_test()) {
> +		while (!need_resched())
> +			cpu_relax();
> +	}
> +
> +	/*
> +	 * exit_latency is expressed in microseconds while local_clock()
> +	 * counts nanoseconds, so convert before comparing.
> +	 */
> +	exit_latency_ns = (u64)drv->states[index].exit_latency * NSEC_PER_USEC;
> +
> +	time_start = local_clock();
> +	while (local_clock() - time_start < exit_latency_ns)
> +		cpu_relax();
> +
> +	/*
> +	 * Must run exactly once on the way out. It used to be the body of the
> +	 * loop above, which skipped it entirely for zero-latency states.
> +	 */
> +	current_clr_polling();
> +
> +	return index;
> +}
> +
> +static struct cpuidle_state cpuidle_states[CPUIDLE_STATE_MAX] = { /* custom table: only slot 0 is preset, add_cpuidle_states() fills later slots from the module parameters */
> + { /* Snooze */
> + .name = "snooze",
> + .exit_latency = 0,
> + .target_residency = 0,
> + .enter = idle_loop },
> +};
> +
> +static struct cpuidle_state cpuidle_states_ppc[] = { /* preset table selected by sim_type=1 */
> + { .name = "snooze",
> + .exit_latency = 0, /* presumably microseconds, going by the latency_us parameter that fills this field for custom states - confirm */
> + .target_residency = 0, /* presumably microseconds, going by the residency_us parameter - confirm */
> + .enter = idle_loop },
> + {
> + .name = "stop0",
> + .exit_latency = 2,
> + .target_residency = 20,
> + .enter = idle_loop },
> + {
> + .name = "stop1",
> + .exit_latency = 5,
> + .target_residency = 50,
> + .enter = idle_loop },
> + {
> + .name = "stop2",
> + .exit_latency = 10,
> + .target_residency = 100,
> + .enter = idle_loop },
> +};
> +
> +static struct cpuidle_state cpuidle_states_intel[] = { /* preset table selected by sim_type=2 */
> + { .name = "poll",
> + .exit_latency = 0, /* presumably microseconds, going by the latency_us parameter that fills this field for custom states - confirm */
> + .target_residency = 0, /* presumably microseconds, going by the residency_us parameter - confirm */
> + .enter = idle_loop },
> + {
> + .name = "c1",
> + .exit_latency = 2,
> + .target_residency = 2,
> + .enter = idle_loop },
> + {
> + .name = "c1e",
> + .exit_latency = 10,
> + .target_residency = 20,
> + .enter = idle_loop },
> + {
> + .name = "c3",
> + .exit_latency = 80,
> + .target_residency = 211,
> + .enter = idle_loop },
> +};
> +
> +/*
> + * CPU hotplug "online" callback: registers the per-CPU test cpuidle device
> + * for @cpu unless it is already registered.
> + *
> + * Return: 0 on success or when nothing had to be done, -EIO when
> + * cpuidle_register_device() reports a failure.
> + */
> +int cpuidle_cpu_online(unsigned int cpu)
> +{
> +	struct cpuidle_device *cpu_dev = per_cpu_ptr(test_cpuidle_devices, cpu);
> +
> +	/* Already registered on an earlier online event: nothing to do. */
> +	if (cpu_dev->registered)
> +		return 0;
> +
> +	cpu_dev->cpu = cpu;
> +	if (!cpuidle_register_device(cpu_dev))
> +		return 0;
> +
> +	pr_notice("cpuidle_register_device %d failed!\n", cpu);
> +	return -EIO;
> +}
> +
> +/*
> + * CPU hotplug teardown callback: unregisters the per-CPU test cpuidle device
> + * of @cpu if it is currently registered.
> + *
> + * Return: always 0.
> + */
> +int cpuidle_cpu_dead(unsigned int cpu)
> +{
> +	struct cpuidle_device *cpu_dev = per_cpu_ptr(test_cpuidle_devices, cpu);
> +
> +	if (!cpu_dev->registered)
> +		return 0;
> +
> +	cpuidle_unregister_device(cpu_dev);
> +	return 0;
> +}
> +
> +/*
> + * cpuidle_driver_init - copy the selected idle states into the driver.
> + *
> + * Walks the first nr_states entries of cpuidle_state_table and copies every
> + * entry that has an ->enter callback into test_cpuidle_driver.states[],
> + * counting them in state_count. Entries without a callback are treated as
> + * disabled and skipped.
> + *
> + * The copy stops once the driver's states[] array is full, so an oversized
> + * nr_states cannot overflow it. The source table itself carries no length;
> + * the caller must make sure nr_states does not exceed the number of entries
> + * in the table that was selected.
> + *
> + * Return: 0 on success, -EINVAL when no state table has been selected yet
> + * (state_count is left at 0 in that case).
> + */
> +int cpuidle_driver_init(void)
> +{
> +	unsigned int idle_state;
> +	struct cpuidle_driver *drv = &test_cpuidle_driver;
> +
> +	drv->state_count = 0;
> +
> +	if (!cpuidle_state_table)
> +		return -EINVAL;
> +
> +	for (idle_state = 0; idle_state < nr_states; ++idle_state) {
> +		/* Never write past the driver's fixed-size states[] array. */
> +		if (drv->state_count >= CPUIDLE_STATE_MAX)
> +			break;
> +
> +		/* Is the state not enabled? */
> +		if (cpuidle_state_table[idle_state].enter == NULL)
> +			continue;
> +
> +		drv->states[drv->state_count] =	/* structure copy */
> +			cpuidle_state_table[idle_state];
> +
> +		drv->state_count += 1;
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * add_cpuidle_states - select the idle-state table the driver will expose.
> + *
> + * sim_type 1 and 2 pick the built-in ppc and intel tables. Any other value
> + * (a warning is printed unless it is 3) builds a custom table in
> + * cpuidle_states[]: slot 0 stays the built-in "snooze" state and slots 1
> + * onward are filled from the comma-separated name, residency_us and
> + * latency_us module parameters.
> + *
> + * nr_states counts slot 0 as well, matching cpuidle_driver_init(), which
> + * copies entries [0, nr_states). It is clamped to the size of the selected
> + * table so that neither the parsing here nor later readers of
> + * cpuidle_state_table can index past the end of it.
> + *
> + * Names longer than the state's name field are truncated. Values that do not
> + * parse as unsigned decimal numbers are reported and the field keeps its
> + * previous value.
> + *
> + * Note: strsep() splits the parameter buffers in place.
> + *
> + * Return: 0 when a built-in table was selected, otherwise the (clamped)
> + * nr_states.
> + */
> +int add_cpuidle_states(void)
> +{
> +	char *this_param;
> +	char *input_name = name;
> +	char *input_res = residency_us;
> +	char *input_lat = latency_us;
> +	unsigned int index;
> +	unsigned int value;
> +
> +	switch (sim_type) {
> +	case 1:
> +		cpuidle_state_table = cpuidle_states_ppc;
> +		nr_states = min_t(unsigned int, nr_states,
> +				  ARRAY_SIZE(cpuidle_states_ppc));
> +		return 0;
> +	case 2:
> +		cpuidle_state_table = cpuidle_states_intel;
> +		nr_states = min_t(unsigned int, nr_states,
> +				  ARRAY_SIZE(cpuidle_states_intel));
> +		return 0;
> +	case 3:
> +		break;
> +	default:
> +		pr_warn("Sim value out of bound\n");
> +		break;
> +	}
> +
> +	nr_states = min_t(unsigned int, nr_states, ARRAY_SIZE(cpuidle_states));
> +
> +	if (strnlen(input_name, MAX_PARAM_LENGTH)) {
> +		index = 1;
> +		while (index < nr_states &&
> +		       (this_param = strsep(&input_name, ","))) {
> +			/* Bounded copy: the name field is a small fixed array. */
> +			strscpy(cpuidle_states[index].name, this_param,
> +				sizeof(cpuidle_states[index].name));
> +			cpuidle_states[index].enter = idle_loop;
> +			index++;
> +		}
> +	}
> +
> +	if (strnlen(input_res, MAX_PARAM_LENGTH)) {
> +		index = 1;
> +		while (index < nr_states &&
> +		       (this_param = strsep(&input_res, ","))) {
> +			if (kstrtouint(this_param, 10, &value))
> +				pr_warn("test_cpuidle: ignoring invalid residency \"%s\"\n",
> +					this_param);
> +			else
> +				cpuidle_states[index].target_residency = value;
> +			index++;
> +		}
> +	}
> +
> +	if (strnlen(input_lat, MAX_PARAM_LENGTH)) {
> +		index = 1;
> +		while (index < nr_states &&
> +		       (this_param = strsep(&input_lat, ","))) {
> +			if (kstrtouint(this_param, 10, &value))
> +				pr_warn("test_cpuidle: ignoring invalid latency \"%s\"\n",
> +					this_param);
> +			else
> +				cpuidle_states[index].exit_latency = value;
> +			index++;
> +		}
> +	}
> +
> +	cpuidle_state_table = cpuidle_states;
> +	return nr_states;
> +}
> +
> +/*
> + * Tear down everything test_cpuidle_init() set up: the CPU hotplug callbacks
> + * (only when a hotplug state id was recorded), the cpuidle driver and the
> + * per-CPU device storage. Used by both the init error path and module exit.
> + */
> +void test_cpuidle_uninit(void)
> +{
> +	/* test_hp_idlestate stays 0 until the hotplug state has been set up. */
> +	if (test_hp_idlestate != 0)
> +		cpuhp_remove_state(test_hp_idlestate);
> +
> +	cpuidle_unregister_driver(&test_cpuidle_driver);
> +
> +	free_percpu(test_cpuidle_devices);
> +	/* Drop the stale pointer so it cannot be reused after the free. */
> +	test_cpuidle_devices = NULL;
> +}
> +
> +/*
> + * test_cpuidle_init - module entry point.
> + *
> + * Selects the idle-state table, copies it into the driver, registers the
> + * driver, allocates the per-CPU cpuidle devices and installs the CPU hotplug
> + * callbacks that register/unregister those devices.
> + *
> + * Return: 0 on success, the error from driver registration or hotplug setup,
> + * or -ENOMEM when the per-CPU devices cannot be allocated.
> + */
> +int __init test_cpuidle_init(void)
> +{
> +	int retval;
> +
> +	add_cpuidle_states();
> +	cpuidle_driver_init();
> +
> +	/*
> +	 * Register the driver only. The devices are this module's own per-CPU
> +	 * objects, registered from cpuidle_cpu_online(); cpuidle_register()
> +	 * would additionally register the cpuidle core's per-CPU devices,
> +	 * which no teardown path in this file ever unregisters.
> +	 */
> +	retval = cpuidle_register_driver(&test_cpuidle_driver);
> +	if (retval) {
> +		printk(KERN_DEBUG "Registration of test driver failed.\n");
> +		return retval;
> +	}
> +
> +	test_cpuidle_devices = alloc_percpu(struct cpuidle_device);
> +	if (test_cpuidle_devices == NULL) {
> +		cpuidle_unregister_driver(&test_cpuidle_driver);
> +		return -ENOMEM;
> +	}
> +
> +	/*
> +	 * Use the variant that invokes the online callback for CPUs that are
> +	 * already up; with the _nocalls variant those CPUs would never get
> +	 * their device registered.
> +	 */
> +	retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
> +				   "cpuidle/test:online",
> +				   cpuidle_cpu_online,
> +				   cpuidle_cpu_dead);
> +
> +	if (retval < 0) {
> +		/* Nothing recorded in test_hp_idlestate yet: frees driver + devices. */
> +		test_cpuidle_uninit();
> +	} else {
> +		/* A dynamic state returns its id (> 0); keep it for removal. */
> +		test_hp_idlestate = retval;
> +		retval = 0;
> +	}
> +
> +	return retval;
> +}
> +
> +void __exit test_cpuidle_exit(void)
> +{
> + test_cpuidle_uninit(); /* shared teardown, also used by the test_cpuidle_init() error path */
> +}
> +
> +module_init(test_cpuidle_init); /* entry point run when the driver is loaded */
> +module_exit(test_cpuidle_exit); /* exit point run when the module is removed */
> +MODULE_DESCRIPTION("Test Cpuidle Driver");
> +MODULE_AUTHOR("Abhishek Goel");
> +MODULE_LICENSE("GPL");
> +
>