Re: [RFC PATCH v2 1/3] cpuidle/powernv: Add support for states with ibm,cpuidle-state-v1
From: Frank Rowand
Date: Thu Oct 11 2018 - 15:56:01 EST
+ devicetree mail list
On 10/11/18 06:22, Akshay Adiga wrote:
> This patch adds support for new device-tree format for idle state
> description.
>
> Previously, if an older kernel ran on newer firmware, it could enable
> all available states irrespective of its capability to handle them.
> The new device-tree format adds a compatible flag, so that only a kernel
> which has the capability to handle the version of a stop state will enable
> it.
>
> Older kernels will still see stop0 and stop0_lite in the older format, and we
> will deprecate it after some time.
>
> 1) The idea is to bump up the version in firmware if we find a bug or
> regression in stop states. A fix will be provided in Linux, which would
> then know about the bumped-up version of stop states, whereas kernels
> without the fix would ignore the states.
>
> 2) Slowly deprecate the cpuidle/cpuhotplug threshold which is hard-coded
> into the cpuidle-powernv driver. Instead, use compatible strings to indicate
> whether an idle state is suitable for cpuidle and hotplug.
>
> New idle state device tree format :
> power-mgt {
> ...
> ibm,enabled-stop-levels = <0xec000000>;
> ibm,cpu-idle-state-psscr-mask = <0x0 0x3003ff 0x0 0x3003ff>;
> ibm,cpu-idle-state-latencies-ns = <0x3e8 0x7d0>;
> ibm,cpu-idle-state-psscr = <0x0 0x330 0x0 0x300330>;
> ibm,cpu-idle-state-flags = <0x100000 0x101000>;
> ibm,cpu-idle-state-residency-ns = <0x2710 0x4e20>;
> ibm,idle-states {
> stop4 {
> flags = <0x207000>;
> compatible = "ibm,state-v1",
> "opal-supported";
> type = "cpuidle";
> psscr-mask = <0x0 0x3003ff>;
> handle = <0x102>;
> latency-ns = <0x186a0>;
> residency-ns = <0x989680>;
> psscr = <0x0 0x300374>;
> };
> ...
> stop11 {
> ...
> compatible = "ibm,state-v1",
> "opal-supported";
> type = "cpuoffline";
> ...
> };
> };
> type strings :
> "cpuidle" : indicates it should be used by cpuidle-driver
> "cpuoffline" : indicates it should be used by hotplug driver
>
> compatible strings :
> "ibm,state-v1" : kernel checks if it knows about this version
> "opal-supported" : indicates kernel can fall back to use opal
> for stop-transitions
>
> Signed-off-by: Akshay Adiga <akshay.adiga@xxxxxxxxxxxxxxxxxx>
> ---
>
> Changes from v1 :
> - Code is rebased on Nick Piggin's v4 patch "powerpc/64s: reimplement book3s
> idle code in C"
> - Moved "cpuidle" and "cpuoffline" to a separate property called
> "type"
>
>
> arch/powerpc/include/asm/cpuidle.h | 9 ++
> arch/powerpc/platforms/powernv/idle.c | 132 +++++++++++++++++++++++++-
> drivers/cpuidle/cpuidle-powernv.c | 31 ++++--
> 3 files changed, 160 insertions(+), 12 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
> index 9844b3ded187..e920a15e797f 100644
> --- a/arch/powerpc/include/asm/cpuidle.h
> +++ b/arch/powerpc/include/asm/cpuidle.h
> @@ -70,14 +70,23 @@
>
> #ifndef __ASSEMBLY__
>
> +enum idle_state_type_t {
> + CPUIDLE_TYPE,
> + CPUOFFLINE_TYPE
> +};
> +
> +#define POWERNV_THRESHOLD_LATENCY_NS 200000
> +#define PNV_VER_NAME_LEN 32
> #define PNV_IDLE_NAME_LEN 16
> struct pnv_idle_states_t {
> char name[PNV_IDLE_NAME_LEN];
> + char version[PNV_VER_NAME_LEN];
> u32 latency_ns;
> u32 residency_ns;
> u64 psscr_val;
> u64 psscr_mask;
> u32 flags;
> + enum idle_state_type_t type;
> bool valid;
> };
>
> diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
> index 96186af9e953..755918402591 100644
> --- a/arch/powerpc/platforms/powernv/idle.c
> +++ b/arch/powerpc/platforms/powernv/idle.c
> @@ -54,6 +54,20 @@ static bool default_stop_found;
> static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
> static u64 pnv_first_hv_loss_level = MAX_STOP_STATE + 1;
>
> +
> +static int parse_dt_v1(struct device_node *np);
> +struct stop_version_t {
> + const char name[PNV_VER_NAME_LEN];
> + int (*parser_fn)(struct device_node *np);
> +};
> +struct stop_version_t known_versions[] = {
> + {
> + .name = "ibm,state-v1",
> + .parser_fn = parse_dt_v1,
> + }
> + };
> +const int nr_known_versions = 1;
> +
> /*
> * psscr value and mask of the deepest stop idle state.
> * Used when a cpu is offlined.
> @@ -1195,6 +1209,77 @@ static void __init pnv_probe_idle_states(void)
> supported_cpuidle_states |= pnv_idle_states[i].flags;
> }
>
> +static int parse_dt_v1(struct device_node *dt_node)
> +{
> + const char *temp_str;
> + int rc;
> + int i = nr_pnv_idle_states;
> +
> + if (!dt_node) {
> + pr_err("Invalid device_node\n");
> + return -EINVAL;
> + }
> +
> + rc = of_property_read_string(dt_node, "name", &temp_str);
> + if (rc) {
> + pr_err("error reading names rc= %d\n", rc);
> + return -EINVAL;
> + }
> + strncpy(pnv_idle_states[i].name, temp_str, PNV_IDLE_NAME_LEN);
> + rc = of_property_read_u32(dt_node, "residency-ns",
> + &pnv_idle_states[i].residency_ns);
> + if (rc) {
> + pr_err("error reading residency rc= %d\n", rc);
> + return -EINVAL;
> + }
> + rc = of_property_read_u32(dt_node, "latency-ns",
> + &pnv_idle_states[i].latency_ns);
> + if (rc) {
> + pr_err("error reading latency rc= %d\n", rc);
> + return -EINVAL;
> + }
> + rc = of_property_read_u32(dt_node, "flags",
> + &pnv_idle_states[i].flags);
> + if (rc) {
> + pr_err("error reading flags rc= %d\n", rc);
> + return -EINVAL;
> + }
> +
> + /* We are not expecting power8 device-tree in this format */
> + rc = of_property_read_u64(dt_node, "psscr-mask",
> + &pnv_idle_states[i].psscr_mask);
> + if (rc) {
> + pr_err("error reading psscr-mask rc= %d\n", rc);
> + return -EINVAL;
> + }
> + rc = of_property_read_u64(dt_node, "psscr",
> + &pnv_idle_states[i].psscr_val);
> + if (rc) {
> + pr_err("error reading psscr rc= %d\n", rc);
> + return -EINVAL;
> + }
> +
> + /*
> + * TODO : save the version strings in data structure
> + */
> + rc = of_property_read_string(dt_node, "type", &temp_str);
> + pr_info("type = %s\n", temp_str);
> + if (rc) {
> + pr_err("error reading type rc= %d\n", rc);
> + return -EINVAL;
> + }
> + if (strcmp(temp_str, "cpuidle") == 0)
> + pnv_idle_states[i].type = CPUIDLE_TYPE;
> + else if (strcmp(temp_str, "cpuoffline") == 0)
> + pnv_idle_states[i].type = CPUOFFLINE_TYPE;
> + else {
> + pr_err("Invalid type skipping %s\n",
> + pnv_idle_states[i].name);
> + return -EINVAL;
> + }
> + return 0;
> +
> +}
> /*
> * This function parses device-tree and populates all the information
> * into pnv_idle_states structure. It also sets up nr_pnv_idle_states
> @@ -1203,8 +1288,9 @@ static void __init pnv_probe_idle_states(void)
>
> static int pnv_parse_cpuidle_dt(void)
> {
> - struct device_node *np;
> + struct device_node *np, *np1, *dt_node;
> int nr_idle_states, i;
> + int additional_states = 0;
> int rc = 0;
> u32 *temp_u32;
> u64 *temp_u64;
> @@ -1218,8 +1304,14 @@ static int pnv_parse_cpuidle_dt(void)
> nr_idle_states = of_property_count_u32_elems(np,
> "ibm,cpu-idle-state-flags");
>
> - pnv_idle_states = kcalloc(nr_idle_states, sizeof(*pnv_idle_states),
> - GFP_KERNEL);
> + np1 = of_find_node_by_path("/ibm,opal/power-mgt/ibm,idle-states");
> + if (np1) {
> + for_each_child_of_node(np1, dt_node)
> + additional_states++;
> + }
> + pr_info("states in new format : %d\n", additional_states);
> + pnv_idle_states = kcalloc(nr_idle_states + additional_states,
> + sizeof(*pnv_idle_states), GFP_KERNEL);
> temp_u32 = kcalloc(nr_idle_states, sizeof(u32), GFP_KERNEL);
> temp_u64 = kcalloc(nr_idle_states, sizeof(u64), GFP_KERNEL);
> temp_string = kcalloc(nr_idle_states, sizeof(char *), GFP_KERNEL);
> @@ -1298,8 +1390,40 @@ static int pnv_parse_cpuidle_dt(void)
> for (i = 0; i < nr_idle_states; i++)
> strlcpy(pnv_idle_states[i].name, temp_string[i],
> PNV_IDLE_NAME_LEN);
> +
> + /* Mark states as CPUIDLE_TYPE /CPUOFFLINE for older version*/
> + for (i = 0; i < nr_idle_states; i++) {
> + if (pnv_idle_states[i].latency_ns > POWERNV_THRESHOLD_LATENCY_NS)
> + pnv_idle_states[i].type = CPUOFFLINE_TYPE;
> + else
> + pnv_idle_states[i].type = CPUIDLE_TYPE;
> + }
> nr_pnv_idle_states = nr_idle_states;
> - rc = 0;
> + /* Parsing node-based idle states device-tree format */
> + if (!np1) {
> + pr_info("dt does not contain ibm,idle_states");
> + goto out;
> + }
> + /* Parse each child node with appropriate parser_fn */
> + for_each_child_of_node(np1, dt_node) {
> + bool found_known_version = false;
> + /* we don't have state falling back to opal*/
> + for (i = 0; i < nr_known_versions ; i++) {
> + if (of_device_is_compatible(dt_node, known_versions[i].name)) {
> + rc = known_versions[i].parser_fn(dt_node);
> + if (rc) {
> + pr_err("%s could not parse\n", known_versions[i].name);
> + continue;
> + }
> + found_known_version = true;
> + }
> + }
> + if (!found_known_version) {
> + pr_info("Unsupported state, skipping all further state\n");
> + goto out;
> + }
> + nr_pnv_idle_states++;
> + }
> out:
> kfree(temp_u32);
> kfree(temp_u64);
> diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
> index 84b1ebe212b3..a15514ebd1c3 100644
> --- a/drivers/cpuidle/cpuidle-powernv.c
> +++ b/drivers/cpuidle/cpuidle-powernv.c
> @@ -26,7 +26,6 @@
> * Expose only those Hardware idle states via the cpuidle framework
> * that have latency value below POWERNV_THRESHOLD_LATENCY_NS.
> */
> -#define POWERNV_THRESHOLD_LATENCY_NS 200000
>
> static struct cpuidle_driver powernv_idle_driver = {
> .name = "powernv_idle",
> @@ -265,7 +264,7 @@ extern u32 pnv_get_supported_cpuidle_states(void);
> static int powernv_add_idle_states(void)
> {
> int nr_idle_states = 1; /* Snooze */
> - int dt_idle_states;
> + int dt_idle_states = 0;
> u32 has_stop_states = 0;
> int i;
> u32 supported_flags = pnv_get_supported_cpuidle_states();
> @@ -277,14 +276,19 @@ static int powernv_add_idle_states(void)
> goto out;
> }
>
> - /* TODO: Count only states which are eligible for cpuidle */
> - dt_idle_states = nr_pnv_idle_states;
> + /* Count only cpuidle states*/
> + for (i = 0; i < nr_pnv_idle_states; i++) {
> + if (pnv_idle_states[i].type == CPUIDLE_TYPE)
> + dt_idle_states++;
> + }
> + pr_info("idle states in dt = %d , states with idle flag = %d",
> + nr_pnv_idle_states, dt_idle_states);
>
> /*
> * Since snooze is used as first idle state, max idle states allowed is
> * CPUIDLE_STATE_MAX -1
> */
> - if (nr_pnv_idle_states > CPUIDLE_STATE_MAX - 1) {
> + if (dt_idle_states > CPUIDLE_STATE_MAX - 1) {
> pr_warn("cpuidle-powernv: discovered idle states more than allowed");
> dt_idle_states = CPUIDLE_STATE_MAX - 1;
> }
> @@ -305,8 +309,15 @@ static int powernv_add_idle_states(void)
> * Skip the platform idle state whose flag isn't in
> * the supported_cpuidle_states flag mask.
> */
> - if ((state->flags & supported_flags) != state->flags)
> + if ((state->flags & supported_flags) != state->flags) {
> + pr_warn("State %d does not have supported flag\n", i);
> + continue;
> + }
> + if (state->type != CPUIDLE_TYPE) {
> + pr_info("State %d is not idletype, it of %d type\n", i,
> + state->type);
> continue;
> + }
> /*
> * If an idle state has exit latency beyond
> * POWERNV_THRESHOLD_LATENCY_NS then don't use it
> @@ -321,8 +332,10 @@ static int powernv_add_idle_states(void)
> exit_latency = DIV_ROUND_UP(state->latency_ns, 1000);
> target_residency = DIV_ROUND_UP(state->residency_ns, 1000);
>
> - if (has_stop_states && !(state->valid))
> + if (has_stop_states && !(state->valid)) {
> + pr_warn("State %d is invalid\n", i);
> continue;
> + }
>
> if (state->flags & OPAL_PM_TIMEBASE_STOP)
> stops_timebase = true;
> @@ -360,8 +373,10 @@ static int powernv_add_idle_states(void)
> state->psscr_mask);
> }
> #endif
> - else
> + else {
> + pr_warn("cpuidle-powernv : could not add state\n");
> continue;
> + }
> nr_idle_states++;
> }
> out:
>