Re: [RFC V3 3/4] soc: qcom: Introduce SCMI based Memlat (Memory Latency) governor

From: Konrad Dybcio
Date: Tue Jul 09 2024 - 06:52:01 EST


On 2.07.2024 9:14 PM, Sibi Sankar wrote:
> Introduce a client driver that uses the memlat algorithm string hosted
> on ARM SCMI QCOM Vendor Protocol to detect memory latency workloads and
> control frequency/level of the various memory buses (DDR/LLCC/DDR_QOS).
>
> Co-developed-by: Shivnandan Kumar <quic_kshivnan@xxxxxxxxxxx>
> Signed-off-by: Shivnandan Kumar <quic_kshivnan@xxxxxxxxxxx>
> Co-developed-by: Ramakrishna Gottimukkula <quic_rgottimu@xxxxxxxxxxx>
> Signed-off-by: Ramakrishna Gottimukkula <quic_rgottimu@xxxxxxxxxxx>
> Co-developed-by: Amir Vajid <avajid@xxxxxxxxxxx>
> Signed-off-by: Amir Vajid <avajid@xxxxxxxxxxx>
> Signed-off-by: Sibi Sankar <quic_sibis@xxxxxxxxxxx>
> ---

[...]

> +/**
> + * scmi_memlat_protocol_cmd - parameter_ids supported by the "MEMLAT" algo_str hosted
> + * by the Qualcomm SCMI Vendor Protocol on the SCMI controller.

Kernel-doc for an enum needs the 'enum' keyword here: 'enum scmi_memlat_protocol_cmd - ...'

> +static int populate_cluster_info(u32 *cluster_info)
> +{
> + char name[MAX_NAME_LEN];
> + int i = 0;
> +
> + struct device_node *cn __free(device_node) = of_find_node_by_path("/cpus");
> + if (!cn)
> + return -ENODEV;
> +
> + struct device_node *map __free(device_node) = of_get_child_by_name(cn, "cpu-map");
> + if (!map)
> + return -ENODEV;
> +
> + do {
> + snprintf(name, sizeof(name), "cluster%d", i);
> + struct device_node *c __free(device_node) = of_get_child_by_name(map, name);
> + if (!c)
> + break;
> +
> + *(cluster_info + i) = of_get_child_count(c);
> + i++;
> + } while (1);

Instead of parsing cpu-map by name, this could use of_cpu_device_node_get(0) +
of_get_next_cpu_node() + of_get_cpu_hwid() & MPIDR_EL1.Aff2 [1]

[...]

> +static struct cpufreq_memfreq_map *init_cpufreq_memfreq_map(struct device *dev,
> + struct scmi_memory_info *memory,
> + struct device_node *of_node,
> + u32 *cnt)
> +{
> + struct device_node *tbl_np, *opp_np;
> + struct cpufreq_memfreq_map *tbl;
> + int ret, i = 0;
> + u32 level, len;
> + u64 rate;
> +
> + tbl_np = of_parse_phandle(of_node, "operating-points-v2", 0);
> + if (!tbl_np)
> + return ERR_PTR(-ENODEV);
> +
> + len = min(of_get_available_child_count(tbl_np), MAX_MAP_ENTRIES);
> + if (len == 0)
> + return ERR_PTR(-ENODEV);
> +
> + tbl = devm_kzalloc(dev, (len + 1) * sizeof(struct cpufreq_memfreq_map),
> + GFP_KERNEL);
> + if (!tbl)
> + return ERR_PTR(-ENOMEM);
> +
> + for_each_available_child_of_node(tbl_np, opp_np) {
> + ret = of_property_read_u64_index(opp_np, "opp-hz", 0, &rate);
> + if (ret < 0)
> + return ERR_PTR(ret);
> +
> + tbl[i].cpufreq_mhz = rate / HZ_PER_MHZ;
> +
> + if (memory->hw_type != QCOM_MEM_TYPE_DDR_QOS) {
> + ret = of_property_read_u64_index(opp_np, "opp-hz", 1, &rate);
> + if (ret < 0)
> + return ERR_PTR(ret);
> +
> + tbl[i].memfreq_khz = rate / HZ_PER_KHZ;
> + } else {
> + ret = of_property_read_u32(opp_np, "opp-level", &level);
> + if (ret < 0)
> + return ERR_PTR(ret);
> +
> + tbl[i].memfreq_khz = level;
> + }
> +
> + dev_dbg(dev, "Entry%d CPU:%u, Mem:%u\n", i, tbl[i].cpufreq_mhz, tbl[i].memfreq_khz);
> + i++;
> + }
> + *cnt = len;
> + tbl[i].cpufreq_mhz = 0;

Missing of_node_put() for tbl_np (the early returns above leak it), or even better, use __free(device_node)

[...]

> + /*
> + * Variants of the SoC having reduced number of cpus operate
> + * with the same number of logical cpus but the physical
> + * cpu disabled will differ between parts. Calculate the
> + * physical cpu number using cluster information instead.
> + */
> + ret = populate_physical_mask(monitor_np, &monitor->mask,
> + info->cluster_info);
> + if (ret < 0) {
> + dev_err_probe(&sdev->dev, ret, "failed to populate cpu mask\n");
> + goto err;
> + }

Err.. the same number of logical CPUs? As in, PSCI will happily report that
the nonexistent cores have been booted? Or do some cores start doing some sort
of hyperthreading to make up for the missing ones? This sounds sketchy..

Konrad