External email: Use caution opening links or attachments
On Wed, Sep 13, 2023 at 6:47 PM Sumit Gupta <sumitg@xxxxxxxxxx> wrote:
From: Srikar Srimath Tirumala <srikars@xxxxxxxxxx>
Current implementation of processor_thermal performs software throttling
in fixed steps of "20%" which can be too coarse for some platforms.
We observed some performance gain after reducing the throttle percentage.
Change the CPUFREQ thermal reduction percentage and maximum thermal steps
to be configurable. Also, update the default values of both for Nvidia
Tegra241 (Grace) SoC. The thermal reduction percentage is reduced to "5%"
and accordingly the maximum number of thermal steps are increased as they
are derived from the reduction percentage.
Signed-off-by: Srikar Srimath Tirumala <srikars@xxxxxxxxxx>
Signed-off-by: Sumit Gupta <sumitg@xxxxxxxxxx>
---
drivers/acpi/processor_thermal.c | 41 +++++++++++++++++++++++++++++---
1 file changed, 38 insertions(+), 3 deletions(-)
diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c
index b7c6287eccca..30f2801abce6 100644
--- a/drivers/acpi/processor_thermal.c
+++ b/drivers/acpi/processor_thermal.c
@@ -26,7 +26,16 @@
*/
#define CPUFREQ_THERMAL_MIN_STEP 0
-#define CPUFREQ_THERMAL_MAX_STEP 3
+
+static int cpufreq_thermal_max_step = 3;
__read_mostly I suppose?
+
+/*
+ * Minimum throttle percentage for processor_thermal cooling device.
+ *
+ * The processor_thermal driver uses it to calculate the percentage amount by
+ * which cpu frequency must be reduced for each cooling state. This is also used
+ * to calculate the maximum number of throttling steps or cooling states.
+ */
+static int cpufreq_thermal_pctg = 20;
__read_mostly here too?
static DEFINE_PER_CPU(unsigned int, cpufreq_thermal_reduction_pctg);
@@ -71,7 +80,7 @@ static int cpufreq_get_max_state(unsigned int cpu)
if (!cpu_has_cpufreq(cpu))
return 0;
- return CPUFREQ_THERMAL_MAX_STEP;
+ return cpufreq_thermal_max_step;
}
static int cpufreq_get_cur_state(unsigned int cpu)
@@ -113,7 +122,8 @@ static int cpufreq_set_cur_state(unsigned int cpu, int state)
if (!policy)
return -EINVAL;
- max_freq = (policy->cpuinfo.max_freq * (100 - reduction_pctg(i) * 20)) / 100;
+ max_freq = (policy->cpuinfo.max_freq *
+ (100 - reduction_pctg(i) * cpufreq_thermal_pctg)) / 100;
cpufreq_cpu_put(policy);
@@ -126,10 +136,35 @@ static int cpufreq_set_cur_state(unsigned int cpu, int state)
return 0;
}
#ifdef CONFIG_HAVE_ARM_SMCCC_DISCOVERY
+#define SMCCC_SOC_ID_T241 0x036b0241
+
+void acpi_thermal_cpufreq_config_nvidia(void)
static void ?
+{
+#ifdef CONFIG_HAVE_ARM_SMCCC_DISCOVERY
+ s32 soc_id = arm_smccc_get_soc_id_version();
+
+ /* Check JEP106 code for NVIDIA Tegra241 chip (036b:0241) */
+ if ((soc_id < 0) || (soc_id != SMCCC_SOC_ID_T241))
Inner parens are redundant.
+ return;
+
+ /* Reduce the CPUFREQ Thermal reduction percentage to 5% */
+ cpufreq_thermal_pctg = 5;
+
+ /*
+ * Derive the MAX_STEP from minimum throttle percentage so that the reduction
+ * percentage doesn't end up becoming negative. Also, cap the MAX_STEP so that
+ * the CPU performance doesn't become 0.
+ */
+ cpufreq_thermal_max_step = ((100 / cpufreq_thermal_pctg) - 1);
Outer parens are redundant.
+#endif
+}
#else
static inline void void acpi_thermal_cpufreq_config_nvidia(void) {}
#endif
+
void acpi_thermal_cpufreq_init(struct cpufreq_policy *policy)
{
unsigned int cpu;
+ acpi_thermal_cpufreq_config_nvidia();
+
for_each_cpu(cpu, policy->related_cpus) {
struct acpi_processor *pr = per_cpu(processors, cpu);
int ret;
--
And patch [1/2] needs to be rebased on the current ACPI thermal
material in linux-next.
Thanks!