Re: [PATCH v2 2/2] riscv: dts: spacemit: Add cpu scaling for K1 SoC
From: Shuwei Wu
Date: Thu Apr 16 2026 - 02:04:04 EST
On Tue Apr 14, 2026 at 9:25 PM CST, Anand Moon wrote:
> Hi Shuwei,
>
> On Fri, 10 Apr 2026 at 13:30, Shuwei Wu <shuwei.wu@xxxxxxxxxxx> wrote:
>>
>> Add Operating Performance Points (OPP) tables and CPU clock properties
>> for the two clusters in the SpacemiT K1 SoC.
>>
>> Also assign the CPU power supply (cpu-supply) for the Banana Pi BPI-F3
>> board to fully enable CPU DVFS.
>>
>> Signed-off-by: Shuwei Wu <shuwei.wu@xxxxxxxxxxx>
>>
>> ---
>> Changes in v2:
>> - Add k1-opp.dtsi with OPP tables for both CPU clusters
>> - Assign CPU supplies and include OPP table for Banana Pi BPI-F3
>> ---
>> arch/riscv/boot/dts/spacemit/k1-bananapi-f3.dts | 35 +++++++-
>> arch/riscv/boot/dts/spacemit/k1-opp.dtsi | 105 ++++++++++++++++++++++++
>> arch/riscv/boot/dts/spacemit/k1.dtsi | 8 ++
>> 3 files changed, 147 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/riscv/boot/dts/spacemit/k1-bananapi-f3.dts b/arch/riscv/boot/dts/spacemit/k1-bananapi-f3.dts
>> index 444c3b1e6f44..3780593f610d 100644
>> --- a/arch/riscv/boot/dts/spacemit/k1-bananapi-f3.dts
>> +++ b/arch/riscv/boot/dts/spacemit/k1-bananapi-f3.dts
>> @@ -5,6 +5,7 @@
>>
>> #include "k1.dtsi"
>> #include "k1-pinctrl.dtsi"
>> +#include "k1-opp.dtsi"
>>
>> / {
>> model = "Banana Pi BPI-F3";
>> @@ -86,6 +87,38 @@ &combo_phy {
>> status = "okay";
>> };
>>
>> +&cpu_0 {
>> + cpu-supply = <&buck1_3v45>;
>> +};
>> +
>> +&cpu_1 {
>> + cpu-supply = <&buck1_3v45>;
>> +};
>> +
>> +&cpu_2 {
>> + cpu-supply = <&buck1_3v45>;
>> +};
>> +
>> +&cpu_3 {
>> + cpu-supply = <&buck1_3v45>;
>> +};
>> +
>> +&cpu_4 {
>> + cpu-supply = <&buck1_3v45>;
>> +};
>> +
>> +&cpu_5 {
>> + cpu-supply = <&buck1_3v45>;
>> +};
>> +
>> +&cpu_6 {
>> + cpu-supply = <&buck1_3v45>;
>> +};
>> +
>> +&cpu_7 {
>> + cpu-supply = <&buck1_3v45>;
>> +};
>> +
>> &emmc {
>> bus-width = <8>;
>> mmc-hs400-1_8v;
>> @@ -201,7 +234,7 @@ pmic@41 {
>> dldoin2-supply = <&buck5>;
>>
>> regulators {
>> - buck1 {
>> + buck1_3v45: buck1 {
>> regulator-min-microvolt = <500000>;
>> regulator-max-microvolt = <3450000>;
>> regulator-ramp-delay = <5000>;
>> diff --git a/arch/riscv/boot/dts/spacemit/k1-opp.dtsi b/arch/riscv/boot/dts/spacemit/k1-opp.dtsi
>> new file mode 100644
>> index 000000000000..768ae390686d
>> --- /dev/null
>> +++ b/arch/riscv/boot/dts/spacemit/k1-opp.dtsi
>> @@ -0,0 +1,105 @@
>> +// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
>> +
>> +/ {
>> + cluster0_opp_table: opp-table-cluster0 {
>> + compatible = "operating-points-v2";
>> + opp-shared;
>> +
>> + opp-614400000 {
>> + opp-hz = /bits/ 64 <614400000>;
>> + opp-microvolt = <950000>;
>> + clock-latency-ns = <200000>;
>> + };
>> +
>> + opp-819000000 {
>> + opp-hz = /bits/ 64 <819000000>;
>> + opp-microvolt = <950000>;
>> + clock-latency-ns = <200000>;
>> + };
>> +
>> + opp-1000000000 {
>> + opp-hz = /bits/ 64 <1000000000>;
>> + opp-microvolt = <950000>;
>> + clock-latency-ns = <200000>;
>> + };
>> +
>> + opp-1228800000 {
>> + opp-hz = /bits/ 64 <1228800000>;
>> + opp-microvolt = <950000>;
>> + clock-latency-ns = <200000>;
>> + };
>> +
>> + opp-1600000000 {
>> + opp-hz = /bits/ 64 <1600000000>;
>> + opp-microvolt = <1050000>;
>> + clock-latency-ns = <200000>;
>> + };
>> + };
>> +
>> + cluster1_opp_table: opp-table-cluster1 {
>> + compatible = "operating-points-v2";
>> + opp-shared;
>> +
>> + opp-614400000 {
>> + opp-hz = /bits/ 64 <614400000>;
>> + opp-microvolt = <950000>;
>> + clock-latency-ns = <200000>;
>> + };
>> +
>> + opp-819000000 {
>> + opp-hz = /bits/ 64 <819000000>;
>> + opp-microvolt = <950000>;
>> + clock-latency-ns = <200000>;
>> + };
>> +
>> + opp-1000000000 {
>> + opp-hz = /bits/ 64 <1000000000>;
>> + opp-microvolt = <950000>;
>> + clock-latency-ns = <200000>;
>> + };
>> +
>> + opp-1228800000 {
>> + opp-hz = /bits/ 64 <1228800000>;
>> + opp-microvolt = <950000>;
>> + clock-latency-ns = <200000>;
>> + };
>> +
>> + opp-1600000000 {
>> + opp-hz = /bits/ 64 <1600000000>;
>> + opp-microvolt = <1050000>;
>> + clock-latency-ns = <200000>;
>> + };
>> + };
>> +};
>> +
>> +&cpu_0 {
>> + operating-points-v2 = <&cluster0_opp_table>;
>> +};
>> +
>> +&cpu_1 {
>> + operating-points-v2 = <&cluster0_opp_table>;
>> +};
>> +
>> +&cpu_2 {
>> + operating-points-v2 = <&cluster0_opp_table>;
>> +};
>> +
>> +&cpu_3 {
>> + operating-points-v2 = <&cluster0_opp_table>;
>> +};
>> +
>> +&cpu_4 {
>> + operating-points-v2 = <&cluster1_opp_table>;
>> +};
>> +
>> +&cpu_5 {
>> + operating-points-v2 = <&cluster1_opp_table>;
>> +};
>> +
>> +&cpu_6 {
>> + operating-points-v2 = <&cluster1_opp_table>;
>> +};
>> +
>> +&cpu_7 {
>> + operating-points-v2 = <&cluster1_opp_table>;
>> +};
>> diff --git a/arch/riscv/boot/dts/spacemit/k1.dtsi b/arch/riscv/boot/dts/spacemit/k1.dtsi
>> index 529ec68e9c23..bdd109b81730 100644
>> --- a/arch/riscv/boot/dts/spacemit/k1.dtsi
>> +++ b/arch/riscv/boot/dts/spacemit/k1.dtsi
>> @@ -54,6 +54,7 @@ cpu_0: cpu@0 {
>> compatible = "spacemit,x60", "riscv";
>> device_type = "cpu";
>> reg = <0>;
>> + clocks = <&syscon_apmu CLK_CPU_C0_CORE>;
>> riscv,isa = "rv64imafdcbv_zicbom_zicbop_zicboz_zicntr_zicond_zicsr_zifencei_zihintpause_zihpm_zfh_zba_zbb_zbc_zbs_zkt_zvfh_zvkt_sscofpmf_sstc_svinval_svnapot_svpbmt";
>> riscv,isa-base = "rv64i";
>> riscv,isa-extensions = "i", "m", "a", "f", "d", "c", "b", "v", "zicbom",
>> @@ -84,6 +85,7 @@ cpu_1: cpu@1 {
>> compatible = "spacemit,x60", "riscv";
>> device_type = "cpu";
>> reg = <1>;
>> + clocks = <&syscon_apmu CLK_CPU_C0_CORE>;
>
> Based on the Spacemit kernel source, the k1-x_opp_table.dtsi file
> defines several additional clocks for the Operating Performance Points
> (OPP) table:
>
> clocks = <&ccu CLK_CPU_C0_ACE>, <&ccu CLK_CPU_C1_ACE>, <&ccu CLK_CPU_C0_TCM>,
> <&ccu CLK_CCI550>, <&ccu CLK_PLL3>, <&ccu
> CLK_CPU_C0_HI>, <&ccu CLK_CPU_C1_HI>;
> clock-names = "ace0","ace1","tcm","cci","pll3", "c0hi", "c1hi";
>
> These hardware clocks are also explicitly registered in the APMU clock driver
> via the k1_ccu_apmu_hws array, confirming their availability for frequency
> and voltage scaling on the K1-X SoC.
>
> static struct clk_hw *k1_ccu_apmu_hws[] = {
> [CLK_CCI550] = &cci550_clk.common.hw,
> [CLK_CPU_C0_HI] = &cpu_c0_hi_clk.common.hw,
> [CLK_CPU_C0_CORE] = &cpu_c0_core_clk.common.hw,
> [CLK_CPU_C0_ACE] = &cpu_c0_ace_clk.common.hw,
> [CLK_CPU_C0_TCM] = &cpu_c0_tcm_clk.common.hw,
> [CLK_CPU_C1_HI] = &cpu_c1_hi_clk.common.hw,
> [CLK_CPU_C1_CORE] = &cpu_c1_core_clk.common.hw,
> [CLK_CPU_C1_ACE] = &cpu_c1_ace_clk.common.hw,
>
> Yes, it is possible to add these clocks for DVFS to work correctly,
> provided they are managed by the appropriate driver and declared in
> the Device Tree (DT).
>
> Thanks
> -Anand
Thanks for your review and for pointing this out.
Regarding the clocks you mentioned, I'd like to clarify their roles based on
the K1 datasheet. Taking Cluster 0 as an example, c0_core_clk is the primary
clock for the cluster. c0_ace_clk and c0_tcm_clk are children derived from it,
defaulting to half the frequency of their parent core clock, while c0_hi_clk
represents the high-speed path selection.
Cluster 1 follows the same structure.
Based on the official SpacemiT Bianbu OS source, the spacemit-cpufreq.c driver
mainly performs the following tasks:
1. Sets the CCI550 clock frequency to 614.4MHz.
2. Sets the clock frequencies of c0_ace_clk, c1_ace_clk, and c0_tcm_clk to half
the frequency of their parent clock.
3. For the 1.6GHz OPP, it sets the PLL3 frequency to 3.2GHz and the
c0_hi_clk/c1_hi_clk frequencies to 1.6GHz.
I booted with the manufacturer's OpenWRT image and used debugfs to confirm that
the clock states are exactly as described above.
At 1.6GHz:
Clock Source & Tree             Rate (Hz)       HW Enable   Consumer
---------------------------------------------------------------------------
pll3                            3,200,000,000   Y           deviceless
└─ pll3_d2                      1,600,000,000   Y           deviceless
   ├─ cpu_c1_hi_clk             1,600,000,000   Y           deviceless
   │  └─ cpu_c1_pclk            1,600,000,000   Y           cpu0
   │     └─ cpu_c1_ace_clk        800,000,000   Y           deviceless
   └─ cpu_c0_hi_clk             1,600,000,000   Y           deviceless
      └─ cpu_c0_core_clk        1,600,000,000   Y           cpu0
         ├─ cpu_c0_tcm_clk        800,000,000   Y           deviceless
         └─ cpu_c0_ace_clk        800,000,000   Y           deviceless
pll1_2457p6_vco                 2,457,600,000   Y           deviceless
└─ pll1_d4                        614,400,000   Y           deviceless
   └─ pll1_d4_614p4               614,400,000   Y           deviceless
      └─ cci550_clk               614,400,000   Y           deviceless
At 1.228GHz:
Clock Source & Tree             Rate (Hz)       HW Enable   Consumer
---------------------------------------------------------------------------
pll1_2457p6_vco                 2,457,600,000   Y           deviceless
├─ pll1_d2                      1,228,800,000   Y           deviceless
│  └─ pll1_d2_1228p8            1,228,800,000   Y           deviceless
│     ├─ cpu_c0_core_clk        1,228,800,000   Y           cpu0
│     │  ├─ cpu_c0_tcm_clk        614,400,000   Y           deviceless
│     │  └─ cpu_c0_ace_clk        614,400,000   Y           deviceless
│     └─ cpu_c1_pclk            1,228,800,000   Y           cpu0
│        └─ cpu_c1_ace_clk        614,400,000   Y           deviceless
└─ pll1_d4                        614,400,000   Y           deviceless
   └─ pll1_d4_614p4               614,400,000   Y           deviceless
      └─ cci550_clk               614,400,000   Y           deviceless
pll3                            3,200,000,000   Y           deviceless
├─ pll3_d2                      1,600,000,000   Y           deviceless
│  ├─ cpu_c1_hi_clk             1,600,000,000   Y           deviceless
│  └─ cpu_c0_hi_clk             1,600,000,000   Y           deviceless
└─ pll3_d3                      1,066,666,666   Y           deviceless
Regarding the necessity of listing these clocks in the DT, my analysis is as follows:
1. For CCI550, I did not find a clear definition of this clock's specific role
in the SoC datasheet. Although the vendor kernel increases its frequency,
my benchmarks show that maintaining the mainline default (245.76MHz) has a
negligible impact on CPU performance.
2. For ACE and TCM clocks, they function as synchronous children of the core
clock with a default divide-by-2 ratio. Since they scale automatically relative
to c0_core_clk/c1_core_clk and no other peripherals depend on them, they do not
require manual management in the OPP table.
3. For the high-speed path, the underlying clock controller logic already handles
the parent MUX switching and PLL3 scaling automatically when clk_set_rate()
is called on the core clock.
I have verified this by checking the hardware state in the mainline kernel.
The clock tree matches the vendor kernel's configuration:
At 1.6GHz:
Clock Source & Tree             Rate (Hz)       HW Enable   Consumer
---------------------------------------------------------------------------
pll3                            3,200,000,000   Y           deviceless
└─ pll3_d2                      1,600,000,000   Y           deviceless
   ├─ cpu_c1_hi_clk             1,600,000,000   Y           deviceless
   │  └─ cpu_c1_core_clk        1,600,000,000   Y           cpu4
   │     └─ cpu_c1_ace_clk        800,000,000   Y           deviceless
   └─ cpu_c0_hi_clk             1,600,000,000   Y           deviceless
      └─ cpu_c0_core_clk        1,600,000,000   Y           cpu0
         ├─ cpu_c0_tcm_clk        800,000,000   Y           deviceless
         └─ cpu_c0_ace_clk        800,000,000   Y           deviceless
pll1                            2,457,600,000   Y           deviceless
└─ pll1_d5                        491,520,000   Y           deviceless
   └─ pll1_d5_491p52              491,520,000   Y           deviceless
      └─ cci550_clk               245,760,000   Y           deviceless
At 1.228GHz:
Clock Source & Tree             Rate (Hz)       HW Enable   Consumer
---------------------------------------------------------------------------
pll1                            2,457,600,000   Y           deviceless
├─ pll1_d5                        491,520,000   Y           deviceless
│  └─ pll1_d5_491p52              491,520,000   Y           deviceless
│     └─ cci550_clk               245,760,000   Y           deviceless
└─ pll1_d2                      1,228,800,000   Y           deviceless
   └─ pll1_d2_1228p8            1,228,800,000   Y           deviceless
      └─ cpu_c0_core_clk        1,228,800,000   Y           cpu0
         ├─ cpu_c0_tcm_clk        614,400,000   Y           deviceless
         └─ cpu_c0_ace_clk        614,400,000   Y           deviceless
pll3                            3,200,000,000   Y           deviceless
└─ pll3_d2                      1,600,000,000   Y           deviceless
   └─ cpu_c1_hi_clk             1,600,000,000   Y           deviceless
      └─ cpu_c1_core_clk        1,600,000,000   Y           cpu4
         └─ cpu_c1_ace_clk        800,000,000   Y           deviceless
Performance benchmarks also confirm that the current configuration is sufficient:
Benchmark (AWK computation): time awk 'BEGIN{for(i=0;i<10000000;i++) sum+=i}'
----------------------------------------------------------------------------
Frequency    |      Mainline Linux (s)       |          OpenWrt (s)
(kHz)        | Real (Total)  | User (CPU)    | Real (Total)  | User (CPU)
-------------+---------------+---------------+---------------+--------------
1,600,000    | 1.82s         | 1.81s         | 1.73s         | 1.73s
1,228,800    | 2.34s         | 2.33s         | 2.26s         | 2.26s
1,000,000    | 2.94s         | 2.86s         | 2.78s         | 2.78s
819,000      | 3.54s         | 3.53s         | 3.39s         | 3.39s
614,400      | 4.73s         | 4.71s         | 4.51s         | 4.51s
----------------------------------------------------------------------------
In summary, because the clock controller correctly handles the internal dividers
and parent switching, declaring only the primary core clock for each CPU node is
sufficient for functional DVFS.
--
Best regards,
Shuwei Wu