Hi Paul,
On 2020å05æ20æ 03:41, Paul Cercueil wrote:
Hi Zhou,
Le mar. 19 mai 2020 Ã 22:35, åçæ (Zhou Yanjie) <zhouyanjie@xxxxxxxxxxxxxx> a Ãcrit :Forward port smp support from kernel 3.18.3 of CI20_linux
to upstream kernel 5.6.
Tested-by: H. Nikolaus Schaller <hns@xxxxxxxxxxxxx>
Tested-by: Paul Boddie <paul@xxxxxxxxxxxxx>
Signed-off-by: åçæ (Zhou Yanjie) <zhouyanjie@xxxxxxxxxxxxxx>
Reviewed-by: Jiaxun Yang <jiaxun.yang@xxxxxxxxxxx>
---
Notes:
v1->v2:
1.Remove unnecessary "plat_irq_dispatch(void)" in irq-ingenic.c.
2.Add a timeout check for "jz4780_boot_secondary()" to avoid a dead loop.
3.Replace hard code in smp.c with macro.
v2->v3:
1.Remove unnecessary "extern void (*r4k_blast_dcache)(void)" in smp.c.
2.Use "for_each_of_cpu_node" instead "for_each_compatible_node" in smp.c.
3.Use "of_cpu_node_to_id" instead "of_property_read_u32_index" in smp.c.
4.Move LCR related operations to jz4780-cgu.c.
v3->v4:
Rebase on top of kernel 5.6-rc1.
v4->v5:
1.Splitting changes involving "jz4780-cgu.c" into separate commit.
2.Use "request_irq()" replace "setup_irq()".
v5->v6:
In order to have a kernel that works on multiple SoCs at the same
time, use "IS_ENABLED()" replace "#ifdef".
v6->v7:
1.SMP has be decoupled from the SoC version.
2.Add mailboxes 3 and 4 for XBurst.
3.Adjust code in "jz4780_smp_prepare_cpus()".
4."jz4780_smp_init()" has be marked "__init".
v7->v8:
No change.
arch/mips/include/asm/mach-jz4740/smp.h | 87 +++++++++++
arch/mips/jz4740/Kconfig | 2 +
arch/mips/jz4740/Makefile | 5 +
arch/mips/jz4740/prom.c | 4 +
arch/mips/jz4740/smp-entry.S | 57 +++++++
arch/mips/jz4740/smp.c | 258 ++++++++++++++++++++++++++++++++
arch/mips/kernel/idle.c | 35 ++++-
7 files changed, 447 insertions(+), 1 deletion(-)
create mode 100644 arch/mips/include/asm/mach-jz4740/smp.h
create mode 100644 arch/mips/jz4740/smp-entry.S
create mode 100644 arch/mips/jz4740/smp.c
diff --git a/arch/mips/include/asm/mach-jz4740/smp.h b/arch/mips/include/asm/mach-jz4740/smp.h
new file mode 100644
index 00000000..86f660f
--- /dev/null
+++ b/arch/mips/include/asm/mach-jz4740/smp.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2013, Paul Burton <paul.burton@xxxxxxxxxx>
+ * JZ4780 SMP definitions
+ */
+
+#ifndef __MIPS_ASM_MACH_JZ4740_SMP_H__
+#define __MIPS_ASM_MACH_JZ4740_SMP_H__
+
+#define read_c0_corectrl() __read_32bit_c0_register($12, 2)
+#define write_c0_corectrl(val) __write_32bit_c0_register($12, 2, val)
+
+#define read_c0_corestatus() __read_32bit_c0_register($12, 3)
+#define write_c0_corestatus(val) __write_32bit_c0_register($12, 3, val)
+
+#define read_c0_reim() __read_32bit_c0_register($12, 4)
+#define write_c0_reim(val) __write_32bit_c0_register($12, 4, val)
+
+#define read_c0_mailbox0() __read_32bit_c0_register($20, 0)
+#define write_c0_mailbox0(val) __write_32bit_c0_register($20, 0, val)
+
+#define read_c0_mailbox1() __read_32bit_c0_register($20, 1)
+#define write_c0_mailbox1(val) __write_32bit_c0_register($20, 1, val)
+
+#define read_c0_mailbox2() __read_32bit_c0_register($20, 2)
+#define write_c0_mailbox2(val) __write_32bit_c0_register($20, 2, val)
+
+#define read_c0_mailbox3() __read_32bit_c0_register($20, 3)
+#define write_c0_mailbox3(val) __write_32bit_c0_register($20, 3, val)
+
+#define smp_clr_pending(mask) do { \
+ unsigned int stat; \
+ stat = read_c0_corestatus(); \
+ stat &= ~((mask) & 0xff); \
+ write_c0_corestatus(stat); \
+ } while (0)
+
+/*
+ * Core Control register
+ */
+#define CORECTRL_SLEEP1M_SHIFT 17
+#define CORECTRL_SLEEP1M (_ULCAST_(0x1) << CORECTRL_SLEEP1M_SHIFT)
+#define CORECTRL_SLEEP0M_SHIFT 16
+#define CORECTRL_SLEEP0M (_ULCAST_(0x1) << CORECTRL_SLEEP0M_SHIFT)
+#define CORECTRL_RPC1_SHIFT 9
+#define CORECTRL_RPC1 (_ULCAST_(0x1) << CORECTRL_RPC1_SHIFT)
+#define CORECTRL_RPC0_SHIFT 8
+#define CORECTRL_RPC0 (_ULCAST_(0x1) << CORECTRL_RPC0_SHIFT)
+#define CORECTRL_SWRST1_SHIFT 1
+#define CORECTRL_SWRST1 (_ULCAST_(0x1) << CORECTRL_SWRST1_SHIFT)
+#define CORECTRL_SWRST0_SHIFT 0
+#define CORECTRL_SWRST0 (_ULCAST_(0x1) << CORECTRL_SWRST0_SHIFT)
+
+/*
+ * Core Status register
+ */
+#define CORESTATUS_SLEEP1_SHIFT 17
+#define CORESTATUS_SLEEP1 (_ULCAST_(0x1) << CORESTATUS_SLEEP1_SHIFT)
+#define CORESTATUS_SLEEP0_SHIFT 16
+#define CORESTATUS_SLEEP0 (_ULCAST_(0x1) << CORESTATUS_SLEEP0_SHIFT)
+#define CORESTATUS_IRQ1P_SHIFT 9
+#define CORESTATUS_IRQ1P (_ULCAST_(0x1) << CORESTATUS_IRQ1P_SHIFT)
+#define CORESTATUS_IRQ0P_SHIFT 8
+#define CORESTATUS_IRQ0P (_ULCAST_(0x1) << CORESTATUS_IRQ8P_SHIFT)
+#define CORESTATUS_MIRQ1P_SHIFT 1
+#define CORESTATUS_MIRQ1P (_ULCAST_(0x1) << CORESTATUS_MIRQ1P_SHIFT)
+#define CORESTATUS_MIRQ0P_SHIFT 0
+#define CORESTATUS_MIRQ0P (_ULCAST_(0x1) << CORESTATUS_MIRQ0P_SHIFT)
+
+/*
+ * Reset Entry & IRQ Mask register
+ */
+#define REIM_ENTRY_SHIFT 16
+#define REIM_ENTRY (_ULCAST_(0xffff) << REIM_ENTRY_SHIFT)
+#define REIM_IRQ1M_SHIFT 9
+#define REIM_IRQ1M (_ULCAST_(0x1) << REIM_IRQ1M_SHIFT)
+#define REIM_IRQ0M_SHIFT 8
+#define REIM_IRQ0M (_ULCAST_(0x1) << REIM_IRQ0M_SHIFT)
+#define REIM_MBOXIRQ1M_SHIFT 1
+#define REIM_MBOXIRQ1M (_ULCAST_(0x1) << REIM_MBOXIRQ1M_SHIFT)
+#define REIM_MBOXIRQ0M_SHIFT 0
+#define REIM_MBOXIRQ0M (_ULCAST_(0x1) << REIM_MBOXIRQ0M_SHIFT)
+
+extern void jz4780_smp_init(void);
+extern void jz4780_secondary_cpu_entry(void);
+
+#endif /* __MIPS_ASM_MACH_JZ4740_SMP_H__ */
diff --git a/arch/mips/jz4740/Kconfig b/arch/mips/jz4740/Kconfig
index 412d2fa..2b88557 100644
--- a/arch/mips/jz4740/Kconfig
+++ b/arch/mips/jz4740/Kconfig
@@ -34,9 +34,11 @@ config MACH_JZ4770
config MACH_JZ4780
bool
+ select GENERIC_CLOCKEVENTS_BROADCAST if SMP
select MIPS_CPU_SCACHE
select SYS_HAS_CPU_MIPS32_R2
select SYS_SUPPORTS_HIGHMEM
+ select SYS_SUPPORTS_SMP
config MACH_X1000
bool
diff --git a/arch/mips/jz4740/Makefile b/arch/mips/jz4740/Makefile
index 6de14c0..0a0f024 100644
--- a/arch/mips/jz4740/Makefile
+++ b/arch/mips/jz4740/Makefile
@@ -12,3 +12,8 @@ CFLAGS_setup.o = -I$(src)/../../../scripts/dtc/libfdt
# PM support
obj-$(CONFIG_PM) += pm.o
+
+# SMP support
+
+obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_SMP) += smp-entry.o
diff --git a/arch/mips/jz4740/prom.c b/arch/mips/jz4740/prom.c
index ff4555c..4acf5c2c 100644
--- a/arch/mips/jz4740/prom.c
+++ b/arch/mips/jz4740/prom.c
@@ -8,10 +8,14 @@
#include <asm/bootinfo.h>
#include <asm/fw/fw.h>
+#include <asm/mach-jz4740/smp.h>
void __init prom_init(void)
{
fw_init_cmdline();
+
+ if (IS_ENABLED(CONFIG_SMP))
+ jz4780_smp_init();
}
void __init prom_free_prom_memory(void)
diff --git a/arch/mips/jz4740/smp-entry.S b/arch/mips/jz4740/smp-entry.S
new file mode 100644
index 00000000..20049a3
--- /dev/null
+++ b/arch/mips/jz4740/smp-entry.S
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2013, Paul Burton <paul.burton@xxxxxxxxxx>
+ * JZ4780 SMP entry point
+ */
+
+#include <asm/addrspace.h>
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/cacheops.h>
+#include <asm/mipsregs.h>
+
+#define CACHE_SIZE (32 * 1024)
+#define CACHE_LINESIZE 32
+
+.extern jz4780_cpu_entry_sp
+.extern jz4780_cpu_entry_gp
+
+.section .text.smp-entry
+.balign 0x10000
+.set noreorder
+LEAF(jz4780_secondary_cpu_entry)
+ mtc0 zero, CP0_CAUSE
+
+ li t0, ST0_CU0
+ mtc0 t0, CP0_STATUS
+
+ /* cache setup */
+ li t0, KSEG0
+ ori t1, t0, CACHE_SIZE
+ mtc0 zero, CP0_TAGLO, 0
+1: cache Index_Store_Tag_I, 0(t0)
+ cache Index_Store_Tag_D, 0(t0)
+ bne t0, t1, 1b
+ addiu t0, t0, CACHE_LINESIZE
+
+ /* kseg0 cache attribute */
+ mfc0 t0, CP0_CONFIG, 0
+ ori t0, t0, CONF_CM_CACHABLE_NONCOHERENT
+ mtc0 t0, CP0_CONFIG, 0
+
+ /* pagemask */
+ mtc0 zero, CP0_PAGEMASK, 0
+
+ /* retrieve sp */
+ la t0, jz4780_cpu_entry_sp
+ lw sp, 0(t0)
+
+ /* retrieve gp */
+ la t0, jz4780_cpu_entry_gp
+ lw gp, 0(t0)
+
+ /* jump to the kernel in kseg0 */
+ la t0, smp_bootstrap
+ jr t0
+ nop
+ END(jz4780_secondary_cpu_entry)
diff --git a/arch/mips/jz4740/smp.c b/arch/mips/jz4740/smp.c
new file mode 100644
index 00000000..d95d22a
--- /dev/null
+++ b/arch/mips/jz4740/smp.c
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2013, Paul Burton <paul.burton@xxxxxxxxxx>
+ * JZ4780 SMP
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <linux/smp.h>
+#include <linux/tick.h>
+#include <asm/mach-jz4740/smp.h>
+#include <asm/smp-ops.h>
+
+static struct clk *cpu_clock_gates[CONFIG_NR_CPUS] = { NULL };
+
+u32 jz4780_cpu_entry_sp;
+u32 jz4780_cpu_entry_gp;
+
+static struct cpumask cpu_running;
This cpumask is written, but never read anywhere. Since it's static, I believe it's dead code.
Sure, I will remove it.
+
+static DEFINE_SPINLOCK(smp_lock);
+
+static irqreturn_t mbox_handler(int irq, void *dev_id)
+{
+ int cpu = smp_processor_id();
+ u32 action, status;
+
+ spin_lock(&smp_lock);
+
+ switch (cpu) {
+ case 0:
+ action = read_c0_mailbox0();
+ write_c0_mailbox0(0);
+ break;
+ case 1:
+ action = read_c0_mailbox1();
+ write_c0_mailbox1(0);
+ break;
+ case 2:
+ action = read_c0_mailbox2();
+ write_c0_mailbox2(0);
+ break;
+ case 3:
+ action = read_c0_mailbox3();
+ write_c0_mailbox3(0);
+ break;
+ default:
+ panic("unhandled cpu %d!", cpu);
+ }
+
+ /* clear pending mailbox interrupt */
+ status = read_c0_corestatus();
+ status &= ~(CORESTATUS_MIRQ0P << cpu);
+ write_c0_corestatus(status);
+
+ spin_unlock(&smp_lock);
+
+ if (action & SMP_RESCHEDULE_YOURSELF)
+ scheduler_ipi();
+ if (action & SMP_CALL_FUNCTION)
+ generic_smp_call_function_interrupt();
+
+ return IRQ_HANDLED;
+}
+
+static void jz4780_smp_setup(void)
+{
+ u32 addr, reim;
+ int cpu;
+
+ reim = read_c0_reim();
+
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ __cpu_number_map[cpu] = cpu;
+ __cpu_logical_map[cpu] = cpu;
+ set_cpu_possible(cpu, true);
I assume if you do that, you will have num_possible_cpus() == NR_CPUS, which is not what you want.
Correct me if I'm wrong, but I think you would need to call set_cpu_possible() for each CPU node found.
Yes, the current way is indeed a little problem, it will cause num_possible_cpus() == NR_CPUS, I will try to find a better way.
+ }
+
+ /* mask mailbox interrupts for this core */
+ reim &= ~REIM_MBOXIRQ0M;
+ write_c0_reim(reim);
+
+ /* clear mailboxes & pending mailbox IRQs */
+ write_c0_mailbox0(0);
+ write_c0_mailbox1(0);
Write mailbox2/3 too.
Although the XBurst1 architecture can have up to four cores, but JZ4780 only has two. If we need to write all four mailboxes here, do we need change the function name to "xburst1_smp_setup" or other similar names? This seems more appropriate.
+ write_c0_corestatus(0);
+
+ /* set reset entry point */
+ addr = KSEG1ADDR((u32)&jz4780_secondary_cpu_entry);
+ WARN_ON(addr & ~REIM_ENTRY);
+ reim &= ~REIM_ENTRY;
+ reim |= addr & REIM_ENTRY;
+
+ /* unmask mailbox interrupts for this core */
+ reim |= REIM_MBOXIRQ0M;
+ write_c0_reim(reim);
+ set_c0_status(STATUSF_IP3);
+ irq_enable_hazard();
+
+ cpumask_set_cpu(cpu, &cpu_running);
+}
+
+static void jz4780_smp_prepare_cpus(unsigned int max_cpus)
+{
+ struct device_node *cpu_node;
+ unsigned cpu, ctrl;
+ int err;
+
+ /* setup the mailbox IRQ */
+ err = request_irq(MIPS_CPU_IRQ_BASE + 3, mbox_handler,
+ IRQF_PERCPU | IRQF_NO_THREAD, "core mailbox", NULL);
Please don't hardcode the IRQ number. Instead, it should be read from devicetree, maybe from the 'cpus' node (not sure).
OK, I'll try to figure it out.
+ if (err)
+ pr_err("request_irq() on core mailbox failed\n");
+
+ ctrl = read_c0_corectrl();
+
+ for_each_of_cpu_node(cpu_node) {
+ cpu = of_cpu_node_to_id(cpu_node);
+ if (cpu < 0) {
+ pr_err("Failed to read index of %s\n",
+ cpu_node->full_name);
+ continue;
+ }
+
+ /* use reset entry point from REIM register */
+ ctrl |= CORECTRL_RPC0 << cpu;
+
+ cpu_clock_gates[cpu] = of_clk_get(cpu_node, 0);
+ if (IS_ERR(cpu_clock_gates[cpu])) {
+ cpu_clock_gates[cpu] = NULL;
+ continue;
+ }
+
+ err = clk_prepare(cpu_clock_gates[cpu]);
+ if (err)
+ pr_err("Failed to prepare CPU clock gate\n");
I'd suggest to call clk_prepare() in jz4780_boot_secondary(), since you can't handle errors here. That would also avoid the static cpu_clock_gates[] array which can grow quite big since its size is given by NR_CPUS.
Sure, I will move it to jz4780_boot_secondary().
+ }
+
+ write_c0_corectrl(ctrl);
+}
+
+static int jz4780_boot_secondary(int cpu, struct task_struct *idle)
+{
+ unsigned long flags;
+ u32 ctrl;
+
+ spin_lock_irqsave(&smp_lock, flags);
+
+ /* ensure the core is in reset */
+ ctrl = read_c0_corectrl();
+ ctrl |= CORECTRL_SWRST0 << cpu;
+ write_c0_corectrl(ctrl);
+
+ /* ungate core clock */
+ if (cpu_clock_gates[cpu])
+ clk_enable(cpu_clock_gates[cpu]);
You should check the return value of clk_enable().
+ break;
Sure.
+
+ /* set entry sp/gp register values */
+ jz4780_cpu_entry_sp = __KSTK_TOS(idle);
+ jz4780_cpu_entry_gp = (u32)task_thread_info(idle);
+ smp_wmb();
+
+ /* take the core out of reset */
+ ctrl &= ~(CORECTRL_SWRST0 << cpu);
+ write_c0_corectrl(ctrl);
+
+ cpumask_set_cpu(cpu, &cpu_running);
+
+ spin_unlock_irqrestore(&smp_lock, flags);
+
+ return 0;
+}
+
+static void jz4780_init_secondary(void)
+{
+}
+
+static void jz4780_smp_finish(void)
+{
+ u32 reim;
+
+ spin_lock(&smp_lock);
+
+ /* unmask mailbox interrupts for this core */
+ reim = read_c0_reim();
+ reim |= REIM_MBOXIRQ0M << smp_processor_id();
+ write_c0_reim(reim);
+
+ spin_unlock(&smp_lock);
+
+ /* unmask interrupts for this core */
+ change_c0_status(ST0_IM, STATUSF_IP3 | STATUSF_IP2 |
+ STATUSF_IP1 | STATUSF_IP0);
+ irq_enable_hazard();
+
+ /* force broadcast timer */
+ tick_broadcast_force();
+}
+
+static void jz4780_send_ipi_single_locked(int cpu, unsigned int action)
+{
+ u32 mbox;
+
+ switch (cpu) {
+ case 0:
+ mbox = read_c0_mailbox0();
+ write_c0_mailbox0(mbox | action);
+ break;
+ case 1:
+ mbox = read_c0_mailbox1();
+ write_c0_mailbox1(mbox | action);
Handle mailboxes 2/3 too here.
Same to the above, do we need to change the function to a more appropriate name?
+ default:
+ panic("unhandled cpu %d!", cpu);
+ }
+}
+
+static void jz4780_send_ipi_single(int cpu, unsigned int action)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&smp_lock, flags);
+ jz4780_send_ipi_single_locked(cpu, action);
+ spin_unlock_irqrestore(&smp_lock, flags);
+}
+
+static void jz4780_send_ipi_mask(const struct cpumask *mask,
+ unsigned int action)
+{
+ unsigned long flags;
+ int cpu;
+
+ spin_lock_irqsave(&smp_lock, flags);
+
+ for_each_cpu(cpu, mask)
+ jz4780_send_ipi_single_locked(cpu, action);
+
+ spin_unlock_irqrestore(&smp_lock, flags);
+}
+
+static struct plat_smp_ops jz4780_smp_ops = {
+ .send_ipi_single = jz4780_send_ipi_single,
+ .send_ipi_mask = jz4780_send_ipi_mask,
+ .init_secondary = jz4780_init_secondary,
+ .smp_finish = jz4780_smp_finish,
+ .boot_secondary = jz4780_boot_secondary,
+ .smp_setup = jz4780_smp_setup,
+ .prepare_cpus = jz4780_smp_prepare_cpus,
+};
+
+void __init jz4780_smp_init(void)
+{
+ register_smp_ops(&jz4780_smp_ops);
+}
diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c
index 37f8e78..d33f2d4 100644
--- a/arch/mips/kernel/idle.c
+++ b/arch/mips/kernel/idle.c
@@ -18,6 +18,7 @@
#include <asm/cpu-type.h>
#include <asm/idle.h>
#include <asm/mipsregs.h>
+#include <asm/r4kcache.h>
/*
* Not all of the MIPS CPUs have the "wait" instruction available. Moreover,
@@ -88,6 +89,34 @@ static void __cpuidle rm7k_wait_irqoff(void)
}
/*
+ * The Ingenic jz4780 SMP variant has to write back dirty cache lines before
+ * executing wait. The CPU & cache clock will be gated until we return from
+ * the wait, and if another core attempts to access data from our data cache
+ * during this time then it will lock up.
+ */
+void jz4780_smp_wait_irqoff(void)
+{
+ unsigned long pending = read_c0_cause() & read_c0_status() & CAUSEF_IP;
+
+ /*
+ * Going to idle has a significant overhead due to the cache flush so
+ * try to avoid it if we'll immediately be woken again due to an IRQ.
+ */
You could add a fast path here where you just call r4k_wait() if num_online_cpus() < 2.
Please correct me if I'm wrong, if we add it here, when the number of CPU cores is greater than 1 (which should be the case on most occasions), each call to "jz4780_smp_wait_irqoff" will generate additional overhead (judging the number of CPUs), is it better to change "if (IS_ENABLED(CONFIG_SMP))" in "case CPU_XBURST" below to "if (IS_ENABLED(CONFIG_SMP) && (num_possible_cpus() > 1))"?
Thanks and best regards!
-Paul
+ if (!need_resched() && !pending) {
+ r4k_blast_dcache();
+
+ __asm__(
+ " .set push \n"
+ " .set mips3 \n"
+ " sync \n"
+ " wait \n"
+ " .set pop \n");
+ }
+
+ local_irq_enable();
+}
+
+/*
* Au1 'wait' is only useful when the 32kHz counter is used as timer,
* since coreclock (and the cp0 counter) stops upon executing it. Only an
* interrupt can wake it, so they must be enabled before entering idle modes.
@@ -172,7 +201,6 @@ void __init check_wait(void)
case CPU_CAVIUM_OCTEON_PLUS:
case CPU_CAVIUM_OCTEON2:
case CPU_CAVIUM_OCTEON3:
- case CPU_XBURST:
case CPU_LOONGSON32:
case CPU_XLR:
case CPU_XLP:
@@ -246,6 +274,11 @@ void __init check_wait(void)
cpu_wait = r4k_wait;
*/
break;
+ case CPU_XBURST:
+ if (IS_ENABLED(CONFIG_SMP))
+ cpu_wait = jz4780_smp_wait_irqoff;
+ else
+ cpu_wait = r4k_wait;
default:
break;
}
--
2.7.4