[PATCH] MIPS: MSA: misaligned support

From: Leonid Yegoshin
Date: Tue Mar 17 2015 - 21:16:45 EST


MIPS R5, MIPS R6 and MSA HW specs allow a broad range of address exception
on unalaigned MSA load/store operations - from none unaligned up to
full support in HW. In practice, it is expected that HW can occasionally
triggers AdE for non-aligned data access (misalignment). It is usually
expected on page boundaries because HW handling of two TLBs in single
data access operation may be complicated and expensive.

So, this patch handles MSA LD.df and ST.df Address Error exceptions.

It handles separately two cases - MSA owned by thread and MSA registers
saved in current->thread.fpu. If thread still ownes MSA unit then it
loads and stores directly with MSA unit and only one MSA register. Saving
and restoring the full MSA context (512bytes) on each misalign exception
is expensive! Preemption is disabled, of course.

Signed-off-by: Leonid Yegoshin <Leonid.Yegoshin@xxxxxxxxxx>
---
arch/mips/include/asm/processor.h | 2 +
arch/mips/include/uapi/asm/inst.h | 21 +++++
arch/mips/kernel/r4k_fpu.S | 107 ++++++++++++++++++++++++++++
arch/mips/kernel/unaligned.c | 143 +++++++++++++++++++++++++++++++++++++
4 files changed, 273 insertions(+)

diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index f1df4cb4a286..af2675060244 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -104,6 +104,8 @@ extern unsigned int vced_count, vcei_count;
#endif

union fpureg {
+ __u8 val8[FPU_REG_WIDTH / 8];
+ __u16 val16[FPU_REG_WIDTH / 16];
__u32 val32[FPU_REG_WIDTH / 32];
__u64 val64[FPU_REG_WIDTH / 64];
};
diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h
index 89c22433b1c6..7ab6987cb7d5 100644
--- a/arch/mips/include/uapi/asm/inst.h
+++ b/arch/mips/include/uapi/asm/inst.h
@@ -58,6 +58,7 @@ enum spec_op {
dsll_op, spec7_unused_op, dsrl_op, dsra_op,
dsll32_op, spec8_unused_op, dsrl32_op, dsra32_op
};
+#define msa_op mdmx_op

/*
* func field of spec2 opcode.
@@ -217,6 +218,14 @@ enum bshfl_func {
};

/*
+ * func field for MSA MI10 format
+ */
+enum msa_mi10_func {
+ msa_ld_op = 8,
+ msa_st_op = 9,
+};
+
+/*
* (microMIPS) Major opcodes.
*/
enum mm_major_op {
@@ -616,6 +625,17 @@ struct spec3_format { /* SPEC3 */
;)))))
};

+struct msa_mi10_format { /* MSA */
+ __BITFIELD_FIELD(unsigned int opcode : 6,
+ __BITFIELD_FIELD(signed int s10 : 10,
+ __BITFIELD_FIELD(unsigned int rs : 5,
+ __BITFIELD_FIELD(unsigned int wd : 5,
+ __BITFIELD_FIELD(unsigned int func : 4,
+ __BITFIELD_FIELD(unsigned int df : 2,
+ ;))))))
+};
+
+
/*
* microMIPS instruction formats (32-bit length)
*
@@ -884,6 +904,7 @@ union mips_instruction {
struct p_format p_format;
struct f_format f_format;
struct ma_format ma_format;
+ struct msa_mi10_format msa_mi10_format;
struct b_format b_format;
struct ps_format ps_format;
struct v_format v_format;
diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S
index 6c160c67984c..5f48f45f81e7 100644
--- a/arch/mips/kernel/r4k_fpu.S
+++ b/arch/mips/kernel/r4k_fpu.S
@@ -13,6 +13,7 @@
* Copyright (C) 1999, 2001 Silicon Graphics, Inc.
*/
#include <asm/asm.h>
+#include <asm/asmmacro.h>
#include <asm/errno.h>
#include <asm/fpregdef.h>
#include <asm/mipsregs.h>
@@ -268,6 +269,112 @@ LEAF(_restore_fp_context32)
END(_restore_fp_context32)
#endif

+#ifdef CONFIG_CPU_HAS_MSA
+
+ .macro msa_ld_d wd, base
+ ld_d \wd, 0, \base
+ jalr $0, $31
+ nop
+ .align 4
+ .endm
+
+ .macro msa_st_d wd, base
+ st_d \wd, 0, \base
+ jalr $0, $31
+ nop
+ .align 4
+ .endm
+
+LEAF(msa_to_wd)
+ .set push
+ .set noreorder
+ sll t0, a0, 4
+ PTR_LA t1, Lmsa_to
+ PTR_ADDU t0, t0, t1
+ jalr $0, t0
+ nop
+Lmsa_to:
+ msa_ld_d 0, a1
+ msa_ld_d 1, a1
+ msa_ld_d 2, a1
+ msa_ld_d 3, a1
+ msa_ld_d 4, a1
+ msa_ld_d 5, a1
+ msa_ld_d 6, a1
+ msa_ld_d 7, a1
+ msa_ld_d 8, a1
+ msa_ld_d 9, a1
+ msa_ld_d 10, a1
+ msa_ld_d 11, a1
+ msa_ld_d 12, a1
+ msa_ld_d 13, a1
+ msa_ld_d 14, a1
+ msa_ld_d 15, a1
+ msa_ld_d 16, a1
+ msa_ld_d 17, a1
+ msa_ld_d 18, a1
+ msa_ld_d 19, a1
+ msa_ld_d 20, a1
+ msa_ld_d 21, a1
+ msa_ld_d 22, a1
+ msa_ld_d 23, a1
+ msa_ld_d 24, a1
+ msa_ld_d 25, a1
+ msa_ld_d 26, a1
+ msa_ld_d 27, a1
+ msa_ld_d 28, a1
+ msa_ld_d 29, a1
+ msa_ld_d 30, a1
+ msa_ld_d 31, a1
+ .set pop
+ END(msa_to_wd)
+
+LEAF(msa_from_wd)
+ .set push
+ .set noreorder
+ sll t0, a0, 4
+ PTR_LA t1, Lmsa_from
+ PTR_ADDU t0, t0, t1
+ jalr $0, t0
+ nop
+Lmsa_from:
+ msa_st_d 0, a1
+ msa_st_d 1, a1
+ msa_st_d 2, a1
+ msa_st_d 3, a1
+ msa_st_d 4, a1
+ msa_st_d 5, a1
+ msa_st_d 6, a1
+ msa_st_d 7, a1
+ msa_st_d 8, a1
+ msa_st_d 9, a1
+ msa_st_d 10, a1
+ msa_st_d 11, a1
+ msa_st_d 12, a1
+ msa_st_d 13, a1
+ msa_st_d 14, a1
+ msa_st_d 15, a1
+ msa_st_d 16, a1
+ msa_st_d 17, a1
+ msa_st_d 18, a1
+ msa_st_d 19, a1
+ msa_st_d 20, a1
+ msa_st_d 21, a1
+ msa_st_d 22, a1
+ msa_st_d 23, a1
+ msa_st_d 24, a1
+ msa_st_d 25, a1
+ msa_st_d 26, a1
+ msa_st_d 27, a1
+ msa_st_d 28, a1
+ msa_st_d 29, a1
+ msa_st_d 30, a1
+ msa_st_d 31, a1
+ .set pop
+ END(msa_from_wd)
+
+#endif /* CONFIG_CPU_HAS_MSA */
+
.set reorder

.type fault@function
diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c
index e11906dff885..558f41fa93c5 100644
--- a/arch/mips/kernel/unaligned.c
+++ b/arch/mips/kernel/unaligned.c
@@ -108,6 +108,11 @@ static u32 unaligned_action;
#endif
extern void show_registers(struct pt_regs *regs);

+#ifdef CONFIG_CPU_HAS_MSA
+void msa_to_wd(unsigned int wd, union fpureg *from);
+void msa_from_wd(unsigned int wd, union fpureg *to);
+#endif
+
#ifdef __BIG_ENDIAN
#define LoadHW(addr, value, res) \
__asm__ __volatile__ (".set\tnoat\n" \
@@ -422,6 +427,64 @@ extern void show_registers(struct pt_regs *regs);
: "r" (value), "r" (addr), "i" (-EFAULT));
#endif

+#ifdef CONFIG_CPU_HAS_MSA
+#ifdef __BIG_ENDIAN
+/*
+ * MSA data format conversion.
+ * Only for BIG ENDIAN - LITTLE ENDIAN has register format which matches memory
+ * layout contiguosly.
+ *
+ * Conversion is done between two Double words and other formats (W/H/B)
+ * because kernel uses LD.D and ST.D to load/store MSA registers and keeps
+ * MSA registers in this format in current->thread.fpu.fpr
+ */
+static void msa_convert(union fpureg *to, union fpureg *from, int fmt)
+{
+ switch (fmt) {
+ case 0: /* byte */
+ to->val8[0] = from->val8[7];
+ to->val8[1] = from->val8[6];
+ to->val8[2] = from->val8[5];
+ to->val8[3] = from->val8[4];
+ to->val8[4] = from->val8[3];
+ to->val8[5] = from->val8[2];
+ to->val8[6] = from->val8[1];
+ to->val8[7] = from->val8[0];
+ to->val8[8] = from->val8[15];
+ to->val8[9] = from->val8[14];
+ to->val8[10] = from->val8[13];
+ to->val8[11] = from->val8[12];
+ to->val8[12] = from->val8[11];
+ to->val8[13] = from->val8[10];
+ to->val8[14] = from->val8[9];
+ to->val8[15] = from->val8[8];
+ break;
+
+ case 1: /* halfword */
+ to->val16[0] = from->val16[3];
+ to->val16[1] = from->val16[2];
+ to->val16[2] = from->val16[1];
+ to->val16[3] = from->val16[0];
+ to->val16[4] = from->val16[7];
+ to->val16[5] = from->val16[6];
+ to->val16[6] = from->val16[5];
+ to->val16[7] = from->val16[4];
+ break;
+
+ case 2: /* word */
+ to->val32[0] = from->val32[1];
+ to->val32[1] = from->val32[0];
+ to->val32[2] = from->val32[3];
+ to->val32[3] = from->val32[2];
+ break;
+
+ case 3: /* doubleword, no conversion */
+ break;
+ }
+}
+#endif
+#endif
+
static void emulate_load_store_insn(struct pt_regs *regs,
void __user *addr, unsigned int __user *pc)
{
@@ -434,6 +497,10 @@ static void emulate_load_store_insn(struct pt_regs *regs,
#ifdef CONFIG_EVA
mm_segment_t seg;
#endif
+#ifdef CONFIG_CPU_HAS_MSA
+ union fpureg msadatabase[2], *msadata;
+ unsigned int func, df, rs, wd;
+#endif
origpc = (unsigned long)pc;
orig31 = regs->regs[31];

@@ -703,6 +770,82 @@ static void emulate_load_store_insn(struct pt_regs *regs,
break;
return;

+#ifdef CONFIG_CPU_HAS_MSA
+ case msa_op:
+ if (cpu_has_mdmx)
+ goto sigill;
+
+ func = insn.msa_mi10_format.func;
+ switch (func) {
+ default:
+ goto sigbus;
+
+ case msa_ld_op:
+ case msa_st_op:
+ ;
+ }
+
+ if (!thread_msa_context_live())
+ goto sigbus;
+
+ df = insn.msa_mi10_format.df;
+ rs = insn.msa_mi10_format.rs;
+ wd = insn.msa_mi10_format.wd;
+ addr = (unsigned long *)(regs->regs[rs] + (insn.msa_mi10_format.s10 * (1 << df)));
+ /* align a working space in stack... */
+ msadata = (union fpureg *)(((unsigned long)msadatabase + 15) & ~(unsigned long)0xf);
+ if (func == msa_ld_op) {
+ if (!access_ok(VERIFY_READ, addr, 16))
+ goto sigbus;
+ compute_return_epc(regs);
+ res = __copy_from_user_inatomic(msadata, addr, 16);
+ if (res)
+ goto fault;
+ preempt_disable();
+ if (test_thread_flag(TIF_USEDMSA)) {
+#ifdef __BIG_ENDIAN
+ msa_convert(&current->thread.fpu.fpr[wd], msadata, df);
+ msa_to_wd(wd, &current->thread.fpu.fpr[wd]);
+#else
+ msa_to_wd(wd, msadata);
+#endif
+ preempt_enable();
+ } else {
+ preempt_enable();
+#ifdef __BIG_ENDIAN
+ msa_convert(&current->thread.fpu.fpr[wd], msadata, df);
+#else
+ current->thread.fpu.fpr[wd] = *msadata;
+#endif
+ }
+ } else {
+ if (!access_ok(VERIFY_WRITE, addr, 16))
+ goto sigbus;
+ compute_return_epc(regs);
+ if (test_thread_flag(TIF_USEDMSA)) {
+#ifdef __BIG_ENDIAN
+ msa_from_wd(wd, &current->thread.fpu.fpr[wd]);
+ msa_convert(msadata, &current->thread.fpu.fpr[wd], df);
+#else
+ msa_from_wd(wd, msadata);
+#endif
+ preempt_enable();
+ } else {
+ preempt_enable();
+#ifdef __BIG_ENDIAN
+ msa_convert(msadata, &current->thread.fpu.fpr[wd], df);
+#else
+ *msadata = current->thread.fpu.fpr[wd];
+#endif
+ }
+ res = __copy_to_user_inatomic(addr, msadata, 16);
+ if (res)
+ goto fault;
+ }
+
+ break;
+#endif /* CONFIG_CPU_HAS_MSA */
+
/*
* COP2 is available to implementor for application specific use.
* It's up to applications to register a notifier chain and do

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/