+ .align 4
Doesn't this mean the first one & label might not be suitably aligned?
Would it be better to put this before the ld_d (no need for it after the
$w31 case) and put another .align 4 before the Lmsa_to and Lmsa_from
labels (so the label itself is aligned)?
+
+ case 2: /* word */
+ to->val32[0] = from->val32[1];
+ to->val32[1] = from->val32[0];
+ to->val32[2] = from->val32[3];
+ to->val32[3] = from->val32[2];
FWIW since the FP/MSA patches that Paul submitted, there are also
working endian agnostic accessors created with BUILD_FPR_ACCESS, which
use the FPR_IDX macro (see http://patchwork.linux-mips.org/patch/9169/),
which should work for 8bit and 16bit sizes too.
I wonder if the compiler would unroll/optimise this sort of thing:
for (i = 0; i < (FPU_REG_WIDTH / 8); ++i)
to_val8[i] = from->val[FPR_IDX(8, i)];
No worries if not.
+ break;
Don't you still need to copy the value though?
+
+ case 3: /* doubleword, no conversion */
+ break;
+ }
Will this ever happen? (I can't see the AdE handler enabling interrupts).
+}
+#endif
+#endif
+
static void emulate_load_store_insn(struct pt_regs *regs,
void __user *addr, unsigned int __user *pc)
{
@@ -434,6 +497,10 @@ static void emulate_load_store_insn(struct pt_regs *regs,
#ifdef CONFIG_EVA
mm_segment_t seg;
#endif
+#ifdef CONFIG_CPU_HAS_MSA
+ union fpureg msadatabase[2], *msadata;
+ unsigned int func, df, rs, wd;
+#endif
origpc = (unsigned long)pc;
orig31 = regs->regs[31];
@@ -703,6 +770,82 @@ static void emulate_load_store_insn(struct pt_regs *regs,
break;
return;
+#ifdef CONFIG_CPU_HAS_MSA
+ case msa_op:
+ if (cpu_has_mdmx)
+ goto sigill;
+
+ func = insn.msa_mi10_format.func;
+ switch (func) {
+ default:
+ goto sigbus;
+
+ case msa_ld_op:
+ case msa_st_op:
+ ;
+ }
+
+ if (!thread_msa_context_live())
+ goto sigbus;
If the MSA context genuinely isn't live (i.e. it can be considered
UNPREDICTABLE), then surely a load operation should still succeed?
+"* (1 << df)"?
+ df = insn.msa_mi10_format.df;
+ rs = insn.msa_mi10_format.rs;
+ wd = insn.msa_mi10_format.wd;
+ addr = (unsigned long *)(regs->regs[rs] + (insn.msa_mi10_format.s10 * (1 << df)));
why not just "<< df"?
+ /* align a working space in stack... */
Maybe you could just use __aligned(16) on a single local union fpureg.
+ msadata = (union fpureg *)(((unsigned long)msadatabase + 15) & ~(unsigned long)0xf);
+ }
Forgot to preempt_disable()?
+ } else {
+ if (!access_ok(VERIFY_WRITE, addr, 16))
+ goto sigbus;
+ compute_return_epc(regs);
Yes. But does it make sense? It just replaces a copy of 2 doublewords with a single pointer assignment. However, if msadata is not changed, it gives the compiler some room for optimization.
+ if (test_thread_flag(TIF_USEDMSA)) {
Hmm, you could cheat and change this to the following?:
+#ifdef __BIG_ENDIAN
+ msa_from_wd(wd, &current->thread.fpu.fpr[wd]);
+ msa_convert(msadata, &current->thread.fpu.fpr[wd], df);
+#else
+ msa_from_wd(wd, msadata);
+#endif
+ preempt_enable();
+ } else {
+ preempt_enable();
+#ifdef __BIG_ENDIAN
+ msa_convert(msadata, &current->thread.fpu.fpr[wd], df);
+#else
+ *msadata = current->thread.fpu.fpr[wd];
msadata = &current->thread.fpu.fpr[wd];