gcc patches for EV56 byte/word instructions

Jim Paradis (paradis@amt.tay1.dec.com)
Tue, 4 Jun 1996 14:28:48 -0500 (EDT)


Well, for those who are interested here is my first foray into mucking
about in gcc machine descriptions. If you specify the "-m21164a" flag
to gcc with these patches, it will generate the byte/word load/store
instructions.

Actually, I cleaned up the machine-specific flag specification a bit and
added flags for all current CPUs (21064, 21066, 21164, 21164a). Currently
only the "21164a" flag does anything.

Note that I have not yet done gas support for these instructions, so at
the moment it won't do you much good. But I'm putting it out here
anyway because I'm going on leave for six weeks and I figure I'll give
you all the goods now 8-)

Have fun!

diff -u --recursive gcc-2.7.1/config/alpha/alpha.c gcc-2.7.1-new/config/alpha/alpha.c
--- gcc-2.7.1/config/alpha/alpha.c Tue Jun 4 14:12:42 1996
+++ gcc-2.7.1-new/config/alpha/alpha.c Mon Jun 3 16:28:22 1996
@@ -418,7 +418,15 @@
return 1;
/* ... fall through ... */
case MEM:
- return mode != HImode && mode != QImode && general_operand (op, mode);
+ /* Machines with byte/word instructions support QImode and HImode
+ * for input operands. Others do not.
+ */
+ if(TARGET_HAS_BYTEWORD) {
+ return general_operand(op, mode);
+ }
+ else {
+ return mode != HImode && mode != QImode && general_operand (op, mode);
+ }

case CONST_DOUBLE:
return GET_MODE_CLASS (mode) == MODE_FLOAT && op == CONST0_RTX (mode);
diff -u --recursive gcc-2.7.1/config/alpha/alpha.h gcc-2.7.1-new/config/alpha/alpha.h
--- gcc-2.7.1/config/alpha/alpha.h Tue Jun 4 14:12:43 1996
+++ gcc-2.7.1-new/config/alpha/alpha.h Mon Jun 3 14:59:39 1996
@@ -105,25 +105,39 @@
extern enum alpha_fp_rounding_mode alpha_fprm;
extern enum alpha_fp_trap_mode alpha_fptm;

+/* Bit masks for target flags */
+#define MASK_FP 0x01
+#define MASK_FPREGS 0x02
+#define MASK_GAS 0x04
+#define MASK_IEEE 0x08
+#define MASK_21064 0x100 /* EV4/EV45 */
+#define MASK_21066 0x200 /* LCA4/LCA45 */
+#define MASK_21164 0x400 /* EV5 */
+#define MASK_21164A 0x800 /* EV56 */
+
/* This means that floating-point support exists in the target implementation
of the Alpha architecture. This is usually the default. */

-#define TARGET_FP (target_flags & 1)
+#define TARGET_FP (target_flags & MASK_FP)

/* This means that floating-point registers are allowed to be used. Note
that Alpha implementations without FP operations are required to
provide the FP registers. */

-#define TARGET_FPREGS (target_flags & 2)
+#define TARGET_FPREGS (target_flags & MASK_FPREGS)

/* This means that gas is used to process the assembler file. */

-#define MASK_GAS 4
#define TARGET_GAS (target_flags & MASK_GAS)

/* This means that we should mark procedures as IEEE conformant. */

-#define TARGET_IEEE_CONFORMANT (target_flags & 8)
+#define TARGET_IEEE_CONFORMANT (target_flags & MASK_IEEE)
+
+/* This means we can generate byte/word instructions (available only
+ * on 21164A and later)
+ */
+#define TARGET_HAS_BYTEWORD (target_flags & MASK_21164A)

/* Macro to define tables used to set the flags.
This is a list in braces of pairs in braces,
@@ -132,24 +146,28 @@
An empty string NAME is used to identify the default VALUE. */

#define TARGET_SWITCHES \
- { {"no-soft-float", 1}, \
- {"soft-float", -1}, \
- {"fp-regs", 2}, \
- {"no-fp-regs", -3}, \
+ { {"no-soft-float", MASK_FP}, \
+ {"soft-float", -MASK_FP}, \
+ {"fp-regs", MASK_FPREGS}, \
+ {"no-fp-regs", -(MASK_FP|MASK_FPREGS)}, \
{"alpha-as", -MASK_GAS}, \
{"gas", MASK_GAS}, \
- {"ieee-conformant", 8}, \
+ {"ieee-conformant", MASK_IEEE}, \
+ {"21064", MASK_21064 }, \
+ {"21066", MASK_21066 }, \
+ {"21164", MASK_21164 }, \
+ {"21164a", MASK_21164A }, \
/* ignore high-level options: */ \
{"ieee", 0}, \
{"ieee-with-inexact", 0}, \
{"", TARGET_DEFAULT | TARGET_CPU_DEFAULT} }

#ifndef TARGET_DEFAULT
-#define TARGET_DEFAULT 3
+#define TARGET_DEFAULT (MASK_FP|MASK_FPREGS)
#endif

#ifndef TARGET_CPU_DEFAULT
-#define TARGET_CPU_DEFAULT 0
+#define TARGET_CPU_DEFAULT MASK_21064
#endif

/* This macro is similar to `TARGET_SWITCHES' but defines names of
diff -u --recursive gcc-2.7.1/config/alpha/alpha.md gcc-2.7.1-new/config/alpha/alpha.md
--- gcc-2.7.1/config/alpha/alpha.md Tue Jun 4 14:12:43 1996
+++ gcc-2.7.1-new/config/alpha/alpha.md Mon Jun 3 16:51:11 1996
@@ -3348,8 +3348,9 @@
""
"call_pal 0x86")

-;; Finally, we have the basic data motion insns. The byte and word insns
-;; are done via define_expand. Start with the floating-point insns, since
+;; Finally, we have the basic data motion insns.
+;; The byte and word insns are done via define_expand for all CPUs
+;; prior to 21164A (EV56). Start with the floating-point insns, since
;; they are simpler.

(define_insn ""
@@ -3404,9 +3405,48 @@
operands[1] = force_reg (DFmode, operands[1]);
}")

+;; JRP - Question: Why isn't the r->m combination supported in the SI
+;; case (only rJ->m)? Alpha can certainly do it... I *might* try this
+;; and see what I get...
+
+
+;; If we have a 21164A, then it's much preferable to use the byte/word
+;; instructions rather than to use the multiple-instruction paths...
+;; We'll only do a small number of cases here. If need be, we'll fall
+;; thru to the "old" patterns...
+
+;; QImode move for EV56; ldbu is the byte load, the BWX byte store is "stb".
+(define_insn "movqi_direct"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,r,m")
+ (match_operand:QI 1 "input_operand" " r,J,I,m,rJ"))]
+ "TARGET_HAS_BYTEWORD && (register_operand (operands[0], QImode)
+ || reg_or_0_operand (operands[1], QImode))"
+ "@
+ bis %1,%1,%0
+ bis $31,$31,%0
+ bis $31,%1,%0
+ ldbu %0,%1
+ stb %r1,%0"
+ [(set_attr "type" "iaddlog,iaddlog,iaddlog,ld,st")])
+
+;; HImode move for EV56; ldwu is the word load, the BWX word store is "stw".
+(define_insn "movhi_direct"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,r,m")
+ (match_operand:HI 1 "input_operand" " r,J,I,m,rJ"))]
+ "TARGET_HAS_BYTEWORD && (register_operand (operands[0], HImode)
+ || reg_or_0_operand (operands[1], HImode))"
+ "@
+ bis %1,%1,%0
+ bis $31,$31,%0
+ bis $31,%1,%0
+ ldwu %0,%1
+ stw %r1,%0"
+ [(set_attr "type" "iaddlog,iaddlog,iaddlog,ld,st")])
+
+
(define_insn ""
- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,r,r,r,m,f,f,f,m")
- (match_operand:SI 1 "input_operand" "r,J,I,K,L,m,rJ,f,J,m,fG"))]
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,r,r,r,m, f,f,f,m")
+ (match_operand:SI 1 "input_operand" " r,J,I,K,L,m,rJ,f,J,m,fG"))]
"! WINDOWS_NT && (register_operand (operands[0], SImode)
|| reg_or_0_operand (operands[1], SImode))"
"@
@@ -3773,82 +3813,90 @@
if (GET_CODE (operands[0]) == MEM)
operands[1] = force_reg (QImode, operands[1]);

- /* Handle four memory cases, unaligned and aligned for either the input
- or the output. The only case where we can be called during reload is
- for aligned loads; all other cases require temporaries. */
-
- if (GET_CODE (operands[1]) == MEM
- || (GET_CODE (operands[1]) == SUBREG
- && GET_CODE (SUBREG_REG (operands[1])) == MEM)
- || (reload_in_progress && GET_CODE (operands[1]) == REG
- && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER)
- || (reload_in_progress && GET_CODE (operands[1]) == SUBREG
- && GET_CODE (SUBREG_REG (operands[1])) == REG
- && REGNO (SUBREG_REG (operands[1])) >= FIRST_PSEUDO_REGISTER))
- {
- if (aligned_memory_operand (operands[1], QImode))
- {
- rtx aligned_mem, bitnum;
- rtx scratch = (reload_in_progress
- ? gen_rtx (REG, SImode, REGNO (operands[0]))
- : gen_reg_rtx (SImode));
-
- get_aligned_mem (operands[1], &aligned_mem, &bitnum);
+ /* If we have byte instructions available, use them! */
+ if(TARGET_HAS_BYTEWORD) {
+ emit_insn(gen_movqi_direct(operands[0], operands[1]));
+ DONE;
+ }
+ else {

- emit_insn (gen_aligned_loadqi (operands[0], aligned_mem, bitnum,
- scratch));
- }
- else
+ /* Handle four memory cases, unaligned and aligned for either the input
+ or the output. The only case where we can be called during reload is
+ for aligned loads; all other cases require temporaries. */
+
+ if (GET_CODE (operands[1]) == MEM
+ || (GET_CODE (operands[1]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[1])) == MEM)
+ || (reload_in_progress && GET_CODE (operands[1]) == REG
+ && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER)
+ || (reload_in_progress && GET_CODE (operands[1]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[1])) == REG
+ && REGNO (SUBREG_REG (operands[1])) >= FIRST_PSEUDO_REGISTER))
{
- /* Don't pass these as parameters since that makes the generated
- code depend on parameter evaluation order which will cause
- bootstrap failures. */
-
- rtx temp1 = gen_reg_rtx (DImode);
- rtx temp2 = gen_reg_rtx (DImode);
- rtx seq = gen_unaligned_loadqi (operands[0],
- get_unaligned_address (operands[1]),
- temp1, temp2);
+ if (aligned_memory_operand (operands[1], QImode))
+ {
+ rtx aligned_mem, bitnum;
+ rtx scratch = (reload_in_progress
+ ? gen_rtx (REG, SImode, REGNO (operands[0]))
+ : gen_reg_rtx (SImode));
+
+ get_aligned_mem (operands[1], &aligned_mem, &bitnum);
+
+ emit_insn (gen_aligned_loadqi (operands[0], aligned_mem, bitnum,
+ scratch));
+ }
+ else
+ {
+ /* Don't pass these as parameters since that makes the generated
+ code depend on parameter evaluation order which will cause
+ bootstrap failures. */
+
+ rtx temp1 = gen_reg_rtx (DImode);
+ rtx temp2 = gen_reg_rtx (DImode);
+ rtx seq = gen_unaligned_loadqi (operands[0],
+ get_unaligned_address (operands[1]),
+ temp1, temp2);
+
+ alpha_set_memflags (seq, operands[1]);
+ emit_insn (seq);
+ }

- alpha_set_memflags (seq, operands[1]);
- emit_insn (seq);
+ DONE;
}

- DONE;
- }
-
- else if (GET_CODE (operands[0]) == MEM
- || (GET_CODE (operands[0]) == SUBREG
- && GET_CODE (SUBREG_REG (operands[0])) == MEM)
- || (reload_in_progress && GET_CODE (operands[0]) == REG
- && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
- || (reload_in_progress && GET_CODE (operands[0]) == SUBREG
- && GET_CODE (SUBREG_REG (operands[0])) == REG
- && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
- {
- if (aligned_memory_operand (operands[0], QImode))
+ else if (GET_CODE (operands[0]) == MEM
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == MEM)
+ || (reload_in_progress && GET_CODE (operands[0]) == REG
+ && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
+ || (reload_in_progress && GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
{
- rtx aligned_mem, bitnum;
- rtx temp1 = gen_reg_rtx (SImode);
- rtx temp2 = gen_reg_rtx (SImode);
-
- get_aligned_mem (operands[0], &aligned_mem, &bitnum);
-
- emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum,
- temp1, temp2));
+ if (aligned_memory_operand (operands[0], QImode))
+ {
+ rtx aligned_mem, bitnum;
+ rtx temp1 = gen_reg_rtx (SImode);
+ rtx temp2 = gen_reg_rtx (SImode);
+
+ get_aligned_mem (operands[0], &aligned_mem, &bitnum);
+
+ emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum,
+ temp1, temp2));
+ }
+ else
+ {
+ rtx temp1 = gen_reg_rtx (DImode);
+ rtx temp2 = gen_reg_rtx (DImode);
+ rtx temp3 = gen_reg_rtx (DImode);
+ rtx seq = gen_unaligned_storeqi (get_unaligned_address (operands[0]),
+ operands[1], temp1, temp2, temp3);
+
+ alpha_set_memflags (seq, operands[0]);
+ emit_insn (seq);
+ }
+ DONE;
}
- else
- {
- rtx temp1 = gen_reg_rtx (DImode);
- rtx temp2 = gen_reg_rtx (DImode);
- rtx temp3 = gen_reg_rtx (DImode);
- rtx seq = gen_unaligned_storeqi (get_unaligned_address (operands[0]),
- operands[1], temp1, temp2, temp3);
-
- alpha_set_memflags (seq, operands[0]);
- emit_insn (seq);
- }
- DONE;
}
}")

@@ -3863,93 +3911,101 @@
if (GET_CODE (operands[0]) == MEM)
operands[1] = force_reg (HImode, operands[1]);

- /* Handle four memory cases, unaligned and aligned for either the input
- or the output. The only case where we can be called during reload is
- for aligned loads; all other cases require temporaries. */
-
- if (GET_CODE (operands[1]) == MEM
- || (GET_CODE (operands[1]) == SUBREG
- && GET_CODE (SUBREG_REG (operands[1])) == MEM)
- || (reload_in_progress && GET_CODE (operands[1]) == REG
- && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER)
- || (reload_in_progress && GET_CODE (operands[1]) == SUBREG
- && GET_CODE (SUBREG_REG (operands[1])) == REG
- && REGNO (SUBREG_REG (operands[1])) >= FIRST_PSEUDO_REGISTER))
- {
- if (aligned_memory_operand (operands[1], HImode))
- {
- rtx aligned_mem, bitnum;
- rtx scratch = (reload_in_progress
- ? gen_rtx (REG, SImode, REGNO (operands[0]))
- : gen_reg_rtx (SImode));
-
- get_aligned_mem (operands[1], &aligned_mem, &bitnum);
+ /* If we have word instructions available, use them! */
+ if(TARGET_HAS_BYTEWORD) {
+ emit_insn(gen_movhi_direct(operands[0], operands[1]));
+ DONE;
+ }
+ else {

- emit_insn (gen_aligned_loadhi (operands[0], aligned_mem, bitnum,
- scratch));
- }
- else
+ /* Handle four memory cases, unaligned and aligned for either the input
+ or the output. The only case where we can be called during reload is
+ for aligned loads; all other cases require temporaries. */
+
+ if (GET_CODE (operands[1]) == MEM
+ || (GET_CODE (operands[1]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[1])) == MEM)
+ || (reload_in_progress && GET_CODE (operands[1]) == REG
+ && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER)
+ || (reload_in_progress && GET_CODE (operands[1]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[1])) == REG
+ && REGNO (SUBREG_REG (operands[1])) >= FIRST_PSEUDO_REGISTER))
{
- rtx addr
- = force_reg (DImode,
- force_operand (get_unaligned_address (operands[1]),
- NULL_RTX));
- rtx scratch1 = gen_reg_rtx (DImode);
- rtx scratch2 = gen_reg_rtx (DImode);
- rtx scratch3 = gen_reg_rtx (DImode);
-
- rtx seq = gen_unaligned_loadhi (operands[0], addr, scratch1,
- scratch2, scratch3);
+ if (aligned_memory_operand (operands[1], HImode))
+ {
+ rtx aligned_mem, bitnum;
+ rtx scratch = (reload_in_progress
+ ? gen_rtx (REG, SImode, REGNO (operands[0]))
+ : gen_reg_rtx (SImode));
+
+ get_aligned_mem (operands[1], &aligned_mem, &bitnum);
+
+ emit_insn (gen_aligned_loadhi (operands[0], aligned_mem, bitnum,
+ scratch));
+ }
+ else
+ {
+ rtx addr
+ = force_reg (DImode,
+ force_operand (get_unaligned_address (operands[1]),
+ NULL_RTX));
+ rtx scratch1 = gen_reg_rtx (DImode);
+ rtx scratch2 = gen_reg_rtx (DImode);
+ rtx scratch3 = gen_reg_rtx (DImode);
+
+ rtx seq = gen_unaligned_loadhi (operands[0], addr, scratch1,
+ scratch2, scratch3);
+
+ alpha_set_memflags (seq, operands[1]);
+ emit_insn (seq);
+ }

- alpha_set_memflags (seq, operands[1]);
- emit_insn (seq);
+ DONE;
}

- DONE;
- }
-
- else if (GET_CODE (operands[0]) == MEM
- || (GET_CODE (operands[0]) == SUBREG
- && GET_CODE (SUBREG_REG (operands[0])) == MEM)
- || (reload_in_progress && GET_CODE (operands[0]) == REG
- && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
- || (reload_in_progress && GET_CODE (operands[0]) == SUBREG
- && GET_CODE (SUBREG_REG (operands[0])) == REG
- && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
- {
- if (aligned_memory_operand (operands[0], HImode))
+ else if (GET_CODE (operands[0]) == MEM
+ || (GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == MEM)
+ || (reload_in_progress && GET_CODE (operands[0]) == REG
+ && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER)
+ || (reload_in_progress && GET_CODE (operands[0]) == SUBREG
+ && GET_CODE (SUBREG_REG (operands[0])) == REG
+ && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
{
- rtx aligned_mem, bitnum;
- rtx temp1 = gen_reg_rtx (SImode);
- rtx temp2 = gen_reg_rtx (SImode);
+ if (aligned_memory_operand (operands[0], HImode))
+ {
+ rtx aligned_mem, bitnum;
+ rtx temp1 = gen_reg_rtx (SImode);
+ rtx temp2 = gen_reg_rtx (SImode);
+
+ get_aligned_mem (operands[0], &aligned_mem, &bitnum);
+
+ emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum,
+ temp1, temp2));
+ }
+ else
+ {
+ rtx temp1 = gen_reg_rtx (DImode);
+ rtx temp2 = gen_reg_rtx (DImode);
+ rtx temp3 = gen_reg_rtx (DImode);
+ rtx temp4 = gen_reg_rtx (DImode);
+ rtx temp5 = gen_reg_rtx (DImode);
+ rtx temp6 = gen_reg_rtx (DImode);
+ rtx temp7 = gen_reg_rtx (DImode);
+ rtx temp8 = gen_reg_rtx (DImode);
+ rtx temp9 = gen_reg_rtx (DImode);
+
+ rtx seq = gen_unaligned_storehi (get_unaligned_address (operands[0]),
+ operands[1], temp1, temp2,temp3,
+ temp4, temp5, temp6,temp7,
+ temp8, temp9);
+
+ alpha_set_memflags (seq, operands[0]);
+ emit_insn (seq);
+ }

- get_aligned_mem (operands[0], &aligned_mem, &bitnum);
-
- emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum,
- temp1, temp2));
+ DONE;
}
- else
- {
- rtx temp1 = gen_reg_rtx (DImode);
- rtx temp2 = gen_reg_rtx (DImode);
- rtx temp3 = gen_reg_rtx (DImode);
- rtx temp4 = gen_reg_rtx (DImode);
- rtx temp5 = gen_reg_rtx (DImode);
- rtx temp6 = gen_reg_rtx (DImode);
- rtx temp7 = gen_reg_rtx (DImode);
- rtx temp8 = gen_reg_rtx (DImode);
- rtx temp9 = gen_reg_rtx (DImode);
-
- rtx seq = gen_unaligned_storehi (get_unaligned_address (operands[0]),
- operands[1], temp1, temp2,temp3,
- temp4, temp5, temp6,temp7,
- temp8, temp9);
-
- alpha_set_memflags (seq, operands[0]);
- emit_insn (seq);
- }
-
- DONE;
}
}")

-- 
Jim Paradis (paradis@amt.tay1.dec.com)        "It's not procrastination, 
Digital Equipment Corporation		       it's my new Just-In-Time 
(508)952-4047				       Workload Management System!"
http://www.tiac.net/users/jrp/index.html