CellSPU:

(a) Slight rethink on i64 zero/sign/any extend code - use a shuffle to directly zero-extend i32 to i64, but use rotates and shifts for sign extension. Also ensure unified register consistency. (b) Add new test harness for i64 operations: i64ops.ll llvm-svn: 59970
2008-11-24 18:20:46 +00:00 · 2008-11-24 18:20:46 +00:00 · 2e5df906f8
parent 5cc12a8e31
commit 2e5df906f8
2 changed files with 42 additions and 4 deletions
--- a/llvm/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/llvm/lib/Target/CellSPU/SPUISelLowering.cpp
@ -2363,16 +2363,27 @@ static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)

    SDValue PromoteScalar =
            DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
-    SDValue RotQuad =
-            DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, Op0VecVT,
-                        PromoteScalar, DAG.getConstant(4, MVT::i32));

    if (Opc != ISD::SIGN_EXTEND) {
+      // Use a shuffle to zero extend the i32 to i64 directly:
+      SDValue shufMask =
+              DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT,
+                          DAG.getConstant(0x80808080, MVT::i32),
+                          DAG.getConstant(0x00010203, MVT::i32),
+                          DAG.getConstant(0x80808080, MVT::i32),
+                          DAG.getConstant(0x08090a0b, MVT::i32));
+      SDValue zextShuffle =
+              DAG.getNode(SPUISD::SHUFB, Op0VecVT,
+                          PromoteScalar, PromoteScalar, shufMask);
+
      return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
-                         DAG.getNode(ISD::BIT_CONVERT, VecVT, RotQuad));
+                         DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle));
    } else {
      // SPU has no "rotate quadword and replicate bit 0" (i.e. rotate/shift
      // right and propagate the sign bit) instruction.
+      SDValue RotQuad =
+              DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, Op0VecVT,
+                          PromoteScalar, DAG.getConstant(4, MVT::i32));
      SDValue SignQuad =
              DAG.getNode(SPUISD::VEC_SRA, Op0VecVT,
                          PromoteScalar, DAG.getConstant(32, MVT::i32));
--- a/llvm/test/CodeGen/CellSPU/i64ops.ll
+++ b/llvm/test/CodeGen/CellSPU/i64ops.ll
@ -0,0 +1,27 @@
+; RUN: llvm-as -o - %s | llc -march=cellspu > %t1.s
+; RUN: grep {fsmbi.*61680}   %t1.s | count 1
+; RUN: grep rotqmbyi         %t1.s | count 1
+; RUN: grep rotmai           %t1.s | count 1
+; RUN: grep selb             %t1.s | count 1
+; RUN: grep shufb            %t1.s | count 2
+; RUN: grep cg               %t1.s | count 1
+; RUN: grep addx             %t1.s | count 1
+
+; ModuleID = 'stores.bc'
+target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
+target triple = "spu"
+
+define i64 @sext_i64_i32(i32 %a) nounwind {
+  %1 = sext i32 %a to i64
+  ret i64 %1
+}
+
+define i64 @zext_i64_i32(i32 %a) nounwind {
+  %1 = zext i32 %a to i64
+  ret i64 %1
+}
+
+define i64 @add_i64(i64 %a, i64 %b) nounwind {
+  %1 = add i64 %a, %b
+  ret i64 %1
+}