[X86] Clean up patterns for AVX/SSE PS operations. Always try to look for bitcasts from floating point types. If only AVX1 is supported, we also need to handle integer types with floating point ops without looking for bitcasts.

Previously SSE1 had a pattern that looked for integer types without bitcasts, but the type wasn't legal with only SSE1, and SSE2 adds an identical pattern for the integer instructions. llvm-svn: 278089
2016-08-09 03:06:28 +00:00 · 2016-08-09 03:06:28 +00:00 · c09273b42b
parent de06b51d3d
commit c09273b42b
1 changed files with 18 additions and 9 deletions
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@ -2897,7 +2897,8 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
  let Predicates = [HasAVX, NoVLX] in {
  defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
        !strconcat(OpcodeStr, "ps"), f256mem,
-        [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
+        [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
+                                  (bc_v4i64 (v8f32 VR256:$src2))))],
        [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
                           (loadv4i64 addr:$src2)))], 0>, PS, VEX_4V, VEX_L;

@ -2909,12 +2910,10 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
                                  (loadv4i64 addr:$src2)))], 0>,
                                  PD, VEX_4V, VEX_L;

-  // In AVX no need to add a pattern for 128-bit logical rr ps, because they
-  // are all promoted to v2i64, and the patterns are covered by the int
-  // version. This is needed in SSE only, because v2i64 isn't supported on
-  // SSE1, but only on SSE2.
  defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
-       !strconcat(OpcodeStr, "ps"), f128mem, [],
+       !strconcat(OpcodeStr, "ps"), f128mem,
+       [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+                                 (bc_v2i64 (v4f32 VR128:$src2))))],
       [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
                                 (loadv2i64 addr:$src2)))], 0>, PS, VEX_4V;

@ -2930,7 +2929,8 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
  let Constraints = "$src1 = $dst" in {
    defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
         !strconcat(OpcodeStr, "ps"), f128mem,
-         [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
+         [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+                                   (bc_v2i64 (v4f32 VR128:$src2))))],
         [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
                                   (memopv2i64 addr:$src2)))]>, PS;

@ -2949,9 +2949,18 @@ defm XOR  : sse12_fp_packed_logical<0x57, "xor", xor>;
 let isCommutable = 0 in
  defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>;

-// AVX1 requires type coercions in order to fold loads directly into logical
-// operations.
+// If only AVX1 is supported, we need to handle integer operations with
+// floating point instructions since the integer versions aren't available.
 let Predicates = [HasAVX1Only] in {
+  def : Pat<(v4i64 (and VR256:$src1, VR256:$src2)),
+            (VANDPSYrr VR256:$src1, VR256:$src2)>;
+  def : Pat<(v4i64 (or VR256:$src1, VR256:$src2)),
+            (VORPSYrr VR256:$src1, VR256:$src2)>;
+  def : Pat<(v4i64 (xor VR256:$src1, VR256:$src2)),
+            (VXORPSYrr VR256:$src1, VR256:$src2)>;
+  def : Pat<(v4i64 (X86andnp VR256:$src1, VR256:$src2)),
+            (VANDNPSYrr VR256:$src1, VR256:$src2)>;
+
  def : Pat<(and VR256:$src1, (loadv4i64 addr:$src2)),
            (VANDPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(or VR256:$src1, (loadv4i64 addr:$src2)),