diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1a0c93734e73..8f8024356f5c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -9351,8 +9351,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, // GT and EQ comparisons for integer, swapping operands and multiple // operations may be required for some comparisons. unsigned Opc; - bool Swap = false, Invert = false, FlipSigns = false; - + bool Swap = false, Invert = false, FlipSigns = false, MinMax = false; + switch (SetCCOpcode) { default: llvm_unreachable("Unexpected SETCC condition"); case ISD::SETNE: Invert = true; @@ -9366,6 +9366,23 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, case ISD::SETUGE: Swap = true; case ISD::SETULE: Opc = X86ISD::PCMPGT; FlipSigns = true; Invert = true; break; } + + // Special case: Use min/max operations for SETULE/SETUGE + MVT VET = VT.getVectorElementType(); + bool hasMinMax = + (Subtarget->hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32)) + || (Subtarget->hasSSE2() && (VET == MVT::i8)); + + if (hasMinMax) { + switch (SetCCOpcode) { + default: break; + case ISD::SETULE: Opc = X86ISD::UMIN; MinMax = true; break; + case ISD::SETUGE: Opc = X86ISD::UMAX; MinMax = true; break; + } + + if (MinMax) { Swap = false; Invert = false; FlipSigns = false; } + } + if (Swap) std::swap(Op0, Op1); @@ -9452,6 +9469,9 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, // If the logical-not of the result is required, perform that now. 
if (Invert) Result = DAG.getNOT(dl, Result, VT); + + if (MinMax) + Result = DAG.getNode(X86ISD::PCMPEQ, dl, VT, Op0, Result); return Result; } diff --git a/llvm/test/CodeGen/X86/vec_setcc.ll b/llvm/test/CodeGen/X86/vec_setcc.ll new file mode 100644 index 000000000000..b1bf52dc3183 --- /dev/null +++ b/llvm/test/CodeGen/X86/vec_setcc.ll @@ -0,0 +1,126 @@ +; RUN: llc < %s -mcpu=x86-64 -mattr=sse2 | FileCheck %s -check-prefix=SSE2 +; RUN: llc < %s -mcpu=x86-64 -mattr=sse41 | FileCheck %s -check-prefix=SSE41 +; RUN: llc < %s -mcpu=x86-64 -mattr=avx | FileCheck %s -check-prefix=AVX + +define <16 x i8> @v16i8_icmp_uge(<16 x i8> %a, <16 x i8> %b) nounwind readnone ssp uwtable { + %1 = icmp uge <16 x i8> %a, %b + %2 = sext <16 x i1> %1 to <16 x i8> + ret <16 x i8> %2 +; SSE2: _v16i8_icmp_uge: +; SSE2: pmaxub %xmm0, %xmm1 +; SSE2: pcmpeqb %xmm1, %xmm0 + +; SSE41: _v16i8_icmp_uge: +; SSE41: pmaxub %xmm0, %xmm1 +; SSE41: pcmpeqb %xmm1, %xmm0 + +; AVX: _v16i8_icmp_uge: +; AVX: vpmaxub %xmm1, %xmm0, %xmm1 +; AVX: vpcmpeqb %xmm1, %xmm0, %xmm0 +} + +define <16 x i8> @v16i8_icmp_ule(<16 x i8> %a, <16 x i8> %b) nounwind readnone ssp uwtable { + %1 = icmp ule <16 x i8> %a, %b + %2 = sext <16 x i1> %1 to <16 x i8> + ret <16 x i8> %2 +; SSE2: _v16i8_icmp_ule: +; SSE2: pminub %xmm0, %xmm1 +; SSE2: pcmpeqb %xmm1, %xmm0 + +; SSE41: _v16i8_icmp_ule: +; SSE41: pminub %xmm0, %xmm1 +; SSE41: pcmpeqb %xmm1, %xmm0 + +; AVX: _v16i8_icmp_ule: +; AVX: vpminub %xmm1, %xmm0, %xmm1 +; AVX: vpcmpeqb %xmm1, %xmm0, %xmm0 +} + + +define <8 x i16> @v8i16_icmp_uge(<8 x i16> %a, <8 x i16> %b) nounwind readnone ssp uwtable { + %1 = icmp uge <8 x i16> %a, %b + %2 = sext <8 x i1> %1 to <8 x i16> + ret <8 x i16> %2 +; SSE2: _v8i16_icmp_uge: +; SSE2: movdqa LCPI2_0(%rip), %xmm2 +; SSE2: pxor %xmm2, %xmm0 +; SSE2: pxor %xmm1, %xmm2 +; SSE2: pcmpgtw %xmm0, %xmm2 +; SSE2: pcmpeqd %xmm0, %xmm0 +; SSE2: pxor %xmm2, %xmm0 + +; SSE41: _v8i16_icmp_uge: +; SSE41: pmaxuw %xmm0, %xmm1 +; SSE41: pcmpeqw %xmm1, %xmm0 + +; 
AVX: _v8i16_icmp_uge: +; AVX: vpmaxuw %xmm1, %xmm0, %xmm1 +; AVX: vpcmpeqw %xmm1, %xmm0, %xmm0 +} + +define <8 x i16> @v8i16_icmp_ule(<8 x i16> %a, <8 x i16> %b) nounwind readnone ssp uwtable { + %1 = icmp ule <8 x i16> %a, %b + %2 = sext <8 x i1> %1 to <8 x i16> + ret <8 x i16> %2 +; SSE2: _v8i16_icmp_ule: +; SSE2: movdqa LCPI3_0(%rip), %xmm2 +; SSE2: pxor %xmm2, %xmm1 +; SSE2: pxor %xmm2, %xmm0 +; SSE2: pcmpgtw %xmm1, %xmm0 +; SSE2: pcmpeqd %xmm1, %xmm1 +; SSE2: pxor %xmm0, %xmm1 +; SSE2: movdqa %xmm1, %xmm0 + +; SSE41: _v8i16_icmp_ule: +; SSE41: pminuw %xmm0, %xmm1 +; SSE41: pcmpeqw %xmm1, %xmm0 + +; AVX: _v8i16_icmp_ule: +; AVX: vpminuw %xmm1, %xmm0, %xmm1 +; AVX: vpcmpeqw %xmm1, %xmm0, %xmm0 +} + + +define <4 x i32> @v4i32_icmp_uge(<4 x i32> %a, <4 x i32> %b) nounwind readnone ssp uwtable { + %1 = icmp uge <4 x i32> %a, %b + %2 = sext <4 x i1> %1 to <4 x i32> + ret <4 x i32> %2 +; SSE2: _v4i32_icmp_uge: +; SSE2: movdqa LCPI4_0(%rip), %xmm2 +; SSE2: pxor %xmm2, %xmm0 +; SSE2: pxor %xmm1, %xmm2 +; SSE2: pcmpgtd %xmm0, %xmm2 +; SSE2: pcmpeqd %xmm0, %xmm0 +; SSE2: pxor %xmm2, %xmm0 + +; SSE41: _v4i32_icmp_uge: +; SSE41: pmaxud %xmm0, %xmm1 +; SSE41: pcmpeqd %xmm1, %xmm0 + +; AVX: _v4i32_icmp_uge: +; AVX: vpmaxud %xmm1, %xmm0, %xmm1 +; AVX: vpcmpeqd %xmm1, %xmm0, %xmm0 +} + +define <4 x i32> @v4i32_icmp_ule(<4 x i32> %a, <4 x i32> %b) nounwind readnone ssp uwtable { + %1 = icmp ule <4 x i32> %a, %b + %2 = sext <4 x i1> %1 to <4 x i32> + ret <4 x i32> %2 +; SSE2: _v4i32_icmp_ule: +; SSE2: movdqa LCPI5_0(%rip), %xmm2 +; SSE2: pxor %xmm2, %xmm1 +; SSE2: pxor %xmm2, %xmm0 +; SSE2: pcmpgtd %xmm1, %xmm0 +; SSE2: pcmpeqd %xmm1, %xmm1 +; SSE2: pxor %xmm0, %xmm1 +; SSE2: movdqa %xmm1, %xmm0 + +; SSE41: _v4i32_icmp_ule: +; SSE41: pminud %xmm0, %xmm1 +; SSE41: pcmpeqd %xmm1, %xmm0 + +; AVX: _v4i32_icmp_ule: +; AVX: vpminud %xmm1, %xmm0, %xmm1 +; AVX: vpcmpeqd %xmm1, %xmm0, %xmm0 +} +