PR32710: Disable using PMADDWD for unsigned short.
Summary: PMADDWD can only handle signed short.
Reviewers: mkuper, wmi
Reviewed By: mkuper
Subscribers: andreadb, llvm-commits
Differential Revision: https://reviews.llvm.org/D32236
llvm-svn: 300737
This commit is contained in:
parent
021a218dd2
commit
58601674d2
|
@ -34631,7 +34631,7 @@ static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG,
|
|||
return SDValue();
|
||||
|
||||
ShrinkMode Mode;
|
||||
if (!canReduceVMulWidth(MulOp.getNode(), DAG, Mode))
|
||||
if (!canReduceVMulWidth(MulOp.getNode(), DAG, Mode) || Mode == MULU16)
|
||||
return SDValue();
|
||||
|
||||
EVT VT = N->getValueType(0);
|
||||
|
|
|
@ -3,23 +3,26 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512
|
||||
|
||||
;SSE2-label: @_Z10test_shortPsS_i
|
||||
;SSE2-LABEL: @_Z10test_shortPsS_i
|
||||
;SSE2: movdqu
|
||||
;SSE2-NEXT: movdqu
|
||||
;SSE2-NEXT: pmaddwd
|
||||
;SSE2-NEXT: paddd
|
||||
;SSE2: ret
|
||||
|
||||
;AVX2-label: @_Z10test_shortPsS_i
|
||||
;AVX2-LABEL: @_Z10test_shortPsS_i
|
||||
;AVX2: vmovdqu
|
||||
;AVX2-NEXT: vpmaddwd
|
||||
;AVX2-NEXT: vinserti128
|
||||
;AVX2-NEXT: vpaddd
|
||||
;AVX2: ret
|
||||
|
||||
;AVX512-label: @_Z10test_shortPsS_i
|
||||
;AVX512-LABEL: @_Z10test_shortPsS_i
|
||||
;AVX512: vmovdqu
|
||||
;AVX512-NEXT: vpmaddwd
|
||||
;AVX512-NEXT: vinserti128
|
||||
;AVX512-NEXT: vpaddd
|
||||
;AVX512: ret
|
||||
|
||||
define i32 @_Z10test_shortPsS_i(i16* nocapture readonly, i16* nocapture readonly, i32) local_unnamed_addr #0 {
|
||||
entry:
|
||||
|
@ -54,18 +57,65 @@ middle.block:
|
|||
ret i32 %13
|
||||
}
|
||||
|
||||
;AVX2-label: @_Z9test_charPcS_i
|
||||
;SSE2-LABEL: @test_unsigned_short
|
||||
;SSE2-NOT: pmaddwd
|
||||
;SSE2: ret
|
||||
|
||||
;AVX2-LABEL: @test_unsigned_short
|
||||
;AVX2-NOT: vpmaddwd
|
||||
;AVX2: ret
|
||||
|
||||
;AVX512-LABEL: @test_unsigned_short
|
||||
;AVX512-NOT: vpmaddwd
|
||||
;AVX512: ret
|
||||
|
||||
define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly, i32) local_unnamed_addr #0 {
|
||||
entry:
|
||||
%3 = zext i32 %2 to i64
|
||||
br label %vector.body
|
||||
|
||||
vector.body:
|
||||
%index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
|
||||
%vec.phi = phi <8 x i32> [ %11, %vector.body ], [ zeroinitializer, %entry ]
|
||||
%4 = getelementptr inbounds i16, i16* %0, i64 %index
|
||||
%5 = bitcast i16* %4 to <8 x i16>*
|
||||
%wide.load = load <8 x i16>, <8 x i16>* %5, align 2
|
||||
%6 = zext <8 x i16> %wide.load to <8 x i32>
|
||||
%7 = getelementptr inbounds i16, i16* %1, i64 %index
|
||||
%8 = bitcast i16* %7 to <8 x i16>*
|
||||
%wide.load14 = load <8 x i16>, <8 x i16>* %8, align 2
|
||||
%9 = zext <8 x i16> %wide.load14 to <8 x i32>
|
||||
%10 = mul nsw <8 x i32> %9, %6
|
||||
%11 = add nsw <8 x i32> %10, %vec.phi
|
||||
%index.next = add i64 %index, 8
|
||||
%12 = icmp eq i64 %index.next, %3
|
||||
br i1 %12, label %middle.block, label %vector.body
|
||||
|
||||
middle.block:
|
||||
%rdx.shuf = shufflevector <8 x i32> %11, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%bin.rdx = add <8 x i32> %11, %rdx.shuf
|
||||
%rdx.shuf15 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%bin.rdx16 = add <8 x i32> %bin.rdx, %rdx.shuf15
|
||||
%rdx.shuf17 = shufflevector <8 x i32> %bin.rdx16, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%bin.rdx18 = add <8 x i32> %bin.rdx16, %rdx.shuf17
|
||||
%13 = extractelement <8 x i32> %bin.rdx18, i32 0
|
||||
ret i32 %13
|
||||
}
|
||||
|
||||
;AVX2-LABEL: @_Z9test_charPcS_i
|
||||
;AVX2: vpmovsxbw
|
||||
;AVX2-NEXT: vpmovsxbw
|
||||
;AVX2-NEXT: vpmaddwd
|
||||
;AVX2-NEXT: vpaddd
|
||||
;AVX2: ret
|
||||
|
||||
;AVX512-label: @_Z9test_charPcS_i
|
||||
;AVX512-LABEL: @_Z9test_charPcS_i
|
||||
;AVX512: vpmovsxbw
|
||||
;AVX512-NEXT: vpmovsxbw
|
||||
;AVX512-NEXT: vpmaddwd
|
||||
;AVX512-NEXT: vinserti64x4
|
||||
;AVX512-NEXT: vpaddd
|
||||
;AVX512: ret
|
||||
|
||||
define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i32) local_unnamed_addr #0 {
|
||||
entry:
|
||||
|
|
Loading…
Reference in New Issue