[X86] Replace VPCOM/VPCOMU with generic integer comparisons (llvm)

These intrinsics can always be replaced with generic integer comparisons without any regression in codegen, even for -O0/-fast-isel cases.

Noticed while cleaning up vector integer comparison costs for PR40376.

A future commit will remove/autoupgrade the existing VPCOM/VPCOMU llvm intrinsics.

llvm-svn: 351688
This commit is contained in:
Simon Pilgrim 2019-01-20 16:40:44 +00:00
parent a7bcd72c0a
commit 4fd2459c4d
2 changed files with 24 additions and 24 deletions

View File

@ -1908,28 +1908,28 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_xop_vfrcz_ps_256 : GCCBuiltin<"__builtin_ia32_vfrczps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
def int_x86_xop_vpcomb : GCCBuiltin<"__builtin_ia32_vpcomb">,
def int_x86_xop_vpcomb :
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vpcomw : GCCBuiltin<"__builtin_ia32_vpcomw">,
def int_x86_xop_vpcomw :
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vpcomd : GCCBuiltin<"__builtin_ia32_vpcomd">,
def int_x86_xop_vpcomd :
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vpcomq : GCCBuiltin<"__builtin_ia32_vpcomq">,
def int_x86_xop_vpcomq :
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vpcomub : GCCBuiltin<"__builtin_ia32_vpcomub">,
def int_x86_xop_vpcomub :
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vpcomuw : GCCBuiltin<"__builtin_ia32_vpcomuw">,
def int_x86_xop_vpcomuw :
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vpcomud : GCCBuiltin<"__builtin_ia32_vpcomud">,
def int_x86_xop_vpcomud :
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_xop_vpcomuq : GCCBuiltin<"__builtin_ia32_vpcomuq">,
def int_x86_xop_vpcomuq :
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
llvm_i8_ty], [IntrNoMem]>;

View File

@ -577,11 +577,11 @@ define <2 x i64> @test_mm_com_epu8(<2 x i64> %a0, <2 x i64> %a1) {
; ALL-NEXT: ret{{[l|q]}}
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%res = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %arg0, <16 x i8> %arg1, i8 0)
%cmp = icmp ult <16 x i8> %arg0, %arg1
%res = sext <16 x i1> %cmp to <16 x i8>
%bc = bitcast <16 x i8> %res to <2 x i64>
ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone
define <2 x i64> @test_mm_com_epu16(<2 x i64> %a0, <2 x i64> %a1) {
; ALL-LABEL: test_mm_com_epu16:
@ -590,11 +590,11 @@ define <2 x i64> @test_mm_com_epu16(<2 x i64> %a0, <2 x i64> %a1) {
; ALL-NEXT: ret{{[l|q]}}
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
%res = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %arg0, <8 x i16> %arg1, i8 0)
%cmp = icmp ult <8 x i16> %arg0, %arg1
%res = sext <8 x i1> %cmp to <8 x i16>
%bc = bitcast <8 x i16> %res to <2 x i64>
ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone
define <2 x i64> @test_mm_com_epu32(<2 x i64> %a0, <2 x i64> %a1) {
; ALL-LABEL: test_mm_com_epu32:
@ -603,21 +603,21 @@ define <2 x i64> @test_mm_com_epu32(<2 x i64> %a0, <2 x i64> %a1) {
; ALL-NEXT: ret{{[l|q]}}
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
%res = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %arg0, <4 x i32> %arg1, i8 0)
%cmp = icmp ult <4 x i32> %arg0, %arg1
%res = sext <4 x i1> %cmp to <4 x i32>
%bc = bitcast <4 x i32> %res to <2 x i64>
ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone
define <2 x i64> @test_mm_com_epu64(<2 x i64> %a0, <2 x i64> %a1) {
; ALL-LABEL: test_mm_com_epu64:
; ALL: # %bb.0:
; ALL-NEXT: vpcomltuq %xmm1, %xmm0, %xmm0
; ALL-NEXT: ret{{[l|q]}}
%res = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
%cmp = icmp ult <2 x i64> %a0, %a1
%res = sext <2 x i1> %cmp to <2 x i64>
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone
define <2 x i64> @test_mm_com_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; ALL-LABEL: test_mm_com_epi8:
@ -626,11 +626,11 @@ define <2 x i64> @test_mm_com_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; ALL-NEXT: ret{{[l|q]}}
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%res = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %arg0, <16 x i8> %arg1, i8 0)
%cmp = icmp slt <16 x i8> %arg0, %arg1
%res = sext <16 x i1> %cmp to <16 x i8>
%bc = bitcast <16 x i8> %res to <2 x i64>
ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone
define <2 x i64> @test_mm_com_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; ALL-LABEL: test_mm_com_epi16:
@ -639,11 +639,11 @@ define <2 x i64> @test_mm_com_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; ALL-NEXT: ret{{[l|q]}}
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
%res = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %arg0, <8 x i16> %arg1, i8 0)
%cmp = icmp slt <8 x i16> %arg0, %arg1
%res = sext <8 x i1> %cmp to <8 x i16>
%bc = bitcast <8 x i16> %res to <2 x i64>
ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone
define <2 x i64> @test_mm_com_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; ALL-LABEL: test_mm_com_epi32:
@ -652,21 +652,21 @@ define <2 x i64> @test_mm_com_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; ALL-NEXT: ret{{[l|q]}}
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
%res = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %arg0, <4 x i32> %arg1, i8 0)
%cmp = icmp slt <4 x i32> %arg0, %arg1
%res = sext <4 x i1> %cmp to <4 x i32>
%bc = bitcast <4 x i32> %res to <2 x i64>
ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone
define <2 x i64> @test_mm_com_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; ALL-LABEL: test_mm_com_epi64:
; ALL: # %bb.0:
; ALL-NEXT: vpcomltq %xmm1, %xmm0, %xmm0
; ALL-NEXT: ret{{[l|q]}}
%res = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
%cmp = icmp slt <2 x i64> %a0, %a1
%res = sext <2 x i1> %cmp to <2 x i64>
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone
define <2 x double> @test_mm_permute2_pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2) {
; ALL-LABEL: test_mm_permute2_pd: