[X86] Don't promote i16 compares to i32 if the immediate will fit in 8 bits.

The comments in this code say we were trying to avoid 16-bit immediates, but if the immediate fits in 8 bits this isn't an issue, since the 16-bit compare can then use the sign-extended 8-bit immediate encoding. This avoids creating a zero extend that probably won't go away.

The movmskb-related changes are interesting. The movmskb instruction writes a 32-bit result but fills the upper bits with 0, so the zero_extend we were previously emitting was free. However, promoting the compare turned a -1 immediate that would fit in 8 bits into a 32-bit immediate, so it was still bad.

llvm-svn: 343871
This commit is contained in:
Craig Topper 2018-10-05 18:13:36 +00:00
parent 57b0da0688
commit 0ed892da70
7 changed files with 39 additions and 44 deletions

View File

@ -18435,8 +18435,11 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) {
// Only promote the compare up to I32 if it is a 16 bit operation
// with an immediate. 16 bit immediates are to be avoided.
if ((Op0.getValueType() == MVT::i16 &&
(isa<ConstantSDNode>(Op0) || isa<ConstantSDNode>(Op1))) &&
if (Op0.getValueType() == MVT::i16 &&
((isa<ConstantSDNode>(Op0) &&
!cast<ConstantSDNode>(Op0)->getAPIntValue().isSignedIntN(8)) ||
(isa<ConstantSDNode>(Op1) &&
!cast<ConstantSDNode>(Op1)->getAPIntValue().isSignedIntN(8))) &&
!DAG.getMachineFunction().getFunction().optForMinSize() &&
!Subtarget.isAtom()) {
unsigned ExtendOp =

View File

@ -508,16 +508,14 @@ define i1 @add_ugecmp_bad_i8_i16(i16 %x) nounwind {
; X86: # %bb.0:
; X86-NEXT: movl $128, %eax
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: cmpl $127, %eax
; X86-NEXT: cmpw $127, %ax
; X86-NEXT: seta %al
; X86-NEXT: retl
;
; X64-LABEL: add_ugecmp_bad_i8_i16:
; X64: # %bb.0:
; X64-NEXT: subl $-128, %edi
; X64-NEXT: movzwl %di, %eax
; X64-NEXT: cmpl $127, %eax
; X64-NEXT: cmpw $127, %di
; X64-NEXT: seta %al
; X64-NEXT: retq
%tmp0 = add i16 %x, 128 ; 1U << (8-1)
@ -600,16 +598,14 @@ define i1 @add_ugecmp_bad_i16_i4(i16 %x) nounwind {
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl $8, %eax
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: cmpl $15, %eax
; X86-NEXT: cmpw $15, %ax
; X86-NEXT: seta %al
; X86-NEXT: retl
;
; X64-LABEL: add_ugecmp_bad_i16_i4:
; X64: # %bb.0:
; X64-NEXT: addl $8, %edi
; X64-NEXT: movzwl %di, %eax
; X64-NEXT: cmpl $15, %eax
; X64-NEXT: cmpw $15, %di
; X64-NEXT: seta %al
; X64-NEXT: retq
%tmp0 = add i16 %x, 8 ; 1U << (4-1)

View File

@ -8,14 +8,14 @@ define i1 @allones_v16i8_sign(<16 x i8> %arg) {
; SSE2-LABEL: allones_v16i8_sign:
; SSE2: # %bb.0:
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT: cmpw $-1, %ax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
; AVX-LABEL: allones_v16i8_sign:
; AVX: # %bb.0:
; AVX-NEXT: vpmovmskb %xmm0, %eax
; AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX-NEXT: cmpw $-1, %ax
; AVX-NEXT: sete %al
; AVX-NEXT: retq
;
@ -352,7 +352,7 @@ define i1 @allones_v16i16_sign(<16 x i16> %arg) {
; SSE2-NEXT: pcmpgtw %xmm0, %xmm2
; SSE2-NEXT: packsswb %xmm3, %xmm2
; SSE2-NEXT: pmovmskb %xmm2, %eax
; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT: cmpw $-1, %ax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
@ -364,7 +364,7 @@ define i1 @allones_v16i16_sign(<16 x i16> %arg) {
; AVX1-NEXT: vpcmpgtw %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX1-NEXT: cmpw $-1, %ax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -376,7 +376,7 @@ define i1 @allones_v16i16_sign(<16 x i16> %arg) {
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX2-NEXT: cmpw $-1, %ax
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -719,7 +719,7 @@ define i1 @allones_v16i32_sign(<16 x i32> %arg) {
; SSE2-NEXT: packssdw %xmm2, %xmm4
; SSE2-NEXT: packsswb %xmm3, %xmm4
; SSE2-NEXT: pmovmskb %xmm4, %eax
; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT: cmpw $-1, %ax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
@ -736,7 +736,7 @@ define i1 @allones_v16i32_sign(<16 x i32> %arg) {
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX1-NEXT: cmpw $-1, %ax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -751,7 +751,7 @@ define i1 @allones_v16i32_sign(<16 x i32> %arg) {
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX2-NEXT: cmpw $-1, %ax
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1126,7 +1126,7 @@ define i1 @allones_v16i8_and1(<16 x i8> %arg) {
; SSE2: # %bb.0:
; SSE2-NEXT: psllw $7, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT: cmpw $-1, %ax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
@ -1134,7 +1134,7 @@ define i1 @allones_v16i8_and1(<16 x i8> %arg) {
; AVX: # %bb.0:
; AVX-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX-NEXT: vpmovmskb %xmm0, %eax
; AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX-NEXT: cmpw $-1, %ax
; AVX-NEXT: sete %al
; AVX-NEXT: retq
;
@ -1528,7 +1528,7 @@ define i1 @allones_v16i16_and1(<16 x i16> %arg) {
; SSE2-NEXT: pcmpeqw %xmm2, %xmm0
; SSE2-NEXT: packsswb %xmm1, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT: cmpw $-1, %ax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
@ -1544,7 +1544,7 @@ define i1 @allones_v16i16_and1(<16 x i16> %arg) {
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX1-NEXT: cmpw $-1, %ax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1557,7 +1557,7 @@ define i1 @allones_v16i16_and1(<16 x i16> %arg) {
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX2-NEXT: cmpw $-1, %ax
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1973,7 +1973,7 @@ define i1 @allones_v16i32_and1(<16 x i32> %arg) {
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: packsswb %xmm2, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT: cmpw $-1, %ax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
@ -1998,7 +1998,7 @@ define i1 @allones_v16i32_and1(<16 x i32> %arg) {
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX1-NEXT: cmpw $-1, %ax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -2015,7 +2015,7 @@ define i1 @allones_v16i32_and1(<16 x i32> %arg) {
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX2-NEXT: cmpw $-1, %ax
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -2458,7 +2458,7 @@ define i1 @allones_v16i8_and4(<16 x i8> %arg) {
; SSE2: # %bb.0:
; SSE2-NEXT: psllw $5, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT: cmpw $-1, %ax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
@ -2466,7 +2466,7 @@ define i1 @allones_v16i8_and4(<16 x i8> %arg) {
; AVX: # %bb.0:
; AVX-NEXT: vpsllw $5, %xmm0, %xmm0
; AVX-NEXT: vpmovmskb %xmm0, %eax
; AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX-NEXT: cmpw $-1, %ax
; AVX-NEXT: sete %al
; AVX-NEXT: retq
;
@ -2860,7 +2860,7 @@ define i1 @allones_v16i16_and4(<16 x i16> %arg) {
; SSE2-NEXT: pcmpeqw %xmm2, %xmm0
; SSE2-NEXT: packsswb %xmm1, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT: cmpw $-1, %ax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
@ -2876,7 +2876,7 @@ define i1 @allones_v16i16_and4(<16 x i16> %arg) {
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX1-NEXT: cmpw $-1, %ax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -2889,7 +2889,7 @@ define i1 @allones_v16i16_and4(<16 x i16> %arg) {
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX2-NEXT: cmpw $-1, %ax
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -3305,7 +3305,7 @@ define i1 @allones_v16i32_and4(<16 x i32> %arg) {
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: packsswb %xmm2, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT: cmpw $-1, %ax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
@ -3330,7 +3330,7 @@ define i1 @allones_v16i32_and4(<16 x i32> %arg) {
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX1-NEXT: cmpw $-1, %ax
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -3347,7 +3347,7 @@ define i1 @allones_v16i32_and4(<16 x i32> %arg) {
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX2-NEXT: cmpw $-1, %ax
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq

View File

@ -60,8 +60,7 @@ define i8 @unsigned_sat_constant_i8_using_cmp_notval(i8 %x) {
define i16 @unsigned_sat_constant_i16_using_min(i16 %x) {
; ANY-LABEL: unsigned_sat_constant_i16_using_min:
; ANY: # %bb.0:
; ANY-NEXT: movzwl %di, %eax
; ANY-NEXT: cmpl $65493, %eax # imm = 0xFFD5
; ANY-NEXT: cmpw $-43, %di
; ANY-NEXT: movl $65493, %eax # imm = 0xFFD5
; ANY-NEXT: cmovbl %edi, %eax
; ANY-NEXT: addl $42, %eax

View File

@ -9,7 +9,7 @@ define zeroext i16 @t1(i16 zeroext %x) nounwind readnone ssp {
; CHECK-LABEL: t1:
; CHECK: ## %bb.0:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl $26, %edi
; CHECK-NEXT: cmpw $26, %di
; CHECK-NEXT: seta %al
; CHECK-NEXT: shll $5, %eax
; CHECK-NEXT: retq
@ -22,7 +22,7 @@ define zeroext i16 @t2(i16 zeroext %x) nounwind readnone ssp {
; CHECK-LABEL: t2:
; CHECK: ## %bb.0:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl $26, %edi
; CHECK-NEXT: cmpw $26, %di
; CHECK-NEXT: setb %al
; CHECK-NEXT: shll $5, %eax
; CHECK-NEXT: retq

View File

@ -596,16 +596,14 @@ define i1 @add_ultcmp_bad_i16_i4(i16 %x) nounwind {
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: addl $8, %eax
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: cmpl $16, %eax
; X86-NEXT: cmpw $16, %ax
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: add_ultcmp_bad_i16_i4:
; X64: # %bb.0:
; X64-NEXT: addl $8, %edi
; X64-NEXT: movzwl %di, %eax
; X64-NEXT: cmpl $16, %eax
; X64-NEXT: cmpw $16, %di
; X64-NEXT: setb %al
; X64-NEXT: retq
%tmp0 = add i16 %x, 8 ; 1U << (4-1)

View File

@ -516,8 +516,7 @@ declare hidden fastcc %struct.temp_slot* @find_temp_slot_from_address(%struct.rt
; CHECK: testq %rdi, %rdi
; CHECK-NEXT: je [[CLEANUP:LBB[0-9_]+]]
;
; CHECK: movzwl (%rdi), [[BF_LOAD:%e[a-z]+]]
; CHECK-NEXT: cmpl $66, [[BF_LOAD]]
; CHECK: cmpw $66, (%rdi)
; CHECK-NEXT: jne [[CLEANUP]]
;
; CHECK: movq 8(%rdi), %rdi