[x86] Allow folding unaligned memory operands into pcmp[ei]str* instructions.

According to the x86 manual, these instructions are specifically permitted to read
unaligned memory, and ICC and GCC perform this folding as well.
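
For illustration only (not part of this commit), a minimal C sketch of the kind of
source that benefits, using SSE4.2 intrinsics; the function name and the exact
assembly in the comments are hypothetical and depend on the compiler and flags:

#include <nmmintrin.h>  /* SSE4.2: _mm_loadu_si128, _mm_cmpistri */

/* Returns the index of the first byte of 'chunk' that matches any byte in
 * 'needle', or 16 if no byte matches. 'chunk' may be unaligned. */
static int first_match_index(const char *chunk, __m128i needle) {
  /* Unaligned 16-byte load. Because pcmp[ei]str* may legally read unaligned
   * memory, a compiler can fold this load into the instruction's memory
   * operand (e.g. "pcmpistri $0, (%rdi), %xmm0") instead of first emitting a
   * separate movdqu into a scratch register. */
  __m128i data = _mm_loadu_si128((const __m128i *)chunk);
  return _mm_cmpistri(needle, data, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY);
}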

This corrects one of the issues identified in PR37246.

llvm-svn: 330896
Chandler Carruth 2018-04-26 03:17:25 +00:00
parent 8cc8c0a87c
commit eb631ef51e
3 changed files with 25 additions and 37 deletions


@@ -631,10 +631,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
 { X86::PABSBrr, X86::PABSBrm, TB_ALIGN_16 },
 { X86::PABSDrr, X86::PABSDrm, TB_ALIGN_16 },
 { X86::PABSWrr, X86::PABSWrm, TB_ALIGN_16 },
-{ X86::PCMPESTRIrr, X86::PCMPESTRIrm, TB_ALIGN_16 },
-{ X86::PCMPESTRM128rr, X86::PCMPESTRM128rm, TB_ALIGN_16 },
-{ X86::PCMPISTRIrr, X86::PCMPISTRIrm, TB_ALIGN_16 },
-{ X86::PCMPISTRM128rr, X86::PCMPISTRM128rm, TB_ALIGN_16 },
+{ X86::PCMPESTRIrr, X86::PCMPESTRIrm, 0 },
+{ X86::PCMPESTRM128rr, X86::PCMPESTRM128rm, 0 },
+{ X86::PCMPISTRIrr, X86::PCMPISTRIrm, 0 },
+{ X86::PCMPISTRM128rr, X86::PCMPISTRM128rm, 0 },
 { X86::PHMINPOSUWrr, X86::PHMINPOSUWrm, TB_ALIGN_16 },
 { X86::PMOVSXBDrr, X86::PMOVSXBDrm, TB_NO_REVERSE },
 { X86::PMOVSXBQrr, X86::PMOVSXBQrm, TB_NO_REVERSE },


@@ -437,7 +437,7 @@ define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1
 ; VCHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
 ; VCHECK-NEXT: vpcmpistrm $7, (%eax), %xmm0 ## encoding: [0xc4,0xe3,0x79,0x62,0x00,0x07]
 ; VCHECK-NEXT: retl ## encoding: [0xc3]
-%1 = load <16 x i8>, <16 x i8>* %a1
+%1 = load <16 x i8>, <16 x i8>* %a1, align 1
 %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1]
 ret <16 x i8> %res
 }


@@ -123,19 +123,18 @@ define i1 @pcmpestri_mem_eq_i8(i8* %lhs_ptr, i32 %lhs_len, i8* %rhs_ptr, i32 %rh
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT: movdqu (%esi), %xmm0
-; X32-NEXT: movdqu (%ecx), %xmm1
-; X32-NEXT: pcmpestri $24, %xmm1, %xmm0
+; X32-NEXT: pcmpestri $24, (%ecx), %xmm0
 ; X32-NEXT: setae %al
 ; X32-NEXT: popl %esi
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: pcmpestri_mem_eq_i8:
 ; X64: # %bb.0: # %entry
+; X64-NEXT: movq %rdx, %r8
 ; X64-NEXT: movdqu (%rdi), %xmm0
-; X64-NEXT: movdqu (%rdx), %xmm1
 ; X64-NEXT: movl %esi, %eax
 ; X64-NEXT: movl %ecx, %edx
-; X64-NEXT: pcmpestri $24, %xmm1, %xmm0
+; X64-NEXT: pcmpestri $24, (%r8), %xmm0
 ; X64-NEXT: setae %al
 ; X64-NEXT: retq
 entry:
@@ -157,19 +156,18 @@ define i32 @pcmpestri_mem_idx_i8(i8* %lhs_ptr, i32 %lhs_len, i8* %rhs_ptr, i32 %
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT: movdqu (%esi), %xmm0
-; X32-NEXT: movdqu (%ecx), %xmm1
-; X32-NEXT: pcmpestri $24, %xmm1, %xmm0
+; X32-NEXT: pcmpestri $24, (%ecx), %xmm0
 ; X32-NEXT: movl %ecx, %eax
 ; X32-NEXT: popl %esi
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: pcmpestri_mem_idx_i8:
 ; X64: # %bb.0: # %entry
+; X64-NEXT: movq %rdx, %r8
 ; X64-NEXT: movdqu (%rdi), %xmm0
-; X64-NEXT: movdqu (%rdx), %xmm1
 ; X64-NEXT: movl %esi, %eax
 ; X64-NEXT: movl %ecx, %edx
-; X64-NEXT: pcmpestri $24, %xmm1, %xmm0
+; X64-NEXT: pcmpestri $24, (%r8), %xmm0
 ; X64-NEXT: movl %ecx, %eax
 ; X64-NEXT: retq
 entry:
@@ -380,19 +378,18 @@ define i1 @pcmpestri_mem_eq_i16(i16* %lhs_ptr, i32 %lhs_len, i16* %rhs_ptr, i32
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT: movdqu (%esi), %xmm0
-; X32-NEXT: movdqu (%ecx), %xmm1
-; X32-NEXT: pcmpestri $25, %xmm1, %xmm0
+; X32-NEXT: pcmpestri $25, (%ecx), %xmm0
 ; X32-NEXT: setae %al
 ; X32-NEXT: popl %esi
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: pcmpestri_mem_eq_i16:
 ; X64: # %bb.0: # %entry
+; X64-NEXT: movq %rdx, %r8
 ; X64-NEXT: movdqu (%rdi), %xmm0
-; X64-NEXT: movdqu (%rdx), %xmm1
 ; X64-NEXT: movl %esi, %eax
 ; X64-NEXT: movl %ecx, %edx
-; X64-NEXT: pcmpestri $25, %xmm1, %xmm0
+; X64-NEXT: pcmpestri $25, (%r8), %xmm0
 ; X64-NEXT: setae %al
 ; X64-NEXT: retq
 entry:
@@ -416,19 +413,18 @@ define i32 @pcmpestri_mem_idx_i16(i16* %lhs_ptr, i32 %lhs_len, i16* %rhs_ptr, i3
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
 ; X32-NEXT: movdqu (%esi), %xmm0
-; X32-NEXT: movdqu (%ecx), %xmm1
-; X32-NEXT: pcmpestri $25, %xmm1, %xmm0
+; X32-NEXT: pcmpestri $25, (%ecx), %xmm0
 ; X32-NEXT: movl %ecx, %eax
 ; X32-NEXT: popl %esi
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: pcmpestri_mem_idx_i16:
 ; X64: # %bb.0: # %entry
+; X64-NEXT: movq %rdx, %r8
 ; X64-NEXT: movdqu (%rdi), %xmm0
-; X64-NEXT: movdqu (%rdx), %xmm1
 ; X64-NEXT: movl %esi, %eax
 ; X64-NEXT: movl %ecx, %edx
-; X64-NEXT: pcmpestri $25, %xmm1, %xmm0
+; X64-NEXT: pcmpestri $25, (%r8), %xmm0
 ; X64-NEXT: movl %ecx, %eax
 ; X64-NEXT: retq
 entry:
@@ -622,16 +618,14 @@ define i1 @pcmpistri_mem_eq_i8(i8* %lhs_ptr, i8* %rhs_ptr) nounwind {
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT: movdqu (%ecx), %xmm0
-; X32-NEXT: movdqu (%eax), %xmm1
-; X32-NEXT: pcmpistri $24, %xmm1, %xmm0
+; X32-NEXT: pcmpistri $24, (%eax), %xmm0
 ; X32-NEXT: setae %al
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: pcmpistri_mem_eq_i8:
 ; X64: # %bb.0: # %entry
 ; X64-NEXT: movdqu (%rdi), %xmm0
-; X64-NEXT: movdqu (%rsi), %xmm1
-; X64-NEXT: pcmpistri $24, %xmm1, %xmm0
+; X64-NEXT: pcmpistri $24, (%rsi), %xmm0
 ; X64-NEXT: setae %al
 ; X64-NEXT: retq
 entry:
@@ -650,16 +644,14 @@ define i32 @pcmpistri_mem_idx_i8(i8* %lhs_ptr, i8* %rhs_ptr) nounwind {
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT: movdqu (%ecx), %xmm0
-; X32-NEXT: movdqu (%eax), %xmm1
-; X32-NEXT: pcmpistri $24, %xmm1, %xmm0
+; X32-NEXT: pcmpistri $24, (%eax), %xmm0
 ; X32-NEXT: movl %ecx, %eax
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: pcmpistri_mem_idx_i8:
 ; X64: # %bb.0: # %entry
 ; X64-NEXT: movdqu (%rdi), %xmm0
-; X64-NEXT: movdqu (%rsi), %xmm1
-; X64-NEXT: pcmpistri $24, %xmm1, %xmm0
+; X64-NEXT: pcmpistri $24, (%rsi), %xmm0
 ; X64-NEXT: movl %ecx, %eax
 ; X64-NEXT: retq
 entry:
@@ -849,16 +841,14 @@ define i1 @pcmpistri_mem_eq_i16(i16* %lhs_ptr, i16* %rhs_ptr) nounwind {
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT: movdqu (%ecx), %xmm0
-; X32-NEXT: movdqu (%eax), %xmm1
-; X32-NEXT: pcmpistri $25, %xmm1, %xmm0
+; X32-NEXT: pcmpistri $25, (%eax), %xmm0
 ; X32-NEXT: setae %al
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: pcmpistri_mem_eq_i16:
 ; X64: # %bb.0: # %entry
 ; X64-NEXT: movdqu (%rdi), %xmm0
-; X64-NEXT: movdqu (%rsi), %xmm1
-; X64-NEXT: pcmpistri $25, %xmm1, %xmm0
+; X64-NEXT: pcmpistri $25, (%rsi), %xmm0
 ; X64-NEXT: setae %al
 ; X64-NEXT: retq
 entry:
@@ -879,16 +869,14 @@ define i32 @pcmpistri_mem_idx_i16(i16* %lhs_ptr, i16* %rhs_ptr) nounwind {
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT: movdqu (%ecx), %xmm0
-; X32-NEXT: movdqu (%eax), %xmm1
-; X32-NEXT: pcmpistri $25, %xmm1, %xmm0
+; X32-NEXT: pcmpistri $25, (%eax), %xmm0
 ; X32-NEXT: movl %ecx, %eax
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: pcmpistri_mem_idx_i16:
 ; X64: # %bb.0: # %entry
 ; X64-NEXT: movdqu (%rdi), %xmm0
-; X64-NEXT: movdqu (%rsi), %xmm1
-; X64-NEXT: pcmpistri $25, %xmm1, %xmm0
+; X64-NEXT: pcmpistri $25, (%rsi), %xmm0
 ; X64-NEXT: movl %ecx, %eax
 ; X64-NEXT: retq
 entry: