[DAGCombiner] Fix for big endian in ForwardStoreValueToDirectLoad

Summary:
Normalize the offset for endianness before checking
if the store covers the load in ForwardStoreValueToDirectLoad.

Without this we missed out on some optimizations for big
endian targets. For example, given a 4-byte store followed
by a 1-byte load that loads the least significant byte of the
stored value, the STCoversLD check would fail (see @test4 in
test/CodeGen/AArch64/load-store-forwarding.ll).

This patch also fixes a problem seen in an out-of-tree target.
The target has i40 as a legal type, it is big endian,
and the StoreSize for i40 is 48 bits. So when normalizing
the offset for endianness we need to take the StoreSize into
account (assuming that padding added when storing into
a larger StoreSize is always added at the most significant
end).

Reviewers: niravd

Reviewed By: niravd

Subscribers: javed.absar, kristof.beyls, llvm-commits, uabelho

Differential Revision: https://reviews.llvm.org/D53776

llvm-svn: 345636
This commit is contained in:
Bjorn Pettersson 2018-10-30 20:16:39 +00:00
parent fa03c690bd
commit fe09a20f09
2 changed files with 92 additions and 11 deletions

View File

@ -12854,19 +12854,23 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
int64_t Offset;
bool STCoversLD =
BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset) && (Offset >= 0) &&
(Offset * 8 <= LDMemType.getSizeInBits()) &&
(Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
if (!STCoversLD)
if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
return SDValue();
// Normalize for Endianness.
// Normalize for Endianness. After this Offset=0 will denote that the least
// significant bit in the loaded value maps to the least significant bit in
// the stored value. With Offset=n (for n > 0) the loaded value starts at the
// n:th least significant byte of the stored value.
if (DAG.getDataLayout().isBigEndian())
Offset =
(STMemType.getSizeInBits() - LDMemType.getSizeInBits()) / 8 - Offset;
Offset = (STMemType.getStoreSizeInBits() -
LDMemType.getStoreSizeInBits()) / 8 - Offset;
// Check that the stored value covers all bits that are loaded.
bool STCoversLD =
(Offset >= 0) &&
(Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
if (!STCoversLD)
return SDValue();
// Memory as copy space (potentially masked).
if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
@ -12899,7 +12903,7 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
continue;
if (STMemType != LDMemType) {
// TODO: Support vectors? This requires extract_subvector/bitcast.
if (!STMemType.isVector() && !LDMemType.isVector() &&
if (!STMemType.isVector() && !LDMemType.isVector() &&
STMemType.isInteger() && LDMemType.isInteger())
Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
else

View File

@ -0,0 +1,77 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64_be -o - %s | FileCheck %s --check-prefix CHECK-BE
; RUN: llc -mtriple=aarch64 -o - %s | FileCheck %s --check-prefix CHECK-LE
; test1: store an i32 through %pa, then load the i8 at byte offset 0 of the
; same address. The expected little-endian output contains no load (the
; function returns right after the store), while the expected big-endian
; output still performs a byte load from [x1].
define i8 @test1(i32 %a, i8* %pa) {
; CHECK-BE-LABEL: test1:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: str w0, [x1]
; CHECK-BE-NEXT: ldrb w0, [x1]
; CHECK-BE-NEXT: ret
;
; CHECK-LE-LABEL: test1:
; CHECK-LE: // %bb.0:
; CHECK-LE-NEXT: str w0, [x1]
; CHECK-LE-NEXT: ret
%p32 = bitcast i8* %pa to i32*
%p8 = getelementptr i8, i8* %pa, i32 0
store i32 %a, i32* %p32
%res = load i8, i8* %p8
ret i8 %res
}
; test2: store an i32 through %pa, then load the i8 at byte offset 1 of the
; same address. The expected little-endian output replaces the load with a
; bitfield extract of bits 8..15 of the stored register, while the expected
; big-endian output still performs a byte load from [x1, #1].
define i8 @test2(i32 %a, i8* %pa) {
; CHECK-BE-LABEL: test2:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: str w0, [x1]
; CHECK-BE-NEXT: ldrb w0, [x1, #1]
; CHECK-BE-NEXT: ret
;
; CHECK-LE-LABEL: test2:
; CHECK-LE: // %bb.0:
; CHECK-LE-NEXT: str w0, [x1]
; CHECK-LE-NEXT: ubfx w0, w0, #8, #8
; CHECK-LE-NEXT: ret
%p32 = bitcast i8* %pa to i32*
%p8 = getelementptr i8, i8* %pa, i32 1
store i32 %a, i32* %p32
%res = load i8, i8* %p8
ret i8 %res
}
; test3: store an i32 through %pa, then load the i8 at byte offset 2 of the
; same address. The expected little-endian output replaces the load with a
; bitfield extract of bits 16..23 of the stored register, while the expected
; big-endian output still performs a byte load from [x1, #2].
define i8 @test3(i32 %a, i8* %pa) {
; CHECK-BE-LABEL: test3:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: str w0, [x1]
; CHECK-BE-NEXT: ldrb w0, [x1, #2]
; CHECK-BE-NEXT: ret
;
; CHECK-LE-LABEL: test3:
; CHECK-LE: // %bb.0:
; CHECK-LE-NEXT: str w0, [x1]
; CHECK-LE-NEXT: ubfx w0, w0, #16, #8
; CHECK-LE-NEXT: ret
%p32 = bitcast i8* %pa to i32*
%p8 = getelementptr i8, i8* %pa, i32 2
store i32 %a, i32* %p32
%res = load i8, i8* %p8
ret i8 %res
}
; test4: store an i32 through %pa, then load the i8 at byte offset 3 of the
; same address. On a big-endian target this is the least significant byte of
; the stored value, and the expected big-endian output contains no load (the
; function returns right after the store). The expected little-endian output
; replaces the load with a logical shift right by 24 of the stored register.
define i8 @test4(i32 %a, i8* %pa) {
; CHECK-BE-LABEL: test4:
; CHECK-BE: // %bb.0:
; CHECK-BE-NEXT: str w0, [x1]
; CHECK-BE-NEXT: ret
;
; CHECK-LE-LABEL: test4:
; CHECK-LE: // %bb.0:
; CHECK-LE-NEXT: str w0, [x1]
; CHECK-LE-NEXT: lsr w0, w0, #24
; CHECK-LE-NEXT: ret
%p32 = bitcast i8* %pa to i32*
%p8 = getelementptr i8, i8* %pa, i32 3
store i32 %a, i32* %p32
%res = load i8, i8* %p8
ret i8 %res
}