[AArch64]Extend merging narrow loads into a wider load

This change extends r251438 to handle more narrow load promotions
including byte type, unscaled, and signed. For example, this change will
convert:
  ldursh w1, [x0, #-2]
  ldurh  w2, [x0, #-4]
into
  ldur  w2, [x0, #-4]
  asr   w1, w2, #16
  and   w2, w2, #0xffff

llvm-svn: 253577
This commit is contained in:
Jun Bum Lim 2015-11-19 17:21:41 +00:00
parent 89d7ff5de6
commit 4c35ccac91
2 changed files with 363 additions and 26 deletions

View File

@ -161,6 +161,9 @@ static bool isUnscaledLdSt(unsigned Opc) {
case AArch64::LDURXi:
case AArch64::LDURSWi:
case AArch64::LDURHHi:
case AArch64::LDURBBi:
case AArch64::LDURSBWi:
case AArch64::LDURSHWi:
return true;
}
}
@ -169,16 +172,39 @@ static bool isUnscaledLdSt(MachineInstr *MI) {
return isUnscaledLdSt(MI->getOpcode());
}
// Return the bitfield-extract opcode used to split out one half of a merged
// wide load: sign-extending narrow loads need SBFM, zero-extending ones UBFM.
static unsigned getBitExtrOpcode(MachineInstr *MI) {
  const unsigned Opcode = MI->getOpcode();
  switch (Opcode) {
  // Sign-extending byte/halfword loads (scaled and unscaled).
  case AArch64::LDRSBWui:
  case AArch64::LDURSBWi:
  case AArch64::LDRSHWui:
  case AArch64::LDURSHWi:
    return AArch64::SBFMWri;
  // Zero-extending byte/halfword loads (scaled and unscaled).
  case AArch64::LDRBBui:
  case AArch64::LDURBBi:
  case AArch64::LDRHHui:
  case AArch64::LDURHHi:
    return AArch64::UBFMWri;
  default:
    llvm_unreachable("Unexpected opcode.");
  }
}
// Return true if \p Opc is a narrow (8- or 16-bit) load that can be merged
// with an adjacent narrow load into a single wider load plus bitfield
// extracts. Covers scaled/unscaled, signed/unsigned byte and halfword loads.
static bool isSmallTypeLdMerge(unsigned Opc) {
  switch (Opc) {
  default:
    return false;
  case AArch64::LDRHHui:
  case AArch64::LDURHHi:
  case AArch64::LDRBBui:
  case AArch64::LDURBBi:
  case AArch64::LDRSHWui:
  case AArch64::LDURSHWi:
  case AArch64::LDRSBWui:
  case AArch64::LDURSBWi:
    return true;
  // FIXME: Narrow store merging (e.g., STRBBui, STRHHui) is not handled yet.
  }
}
// Convenience overload: query the small-type merge property directly from a
// machine instruction by dispatching on its opcode.
static bool isSmallTypeLdMerge(MachineInstr *MI) {
  const unsigned Opcode = MI->getOpcode();
  return isSmallTypeLdMerge(Opcode);
}
@ -189,10 +215,15 @@ static int getMemScale(MachineInstr *MI) {
default:
llvm_unreachable("Opcode has unknown scale!");
case AArch64::LDRBBui:
case AArch64::LDURBBi:
case AArch64::LDRSBWui:
case AArch64::LDURSBWi:
case AArch64::STRBBui:
return 1;
case AArch64::LDRHHui:
case AArch64::LDURHHi:
case AArch64::LDRSHWui:
case AArch64::LDURSHWi:
case AArch64::STRHHui:
return 2;
case AArch64::LDRSui:
@ -265,11 +296,21 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
case AArch64::LDURSi:
case AArch64::LDRHHui:
case AArch64::LDURHHi:
case AArch64::LDRBBui:
case AArch64::LDURBBi:
return Opc;
case AArch64::LDRSWui:
return AArch64::LDRWui;
case AArch64::LDURSWi:
return AArch64::LDURWi;
case AArch64::LDRSBWui:
return AArch64::LDRBBui;
case AArch64::LDRSHWui:
return AArch64::LDRHHui;
case AArch64::LDURSBWi:
return AArch64::LDURBBi;
case AArch64::LDURSHWi:
return AArch64::LDURHHi;
}
}
@ -311,9 +352,17 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
case AArch64::LDURSWi:
return AArch64::LDPSWi;
case AArch64::LDRHHui:
case AArch64::LDRSHWui:
return AArch64::LDRWui;
case AArch64::LDURHHi:
case AArch64::LDURSHWi:
return AArch64::LDURWi;
case AArch64::LDRBBui:
case AArch64::LDRSBWui:
return AArch64::LDRHHui;
case AArch64::LDURBBi:
case AArch64::LDURSBWi:
return AArch64::LDURHHi;
}
}
@ -535,16 +584,16 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
if (isSmallTypeLdMerge(Opc)) {
// Change the scaled offset from small to large type.
if (!IsUnscaled)
if (!IsUnscaled) {
assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
OffsetImm /= 2;
}
MachineInstr *RtNewDest = MergeForward ? I : Paired;
// When merging small (< 32 bit) loads for big-endian targets, the order of
// the component parts gets swapped.
if (!Subtarget->isLittleEndian())
std::swap(RtMI, Rt2MI);
// Construct the new load instruction.
// Byte-sized, unscaled, and sign-extending loads are handled here.
// FIXME: merging of narrow store instructions is not supported yet.
MachineInstr *NewMemMI, *BitExtMI1, *BitExtMI2;
NewMemMI = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
TII->get(NewOpc))
@ -564,35 +613,61 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
DEBUG(dbgs() << " with instructions:\n ");
DEBUG((NewMemMI)->print(dbgs()));
int Width = getMemScale(I) == 1 ? 8 : 16;
int LSBLow = 0;
int LSBHigh = Width;
int ImmsLow = LSBLow + Width - 1;
int ImmsHigh = LSBHigh + Width - 1;
MachineInstr *ExtDestMI = MergeForward ? Paired : I;
if ((ExtDestMI == Rt2MI) == Subtarget->isLittleEndian()) {
// Create the bitfield extract for high bits.
BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
TII->get(AArch64::UBFMWri))
TII->get(getBitExtrOpcode(Rt2MI)))
.addOperand(getLdStRegOp(Rt2MI))
.addReg(getLdStRegOp(RtNewDest).getReg())
.addImm(16)
.addImm(31);
// Create the bitfield extract for low half.
BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
TII->get(AArch64::ANDWri))
.addOperand(getLdStRegOp(RtMI))
.addReg(getLdStRegOp(RtNewDest).getReg())
.addImm(15);
.addImm(LSBHigh)
.addImm(ImmsHigh);
// Create the bitfield extract for low bits.
if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) {
// For unsigned, prefer to use AND for low bits.
BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
TII->get(AArch64::ANDWri))
.addOperand(getLdStRegOp(RtMI))
.addReg(getLdStRegOp(RtNewDest).getReg())
.addImm(ImmsLow);
} else {
BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
TII->get(getBitExtrOpcode(RtMI)))
.addOperand(getLdStRegOp(RtMI))
.addReg(getLdStRegOp(RtNewDest).getReg())
.addImm(LSBLow)
.addImm(ImmsLow);
}
} else {
// Create the bitfield extract for low half.
BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
TII->get(AArch64::ANDWri))
.addOperand(getLdStRegOp(RtMI))
.addReg(getLdStRegOp(RtNewDest).getReg())
.addImm(15);
// Create the bitfield extract for high half.
// Create the bitfield extract for low bits.
if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) {
// For unsigned, prefer to use AND for low bits.
BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
TII->get(AArch64::ANDWri))
.addOperand(getLdStRegOp(RtMI))
.addReg(getLdStRegOp(RtNewDest).getReg())
.addImm(ImmsLow);
} else {
BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
TII->get(getBitExtrOpcode(RtMI)))
.addOperand(getLdStRegOp(RtMI))
.addReg(getLdStRegOp(RtNewDest).getReg())
.addImm(LSBLow)
.addImm(ImmsLow);
}
// Create the bitfield extract for high bits.
BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
TII->get(AArch64::UBFMWri))
TII->get(getBitExtrOpcode(Rt2MI)))
.addOperand(getLdStRegOp(Rt2MI))
.addReg(getLdStRegOp(RtNewDest).getReg())
.addImm(16)
.addImm(31);
.addImm(LSBHigh)
.addImm(ImmsHigh);
}
DEBUG(dbgs() << " ");
DEBUG((BitExtMI1)->print(dbgs()));
@ -1173,7 +1248,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
bool enableNarrowLdOpt) {
bool Modified = false;
// Three transformations to do here:
// 1) Find narrow loads that can be converted into a single wider load
// with bitfield extract instructions.
// e.g.,
// ldrh w0, [x2]
@ -1206,9 +1281,15 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
++MBBI;
break;
// Scaled instructions.
case AArch64::LDRBBui:
case AArch64::LDRHHui:
case AArch64::LDRSBWui:
case AArch64::LDRSHWui:
// Unscaled instructions.
case AArch64::LDURHHi: {
case AArch64::LDURBBi:
case AArch64::LDURHHi:
case AArch64::LDURSBWi:
case AArch64::LDURSHWi: {
if (tryToMergeLdStInst(MBBI)) {
Modified = true;
break;

View File

@ -60,3 +60,259 @@ define i16 @Ldrh_4_merge(i16* nocapture readonly %P) {
%add14 = sub nuw nsw i16 %add9, %l3
ret i16 %add14
}
; Two sign-extended i16 loads at adjacent indices 4 and 5 should merge into a
; single 32-bit ldr, with asr/sxth recovering the two halves.
; CHECK-LABEL: Ldrsh_merge
; CHECK: ldr [[NEW_DEST:w[0-9]+]]
; CHECK-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
; CHECK-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]]
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldrsh_merge(i16* %p) nounwind {
%add.ptr0 = getelementptr inbounds i16, i16* %p, i64 4
%tmp = load i16, i16* %add.ptr0
%add.ptr = getelementptr inbounds i16, i16* %p, i64 5
%tmp1 = load i16, i16* %add.ptr
%sexttmp = sext i16 %tmp to i32
%sexttmp1 = sext i16 %tmp1 to i32
%add = sub nsw i32 %sexttmp1, %sexttmp
ret i32 %add
}
; Mixed extension: zext on the lower-address i16 load, sext on the upper one.
; The merged ldr's halves are extracted with and/asr (LE) or sxth/lsr (BE).
; CHECK-LABEL: Ldrsh_zsext_merge
; CHECK: ldr [[NEW_DEST:w[0-9]+]]
; LE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
; LE-DAG: asr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
; BE-DAG: sxth [[LO_PART:w[0-9]+]], [[NEW_DEST]]
; BE-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldrsh_zsext_merge(i16* %p) nounwind {
%add.ptr0 = getelementptr inbounds i16, i16* %p, i64 4
%tmp = load i16, i16* %add.ptr0
%add.ptr = getelementptr inbounds i16, i16* %p, i64 5
%tmp1 = load i16, i16* %add.ptr
%sexttmp = zext i16 %tmp to i32
%sexttmp1 = sext i16 %tmp1 to i32
%add = sub nsw i32 %sexttmp, %sexttmp1
ret i32 %add
}
; Mixed extension, mirrored: sext on the lower-address i16 load, zext on the
; upper one; extraction opcodes swap accordingly between LE and BE.
; CHECK-LABEL: Ldrsh_szext_merge
; CHECK: ldr [[NEW_DEST:w[0-9]+]]
; LE-DAG: sxth [[LO_PART:w[0-9]+]], [[NEW_DEST]]
; LE-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
; BE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
; BE-DAG: asr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldrsh_szext_merge(i16* %p) nounwind {
%add.ptr0 = getelementptr inbounds i16, i16* %p, i64 4
%tmp = load i16, i16* %add.ptr0
%add.ptr = getelementptr inbounds i16, i16* %p, i64 5
%tmp1 = load i16, i16* %add.ptr
%sexttmp = sext i16 %tmp to i32
%sexttmp1 = zext i16 %tmp1 to i32
%add = sub nsw i32 %sexttmp, %sexttmp1
ret i32 %add
}
; Two zero-extended i8 loads at adjacent indices 2 and 3 should merge into a
; single ldrh, with and/ubfx extracting the two bytes.
; CHECK-LABEL: Ldrb_merge
; CHECK: ldrh [[NEW_DEST:w[0-9]+]]
; CHECK-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xff
; CHECK-DAG: ubfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldrb_merge(i8* %p) nounwind {
%add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2
%tmp = load i8, i8* %add.ptr0
%add.ptr = getelementptr inbounds i8, i8* %p, i64 3
%tmp1 = load i8, i8* %add.ptr
%sexttmp = zext i8 %tmp to i32
%sexttmp1 = zext i8 %tmp1 to i32
%add = sub nsw i32 %sexttmp, %sexttmp1
ret i32 %add
}
; Two sign-extended i8 loads at adjacent indices should merge into a single
; ldrh, with sxtb/sbfx performing the signed byte extractions.
; CHECK-LABEL: Ldrsb_merge
; CHECK: ldrh [[NEW_DEST:w[0-9]+]]
; CHECK-DAG: sxtb [[LO_PART:w[0-9]+]], [[NEW_DEST]]
; CHECK-DAG: sbfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldrsb_merge(i8* %p) nounwind {
%add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2
%tmp = load i8, i8* %add.ptr0
%add.ptr = getelementptr inbounds i8, i8* %p, i64 3
%tmp1 = load i8, i8* %add.ptr
%sexttmp = sext i8 %tmp to i32
%sexttmp1 = sext i8 %tmp1 to i32
%add = sub nsw i32 %sexttmp, %sexttmp1
ret i32 %add
}
; Mixed extension on byte loads: zext on the lower-address i8, sext on the
; upper; merged ldrh with and/sbfx (LE) or sxtb/ubfx (BE) extracts.
; CHECK-LABEL: Ldrsb_zsext_merge
; CHECK: ldrh [[NEW_DEST:w[0-9]+]]
; LE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xff
; LE-DAG: sbfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
; BE-DAG: sxtb [[LO_PART:w[0-9]+]], [[NEW_DEST]]
; BE-DAG: ubfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldrsb_zsext_merge(i8* %p) nounwind {
%add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2
%tmp = load i8, i8* %add.ptr0
%add.ptr = getelementptr inbounds i8, i8* %p, i64 3
%tmp1 = load i8, i8* %add.ptr
%sexttmp = zext i8 %tmp to i32
%sexttmp1 = sext i8 %tmp1 to i32
%add = sub nsw i32 %sexttmp, %sexttmp1
ret i32 %add
}
; Mixed extension on byte loads, mirrored: sext on the lower-address i8,
; zext on the upper; extraction opcodes swap between LE and BE.
; CHECK-LABEL: Ldrsb_szext_merge
; CHECK: ldrh [[NEW_DEST:w[0-9]+]]
; LE-DAG: sxtb [[LO_PART:w[0-9]+]], [[NEW_DEST]]
; LE-DAG: ubfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
; BE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xff
; BE-DAG: sbfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldrsb_szext_merge(i8* %p) nounwind {
%add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2
%tmp = load i8, i8* %add.ptr0
%add.ptr = getelementptr inbounds i8, i8* %p, i64 3
%tmp1 = load i8, i8* %add.ptr
%sexttmp = sext i8 %tmp to i32
%sexttmp1 = zext i8 %tmp1 to i32
%add = sub nsw i32 %sexttmp, %sexttmp1
ret i32 %add
}
; Unscaled variant: sign-extended i16 loads at negative offsets should merge
; into a single unscaled ldur, with asr/sxth recovering the halves.
; CHECK-LABEL: Ldursh_merge
; CHECK: ldur [[NEW_DEST:w[0-9]+]]
; CHECK-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
; CHECK-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]]
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldursh_merge(i16* %p) nounwind {
%add.ptr0 = getelementptr inbounds i16, i16* %p, i64 -1
%tmp = load i16, i16* %add.ptr0
%add.ptr = getelementptr inbounds i16, i16* %p, i64 -2
%tmp1 = load i16, i16* %add.ptr
%sexttmp = sext i16 %tmp to i32
%sexttmp1 = sext i16 %tmp1 to i32
%add = sub nsw i32 %sexttmp, %sexttmp1
ret i32 %add
}
; Unscaled, mixed extension: zext at offset -1, sext at offset -2; the
; merged ldur is split with lsr/sxth (LE) or asr/and (BE).
; CHECK-LABEL: Ldursh_zsext_merge
; CHECK: ldur [[NEW_DEST:w[0-9]+]]
; LE-DAG: lsr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
; LE-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]]
; BE-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
; BE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldursh_zsext_merge(i16* %p) nounwind {
%add.ptr0 = getelementptr inbounds i16, i16* %p, i64 -1
%tmp = load i16, i16* %add.ptr0
%add.ptr = getelementptr inbounds i16, i16* %p, i64 -2
%tmp1 = load i16, i16* %add.ptr
%sexttmp = zext i16 %tmp to i32
%sexttmp1 = sext i16 %tmp1 to i32
%add = sub nsw i32 %sexttmp, %sexttmp1
ret i32 %add
}
; Unscaled, mixed extension mirrored: sext at offset -1, zext at offset -2;
; extraction opcodes swap between LE and BE.
; CHECK-LABEL: Ldursh_szext_merge
; CHECK: ldur [[NEW_DEST:w[0-9]+]]
; LE-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
; LE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
; BE-DAG: lsr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
; BE-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]]
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldursh_szext_merge(i16* %p) nounwind {
%add.ptr0 = getelementptr inbounds i16, i16* %p, i64 -1
%tmp = load i16, i16* %add.ptr0
%add.ptr = getelementptr inbounds i16, i16* %p, i64 -2
%tmp1 = load i16, i16* %add.ptr
%sexttmp = sext i16 %tmp to i32
%sexttmp1 = zext i16 %tmp1 to i32
%add = sub nsw i32 %sexttmp, %sexttmp1
ret i32 %add
}
; Unscaled byte variant: two zero-extended i8 loads at negative offsets merge
; into a single ldurh, with ubfx/and extracting the bytes.
; CHECK-LABEL: Ldurb_merge
; CHECK: ldurh [[NEW_DEST:w[0-9]+]]
; CHECK-DAG: ubfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
; CHECK-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xff
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldurb_merge(i8* %p) nounwind {
%add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1
%tmp = load i8, i8* %add.ptr0
%add.ptr = getelementptr inbounds i8, i8* %p, i64 -2
%tmp1 = load i8, i8* %add.ptr
%sexttmp = zext i8 %tmp to i32
%sexttmp1 = zext i8 %tmp1 to i32
%add = sub nsw i32 %sexttmp, %sexttmp1
ret i32 %add
}
; Unscaled signed-byte variant: two sign-extended i8 loads at negative
; offsets merge into a single ldurh, with sbfx/sxtb extracting the bytes.
; CHECK-LABEL: Ldursb_merge
; CHECK: ldurh [[NEW_DEST:w[0-9]+]]
; CHECK-DAG: sbfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
; CHECK-DAG: sxtb [[HI_PART:w[0-9]+]], [[NEW_DEST]]
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldursb_merge(i8* %p) nounwind {
%add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1
%tmp = load i8, i8* %add.ptr0
%add.ptr = getelementptr inbounds i8, i8* %p, i64 -2
%tmp1 = load i8, i8* %add.ptr
%sexttmp = sext i8 %tmp to i32
%sexttmp1 = sext i8 %tmp1 to i32
%add = sub nsw i32 %sexttmp, %sexttmp1
ret i32 %add
}
; Unscaled bytes, mixed extension: zext at offset -1, sext at offset -2;
; merged ldurh split with ubfx/sxtb (LE) or sbfx/and (BE).
; CHECK-LABEL: Ldursb_zsext_merge
; CHECK: ldurh [[NEW_DEST:w[0-9]+]]
; LE-DAG: ubfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
; LE-DAG: sxtb [[HI_PART:w[0-9]+]], [[NEW_DEST]]
; BE-DAG: sbfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
; BE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xff
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldursb_zsext_merge(i8* %p) nounwind {
%add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1
%tmp = load i8, i8* %add.ptr0
%add.ptr = getelementptr inbounds i8, i8* %p, i64 -2
%tmp1 = load i8, i8* %add.ptr
%sexttmp = zext i8 %tmp to i32
%sexttmp1 = sext i8 %tmp1 to i32
%add = sub nsw i32 %sexttmp, %sexttmp1
ret i32 %add
}
; Unscaled bytes, mixed extension mirrored: sext at offset -1, zext at
; offset -2; extraction opcodes swap between LE and BE.
; CHECK-LABEL: Ldursb_szext_merge
; CHECK: ldurh [[NEW_DEST:w[0-9]+]]
; LE-DAG: sbfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
; LE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xff
; BE-DAG: ubfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
; BE-DAG: sxtb [[HI_PART:w[0-9]+]], [[NEW_DEST]]
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldursb_szext_merge(i8* %p) nounwind {
%add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1
%tmp = load i8, i8* %add.ptr0
%add.ptr = getelementptr inbounds i8, i8* %p, i64 -2
%tmp1 = load i8, i8* %add.ptr
%sexttmp = sext i8 %tmp to i32
%sexttmp1 = zext i8 %tmp1 to i32
%add = sub nsw i32 %sexttmp, %sexttmp1
ret i32 %add
}