[AArch64]Extend merging narrow loads into a wider load
This change extends r251438 to handle more narrow load promotions, including byte-sized, unscaled, and signed loads. For example, this change will convert:
  ldursh w1, [x0, #-2]
  ldurh  w2, [x0, #-4]
into:
  ldur w2, [x0, #-4]
  asr  w1, w2, #16
  and  w2, w2, #0xffff
llvm-svn: 253577
This commit is contained in:
parent
89d7ff5de6
commit
4c35ccac91
|
@ -161,6 +161,9 @@ static bool isUnscaledLdSt(unsigned Opc) {
|
|||
case AArch64::LDURXi:
|
||||
case AArch64::LDURSWi:
|
||||
case AArch64::LDURHHi:
|
||||
case AArch64::LDURBBi:
|
||||
case AArch64::LDURSBWi:
|
||||
case AArch64::LDURSHWi:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -169,16 +172,39 @@ static bool isUnscaledLdSt(MachineInstr *MI) {
|
|||
return isUnscaledLdSt(MI->getOpcode());
|
||||
}
|
||||
|
||||
// Return the W-register bitfield-extract opcode that matches the extension
// semantics of the given narrow load: UBFMWri for zero-extending loads,
// SBFMWri for sign-extending ones.
static unsigned getBitExtrOpcode(MachineInstr *MI) {
  const unsigned LoadOpc = MI->getOpcode();
  switch (LoadOpc) {
  case AArch64::LDRBBui:
  case AArch64::LDURBBi:
  case AArch64::LDRHHui:
  case AArch64::LDURHHi:
    // Unsigned byte/halfword loads -> unsigned bitfield move.
    return AArch64::UBFMWri;
  case AArch64::LDRSBWui:
  case AArch64::LDURSBWi:
  case AArch64::LDRSHWui:
  case AArch64::LDURSHWi:
    // Signed byte/halfword loads -> signed bitfield move.
    return AArch64::SBFMWri;
  default:
    llvm_unreachable("Unexpected opcode.");
  }
}
|
||||
|
||||
static bool isSmallTypeLdMerge(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
default:
|
||||
return false;
|
||||
case AArch64::LDRHHui:
|
||||
case AArch64::LDURHHi:
|
||||
case AArch64::LDRBBui:
|
||||
case AArch64::LDURBBi:
|
||||
case AArch64::LDRSHWui:
|
||||
case AArch64::LDURSHWi:
|
||||
case AArch64::LDRSBWui:
|
||||
case AArch64::LDURSBWi:
|
||||
return true;
|
||||
// FIXME: Add other instructions (e.g, LDRBBui, LDURSHWi, LDRSHWui, etc.).
|
||||
}
|
||||
}
|
||||
|
||||
// Convenience overload: query the candidate check on a machine instruction.
static bool isSmallTypeLdMerge(MachineInstr *MI) {
  const unsigned Opcode = MI->getOpcode();
  return isSmallTypeLdMerge(Opcode);
}
|
||||
|
@ -189,10 +215,15 @@ static int getMemScale(MachineInstr *MI) {
|
|||
default:
|
||||
llvm_unreachable("Opcode has unknown scale!");
|
||||
case AArch64::LDRBBui:
|
||||
case AArch64::LDURBBi:
|
||||
case AArch64::LDRSBWui:
|
||||
case AArch64::LDURSBWi:
|
||||
case AArch64::STRBBui:
|
||||
return 1;
|
||||
case AArch64::LDRHHui:
|
||||
case AArch64::LDURHHi:
|
||||
case AArch64::LDRSHWui:
|
||||
case AArch64::LDURSHWi:
|
||||
case AArch64::STRHHui:
|
||||
return 2;
|
||||
case AArch64::LDRSui:
|
||||
|
@ -265,11 +296,21 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
|
|||
case AArch64::LDURSi:
|
||||
case AArch64::LDRHHui:
|
||||
case AArch64::LDURHHi:
|
||||
case AArch64::LDRBBui:
|
||||
case AArch64::LDURBBi:
|
||||
return Opc;
|
||||
case AArch64::LDRSWui:
|
||||
return AArch64::LDRWui;
|
||||
case AArch64::LDURSWi:
|
||||
return AArch64::LDURWi;
|
||||
case AArch64::LDRSBWui:
|
||||
return AArch64::LDRBBui;
|
||||
case AArch64::LDRSHWui:
|
||||
return AArch64::LDRHHui;
|
||||
case AArch64::LDURSBWi:
|
||||
return AArch64::LDURBBi;
|
||||
case AArch64::LDURSHWi:
|
||||
return AArch64::LDURHHi;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -311,9 +352,17 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
|
|||
case AArch64::LDURSWi:
|
||||
return AArch64::LDPSWi;
|
||||
case AArch64::LDRHHui:
|
||||
case AArch64::LDRSHWui:
|
||||
return AArch64::LDRWui;
|
||||
case AArch64::LDURHHi:
|
||||
case AArch64::LDURSHWi:
|
||||
return AArch64::LDURWi;
|
||||
case AArch64::LDRBBui:
|
||||
case AArch64::LDRSBWui:
|
||||
return AArch64::LDRHHui;
|
||||
case AArch64::LDURBBi:
|
||||
case AArch64::LDURSBWi:
|
||||
return AArch64::LDURHHi;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -535,16 +584,16 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
|
|||
|
||||
if (isSmallTypeLdMerge(Opc)) {
|
||||
// Change the scaled offset from small to large type.
|
||||
if (!IsUnscaled)
|
||||
if (!IsUnscaled) {
|
||||
assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
|
||||
OffsetImm /= 2;
|
||||
}
|
||||
MachineInstr *RtNewDest = MergeForward ? I : Paired;
|
||||
// When merging small (< 32 bit) loads for big-endian targets, the order of
|
||||
// the component parts gets swapped.
|
||||
if (!Subtarget->isLittleEndian())
|
||||
std::swap(RtMI, Rt2MI);
|
||||
// Construct the new load instruction.
|
||||
// FIXME: currently we support only halfword unsigned load. We need to
|
||||
// handle byte type, signed, and store instructions as well.
|
||||
MachineInstr *NewMemMI, *BitExtMI1, *BitExtMI2;
|
||||
NewMemMI = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
|
||||
TII->get(NewOpc))
|
||||
|
@ -564,35 +613,61 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
|
|||
DEBUG(dbgs() << " with instructions:\n ");
|
||||
DEBUG((NewMemMI)->print(dbgs()));
|
||||
|
||||
int Width = getMemScale(I) == 1 ? 8 : 16;
|
||||
int LSBLow = 0;
|
||||
int LSBHigh = Width;
|
||||
int ImmsLow = LSBLow + Width - 1;
|
||||
int ImmsHigh = LSBHigh + Width - 1;
|
||||
MachineInstr *ExtDestMI = MergeForward ? Paired : I;
|
||||
if ((ExtDestMI == Rt2MI) == Subtarget->isLittleEndian()) {
|
||||
// Create the bitfield extract for high half.
|
||||
// Create the bitfield extract for high bits.
|
||||
BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
|
||||
TII->get(AArch64::UBFMWri))
|
||||
TII->get(getBitExtrOpcode(Rt2MI)))
|
||||
.addOperand(getLdStRegOp(Rt2MI))
|
||||
.addReg(getLdStRegOp(RtNewDest).getReg())
|
||||
.addImm(16)
|
||||
.addImm(31);
|
||||
// Create the bitfield extract for low half.
|
||||
BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
|
||||
TII->get(AArch64::ANDWri))
|
||||
.addOperand(getLdStRegOp(RtMI))
|
||||
.addReg(getLdStRegOp(RtNewDest).getReg())
|
||||
.addImm(15);
|
||||
.addImm(LSBHigh)
|
||||
.addImm(ImmsHigh);
|
||||
// Create the bitfield extract for low bits.
|
||||
if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) {
|
||||
// For unsigned, prefer to use AND for low bits.
|
||||
BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
|
||||
TII->get(AArch64::ANDWri))
|
||||
.addOperand(getLdStRegOp(RtMI))
|
||||
.addReg(getLdStRegOp(RtNewDest).getReg())
|
||||
.addImm(ImmsLow);
|
||||
} else {
|
||||
BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
|
||||
TII->get(getBitExtrOpcode(RtMI)))
|
||||
.addOperand(getLdStRegOp(RtMI))
|
||||
.addReg(getLdStRegOp(RtNewDest).getReg())
|
||||
.addImm(LSBLow)
|
||||
.addImm(ImmsLow);
|
||||
}
|
||||
} else {
|
||||
// Create the bitfield extract for low half.
|
||||
BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
|
||||
TII->get(AArch64::ANDWri))
|
||||
.addOperand(getLdStRegOp(RtMI))
|
||||
.addReg(getLdStRegOp(RtNewDest).getReg())
|
||||
.addImm(15);
|
||||
// Create the bitfield extract for high half.
|
||||
// Create the bitfield extract for low bits.
|
||||
if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) {
|
||||
// For unsigned, prefer to use AND for low bits.
|
||||
BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
|
||||
TII->get(AArch64::ANDWri))
|
||||
.addOperand(getLdStRegOp(RtMI))
|
||||
.addReg(getLdStRegOp(RtNewDest).getReg())
|
||||
.addImm(ImmsLow);
|
||||
} else {
|
||||
BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
|
||||
TII->get(getBitExtrOpcode(RtMI)))
|
||||
.addOperand(getLdStRegOp(RtMI))
|
||||
.addReg(getLdStRegOp(RtNewDest).getReg())
|
||||
.addImm(LSBLow)
|
||||
.addImm(ImmsLow);
|
||||
}
|
||||
|
||||
// Create the bitfield extract for high bits.
|
||||
BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(),
|
||||
TII->get(AArch64::UBFMWri))
|
||||
TII->get(getBitExtrOpcode(Rt2MI)))
|
||||
.addOperand(getLdStRegOp(Rt2MI))
|
||||
.addReg(getLdStRegOp(RtNewDest).getReg())
|
||||
.addImm(16)
|
||||
.addImm(31);
|
||||
.addImm(LSBHigh)
|
||||
.addImm(ImmsHigh);
|
||||
}
|
||||
DEBUG(dbgs() << " ");
|
||||
DEBUG((BitExtMI1)->print(dbgs()));
|
||||
|
@ -1173,7 +1248,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
|
|||
bool enableNarrowLdOpt) {
|
||||
bool Modified = false;
|
||||
// Three tranformations to do here:
|
||||
// 1) Find halfword loads that can be merged into a single 32-bit word load
|
||||
// 1) Find narrow loads that can be converted into a single wider load
|
||||
// with bitfield extract instructions.
|
||||
// e.g.,
|
||||
// ldrh w0, [x2]
|
||||
|
@ -1206,9 +1281,15 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
|
|||
++MBBI;
|
||||
break;
|
||||
// Scaled instructions.
|
||||
case AArch64::LDRBBui:
|
||||
case AArch64::LDRHHui:
|
||||
case AArch64::LDRSBWui:
|
||||
case AArch64::LDRSHWui:
|
||||
// Unscaled instructions.
|
||||
case AArch64::LDURHHi: {
|
||||
case AArch64::LDURBBi:
|
||||
case AArch64::LDURHHi:
|
||||
case AArch64::LDURSBWi:
|
||||
case AArch64::LDURSHWi: {
|
||||
if (tryToMergeLdStInst(MBBI)) {
|
||||
Modified = true;
|
||||
break;
|
||||
|
|
|
@ -60,3 +60,259 @@ define i16 @Ldrh_4_merge(i16* nocapture readonly %P) {
|
|||
%add14 = sub nuw nsw i16 %add9, %l3
|
||||
ret i16 %add14
|
||||
}
|
||||
|
||||
; Two adjacent scaled sign-extending halfword loads merged into one 32-bit
; ldr, with asr/sxth extracting the two sign-extended halves.
; CHECK-LABEL: Ldrsh_merge
; CHECK: ldr [[NEW_DEST:w[0-9]+]]
; CHECK-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
; CHECK-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]]
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldrsh_merge(i16* %p) nounwind {
  %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 4
  %tmp = load i16, i16* %add.ptr0
  %add.ptr = getelementptr inbounds i16, i16* %p, i64 5
  %tmp1 = load i16, i16* %add.ptr
  %sexttmp = sext i16 %tmp to i32
  %sexttmp1 = sext i16 %tmp1 to i32
  %add = sub nsw i32 %sexttmp1, %sexttmp
  ret i32 %add
}
|
||||
|
||||
; Mixed zext/sext halfword loads merged into one ldr; the extract opcodes
; (and vs. asr, sxth vs. lsr) swap between little- and big-endian.
; CHECK-LABEL: Ldrsh_zsext_merge
; CHECK: ldr [[NEW_DEST:w[0-9]+]]
; LE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
; LE-DAG: asr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
; BE-DAG: sxth [[LO_PART:w[0-9]+]], [[NEW_DEST]]
; BE-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldrsh_zsext_merge(i16* %p) nounwind {
  %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 4
  %tmp = load i16, i16* %add.ptr0
  %add.ptr = getelementptr inbounds i16, i16* %p, i64 5
  %tmp1 = load i16, i16* %add.ptr
  %sexttmp = zext i16 %tmp to i32
  %sexttmp1 = sext i16 %tmp1 to i32
  %add = sub nsw i32 %sexttmp, %sexttmp1
  ret i32 %add
}
|
||||
|
||||
; Mirror of Ldrsh_zsext_merge with the sext/zext roles swapped between the
; two loaded halfwords.
; CHECK-LABEL: Ldrsh_szext_merge
; CHECK: ldr [[NEW_DEST:w[0-9]+]]
; LE-DAG: sxth [[LO_PART:w[0-9]+]], [[NEW_DEST]]
; LE-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
; BE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
; BE-DAG: asr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldrsh_szext_merge(i16* %p) nounwind {
  %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 4
  %tmp = load i16, i16* %add.ptr0
  %add.ptr = getelementptr inbounds i16, i16* %p, i64 5
  %tmp1 = load i16, i16* %add.ptr
  %sexttmp = sext i16 %tmp to i32
  %sexttmp1 = zext i16 %tmp1 to i32
  %add = sub nsw i32 %sexttmp, %sexttmp1
  ret i32 %add
}
|
||||
|
||||
; Two adjacent zero-extending byte loads merged into one halfword ldrh,
; with and/ubfx extracting the two bytes.
; CHECK-LABEL: Ldrb_merge
; CHECK: ldrh [[NEW_DEST:w[0-9]+]]
; CHECK-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xff
; CHECK-DAG: ubfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldrb_merge(i8* %p) nounwind {
  %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2
  %tmp = load i8, i8* %add.ptr0
  %add.ptr = getelementptr inbounds i8, i8* %p, i64 3
  %tmp1 = load i8, i8* %add.ptr
  %sexttmp = zext i8 %tmp to i32
  %sexttmp1 = zext i8 %tmp1 to i32
  %add = sub nsw i32 %sexttmp, %sexttmp1
  ret i32 %add
}
|
||||
|
||||
; Two adjacent sign-extending byte loads merged into one halfword ldrh,
; with sxtb/sbfx extracting the two sign-extended bytes.
; CHECK-LABEL: Ldrsb_merge
; CHECK: ldrh [[NEW_DEST:w[0-9]+]]
; CHECK-DAG: sxtb [[LO_PART:w[0-9]+]], [[NEW_DEST]]
; CHECK-DAG: sbfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldrsb_merge(i8* %p) nounwind {
  %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2
  %tmp = load i8, i8* %add.ptr0
  %add.ptr = getelementptr inbounds i8, i8* %p, i64 3
  %tmp1 = load i8, i8* %add.ptr
  %sexttmp = sext i8 %tmp to i32
  %sexttmp1 = sext i8 %tmp1 to i32
  %add = sub nsw i32 %sexttmp, %sexttmp1
  ret i32 %add
}
|
||||
|
||||
; Mixed zext/sext byte loads merged into one ldrh; extract opcodes swap
; with endianness.
; CHECK-LABEL: Ldrsb_zsext_merge
; CHECK: ldrh [[NEW_DEST:w[0-9]+]]
; LE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xff
; LE-DAG: sbfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
; BE-DAG: sxtb [[LO_PART:w[0-9]+]], [[NEW_DEST]]
; BE-DAG: ubfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldrsb_zsext_merge(i8* %p) nounwind {
  %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2
  %tmp = load i8, i8* %add.ptr0
  %add.ptr = getelementptr inbounds i8, i8* %p, i64 3
  %tmp1 = load i8, i8* %add.ptr
  %sexttmp = zext i8 %tmp to i32
  %sexttmp1 = sext i8 %tmp1 to i32
  %add = sub nsw i32 %sexttmp, %sexttmp1
  ret i32 %add
}
|
||||
|
||||
; Mirror of Ldrsb_zsext_merge with the sext/zext roles swapped between the
; two loaded bytes.
; CHECK-LABEL: Ldrsb_szext_merge
; CHECK: ldrh [[NEW_DEST:w[0-9]+]]
; LE-DAG: sxtb [[LO_PART:w[0-9]+]], [[NEW_DEST]]
; LE-DAG: ubfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
; BE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xff
; BE-DAG: sbfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldrsb_szext_merge(i8* %p) nounwind {
  %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2
  %tmp = load i8, i8* %add.ptr0
  %add.ptr = getelementptr inbounds i8, i8* %p, i64 3
  %tmp1 = load i8, i8* %add.ptr
  %sexttmp = sext i8 %tmp to i32
  %sexttmp1 = zext i8 %tmp1 to i32
  %add = sub nsw i32 %sexttmp, %sexttmp1
  ret i32 %add
}
|
||||
|
||||
; Unscaled (negative-offset) sign-extending halfword loads merged into one
; 32-bit ldur plus asr/sxth extracts.
; CHECK-LABEL: Ldursh_merge
; CHECK: ldur [[NEW_DEST:w[0-9]+]]
; CHECK-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
; CHECK-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]]
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldursh_merge(i16* %p) nounwind {
  %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 -1
  %tmp = load i16, i16* %add.ptr0
  %add.ptr = getelementptr inbounds i16, i16* %p, i64 -2
  %tmp1 = load i16, i16* %add.ptr
  %sexttmp = sext i16 %tmp to i32
  %sexttmp1 = sext i16 %tmp1 to i32
  %add = sub nsw i32 %sexttmp, %sexttmp1
  ret i32 %add
}
|
||||
|
||||
; Unscaled mixed zext/sext halfword loads merged into one ldur; extract
; opcodes swap with endianness.
; CHECK-LABEL: Ldursh_zsext_merge
; CHECK: ldur [[NEW_DEST:w[0-9]+]]
; LE-DAG: lsr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
; LE-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]]
; BE-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
; BE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldursh_zsext_merge(i16* %p) nounwind {
  %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 -1
  %tmp = load i16, i16* %add.ptr0
  %add.ptr = getelementptr inbounds i16, i16* %p, i64 -2
  %tmp1 = load i16, i16* %add.ptr
  %sexttmp = zext i16 %tmp to i32
  %sexttmp1 = sext i16 %tmp1 to i32
  %add = sub nsw i32 %sexttmp, %sexttmp1
  ret i32 %add
}
|
||||
|
||||
; Mirror of Ldursh_zsext_merge with the sext/zext roles swapped between the
; two loaded halfwords.
; CHECK-LABEL: Ldursh_szext_merge
; CHECK: ldur [[NEW_DEST:w[0-9]+]]
; LE-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
; LE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xffff
; BE-DAG: lsr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16
; BE-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]]
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldursh_szext_merge(i16* %p) nounwind {
  %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 -1
  %tmp = load i16, i16* %add.ptr0
  %add.ptr = getelementptr inbounds i16, i16* %p, i64 -2
  %tmp1 = load i16, i16* %add.ptr
  %sexttmp = sext i16 %tmp to i32
  %sexttmp1 = zext i16 %tmp1 to i32
  %add = sub nsw i32 %sexttmp, %sexttmp1
  ret i32 %add
}
|
||||
|
||||
; Unscaled zero-extending byte loads merged into one halfword ldurh plus
; ubfx/and extracts.
; CHECK-LABEL: Ldurb_merge
; CHECK: ldurh [[NEW_DEST:w[0-9]+]]
; CHECK-DAG: ubfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
; CHECK-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xff
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldurb_merge(i8* %p) nounwind {
  %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1
  %tmp = load i8, i8* %add.ptr0
  %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2
  %tmp1 = load i8, i8* %add.ptr
  %sexttmp = zext i8 %tmp to i32
  %sexttmp1 = zext i8 %tmp1 to i32
  %add = sub nsw i32 %sexttmp, %sexttmp1
  ret i32 %add
}
|
||||
|
||||
; Unscaled sign-extending byte loads merged into one halfword ldurh plus
; sbfx/sxtb extracts.
; CHECK-LABEL: Ldursb_merge
; CHECK: ldurh [[NEW_DEST:w[0-9]+]]
; CHECK-DAG: sbfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
; CHECK-DAG: sxtb [[HI_PART:w[0-9]+]], [[NEW_DEST]]
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldursb_merge(i8* %p) nounwind {
  %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1
  %tmp = load i8, i8* %add.ptr0
  %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2
  %tmp1 = load i8, i8* %add.ptr
  %sexttmp = sext i8 %tmp to i32
  %sexttmp1 = sext i8 %tmp1 to i32
  %add = sub nsw i32 %sexttmp, %sexttmp1
  ret i32 %add
}
|
||||
|
||||
; Unscaled mixed zext/sext byte loads merged into one ldurh; extract
; opcodes swap with endianness.
; CHECK-LABEL: Ldursb_zsext_merge
; CHECK: ldurh [[NEW_DEST:w[0-9]+]]
; LE-DAG: ubfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
; LE-DAG: sxtb [[HI_PART:w[0-9]+]], [[NEW_DEST]]
; BE-DAG: sbfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
; BE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xff
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldursb_zsext_merge(i8* %p) nounwind {
  %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1
  %tmp = load i8, i8* %add.ptr0
  %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2
  %tmp1 = load i8, i8* %add.ptr
  %sexttmp = zext i8 %tmp to i32
  %sexttmp1 = sext i8 %tmp1 to i32
  %add = sub nsw i32 %sexttmp, %sexttmp1
  ret i32 %add
}
|
||||
|
||||
; Mirror of Ldursb_zsext_merge with the sext/zext roles swapped between the
; two loaded bytes.
; CHECK-LABEL: Ldursb_szext_merge
; CHECK: ldurh [[NEW_DEST:w[0-9]+]]
; LE-DAG: sbfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
; LE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xff
; BE-DAG: ubfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8
; BE-DAG: sxtb [[HI_PART:w[0-9]+]], [[NEW_DEST]]
; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]]
; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]]
define i32 @Ldursb_szext_merge(i8* %p) nounwind {
  %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1
  %tmp = load i8, i8* %add.ptr0
  %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2
  %tmp1 = load i8, i8* %add.ptr
  %sexttmp = sext i8 %tmp to i32
  %sexttmp1 = zext i8 %tmp1 to i32
  %add = sub nsw i32 %sexttmp, %sexttmp1
  ret i32 %add
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue