[Hexagon] Implement frame pointer elimination with -fomit-frame-pointer

It applies to leaf functions that are otherwise not required to have
a frame pointer.

llvm-svn: 306888
This commit is contained in:
Krzysztof Parzyszek 2017-06-30 21:21:40 +00:00
parent 2fb1075f14
commit 9eb75c4520
6 changed files with 245 additions and 140 deletions

View File

@ -178,8 +178,8 @@ static cl::opt<bool> EnableSaveRestoreLong("enable-save-restore-long",
cl::Hidden, cl::desc("Enable long calls for save-restore stubs."),
cl::init(false), cl::ZeroOrMore);
static cl::opt<bool> UseAllocframe("use-allocframe", cl::init(true),
cl::Hidden, cl::desc("Use allocframe more conservatively"));
static cl::opt<bool> EliminateFramePointer("hexagon-fp-elim", cl::init(true),
cl::Hidden, cl::desc("Refrain from using FP whenever possible"));
static cl::opt<bool> OptimizeSpillSlots("hexagon-opt-spill", cl::Hidden,
cl::init(true), cl::desc("Optimize spill slots"));
@ -550,7 +550,6 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,
auto &HST = MF.getSubtarget<HexagonSubtarget>();
auto &HII = *HST.getInstrInfo();
auto &HRI = *HST.getRegisterInfo();
DebugLoc dl;
unsigned MaxAlign = std::max(MFI.getMaxAlignment(), getStackAlignment());
@ -584,77 +583,56 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,
MI->eraseFromParent();
}
if (!hasFP(MF))
return;
DebugLoc dl = MBB.findDebugLoc(InsertPt);
// Check for overflow.
// Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
const unsigned int ALLOCFRAME_MAX = 16384;
// Create a dummy memory operand to avoid allocframe from being treated as
// a volatile memory reference.
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOStore,
4, 4);
if (NumBytes >= ALLOCFRAME_MAX) {
// Emit allocframe(#0).
BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
.addImm(0)
.addMemOperand(MMO);
// Subtract offset from frame pointer.
// We use a caller-saved non-parameter register for that.
unsigned CallerSavedReg = HRI.getFirstCallerSavedNonParamReg();
BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::CONST32),
CallerSavedReg).addImm(NumBytes);
BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_sub), SP)
if (hasFP(MF)) {
insertAllocframe(MBB, InsertPt, NumBytes);
if (AlignStack) {
BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP)
.addReg(SP)
.addImm(-int64_t(MaxAlign));
}
// If the stack-checking is enabled, and we spilled the callee-saved
// registers inline (i.e. did not use a spill function), then call
// the stack checker directly.
if (EnableStackOVFSanitizer && !PrologueStubs)
BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::PS_call_stk))
.addExternalSymbol("__runtime_stack_check");
} else if (NumBytes > 0) {
assert(alignTo(NumBytes, 8) == NumBytes);
BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
.addReg(SP)
.addReg(CallerSavedReg);
} else {
BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
.addImm(NumBytes)
.addMemOperand(MMO);
.addImm(-int(NumBytes));
}
if (AlignStack) {
BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP)
.addReg(SP)
.addImm(-int64_t(MaxAlign));
}
// If the stack-checking is enabled, and we spilled the callee-saved
// registers inline (i.e. did not use a spill function), then call
// the stack checker directly.
if (EnableStackOVFSanitizer && !PrologueStubs)
BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::PS_call_stk))
.addExternalSymbol("__runtime_stack_check");
}
void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
MachineFunction &MF = *MBB.getParent();
if (!hasFP(MF))
return;
auto &HST = MF.getSubtarget<HexagonSubtarget>();
auto &HII = *HST.getInstrInfo();
auto &HRI = *HST.getRegisterInfo();
unsigned SP = HRI.getStackRegister();
MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator();
DebugLoc dl = MBB.findDebugLoc(InsertPt);
if (!hasFP(MF)) {
MachineFrameInfo &MFI = MF.getFrameInfo();
if (unsigned NumBytes = MFI.getStackSize()) {
BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
.addReg(SP)
.addImm(NumBytes);
}
return;
}
MachineInstr *RetI = getReturn(MBB);
unsigned RetOpc = RetI ? RetI->getOpcode() : 0;
MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator();
DebugLoc DL;
if (InsertPt != MBB.end())
DL = InsertPt->getDebugLoc();
else if (!MBB.empty())
DL = std::prev(MBB.end())->getDebugLoc();
// Handle EH_RETURN.
if (RetOpc == Hexagon::EH_RETURN_JMPR) {
BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe));
BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::A2_add), SP)
BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe));
BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_add), SP)
.addReg(SP)
.addReg(Hexagon::R28);
return;
@ -699,16 +677,52 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
// otherwise just add deallocframe. The function could be returning via a
// tail call.
if (RetOpc != Hexagon::PS_jmpret || DisableDeallocRet) {
BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe));
BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe));
return;
}
unsigned NewOpc = Hexagon::L4_return;
MachineInstr *NewI = BuildMI(MBB, RetI, DL, HII.get(NewOpc));
MachineInstr *NewI = BuildMI(MBB, RetI, dl, HII.get(NewOpc));
// Transfer the function live-out registers.
NewI->copyImplicitOps(MF, *RetI);
MBB.erase(RetI);
}
/// Emit the frame-allocating instruction(s) in \p MBB before \p InsertPt.
///
/// When \p NumBytes is below ALLOCFRAME_MAX, a single S2_allocframe carrying
/// \p NumBytes as its immediate is emitted. Otherwise an S2_allocframe with
/// an immediate of 0 is emitted, followed by an A2_addi that adds -NumBytes
/// to the stack pointer register.
///
/// \param MBB       Block that receives the new instructions.
/// \param InsertPt  Position in \p MBB before which they are inserted; also
///                  used to pick the debug location for them.
/// \param NumBytes  Number of bytes to allocate.
void HexagonFrameLowering::insertAllocframe(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPt, unsigned NumBytes) const {
MachineFunction &MF = *MBB.getParent();
auto &HST = MF.getSubtarget<HexagonSubtarget>();
auto &HII = *HST.getInstrInfo();
auto &HRI = *HST.getRegisterInfo();
// Sizes at or above this limit are not passed to allocframe directly.
// NOTE(review): presumably this is the range of allocframe's immediate
// operand -- confirm against the instruction definition.
// Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
const unsigned int ALLOCFRAME_MAX = 16384;
// Create a dummy memory operand to prevent allocframe from being treated
// as a volatile memory reference.
auto *MMO = MF.getMachineMemOperand(MachinePointerInfo::getStack(MF, 0),
MachineMemOperand::MOStore, 4, 4);
DebugLoc dl = MBB.findDebugLoc(InsertPt);
if (NumBytes >= ALLOCFRAME_MAX) {
// The size is too large for a single allocframe: emit allocframe(#0)
// and adjust the stack pointer separately.
BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
.addImm(0)
.addMemOperand(MMO);
// Subtract the size from the stack pointer (add the negated size).
unsigned SP = HRI.getStackRegister();
BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
.addReg(SP)
.addImm(-int(NumBytes));
} else {
// The size is below the limit: allocframe takes it as its immediate.
BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
.addImm(NumBytes)
.addMemOperand(MMO);
}
}
void HexagonFrameLowering::updateEntryPaths(MachineFunction &MF,
MachineBasicBlock &SaveB) const {
SetVector<unsigned> Worklist;
@ -928,12 +942,11 @@ void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB,
}
bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
return false;
auto &MFI = MF.getFrameInfo();
auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
bool HasFixed = MFI.getNumFixedObjects();
bool HasPrealloc = const_cast<MachineFrameInfo&>(MFI)
.getLocalFrameObjectCount();
bool HasExtraAlign = HRI.needsStackRealignment(MF);
bool HasAlloca = MFI.hasVarSizedObjects();
@ -947,18 +960,35 @@ bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
// By default we want to use SP (since it's always there). FP requires
// some setup (i.e. ALLOCFRAME).
// Fixed and preallocated objects need FP if the distance from them to
// the SP is unknown (as is with alloca or aligna).
if ((HasFixed || HasPrealloc) && (HasAlloca || HasExtraAlign))
// Both alloca and stack alignment modify the stack pointer by an
// undetermined value, so we need to save it at the entry to the function
// (i.e. use allocframe).
if (HasAlloca || HasExtraAlign)
return true;
if (MFI.getStackSize() > 0) {
if (EnableStackOVFSanitizer || UseAllocframe)
// If FP-elimination is disabled, we have to use FP at this point.
const TargetMachine &TM = MF.getTarget();
if (TM.Options.DisableFramePointerElim(MF) || !EliminateFramePointer)
return true;
if (EnableStackOVFSanitizer)
return true;
}
if (MFI.hasCalls() ||
MF.getInfo<HexagonMachineFunctionInfo>()->hasClobberLR())
const auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
if (MFI.hasCalls() || HMFI.hasClobberLR())
return true;
// Frame pointer elimination is a possibility at this point, but
// to know if FP is necessary we need to know if spill/restore
// functions will be used (they require FP to be valid).
// This means that hasFP shouldn't really be called before CSI is
// calculated, and some measures are taken to make sure of that
// (e.g. default implementations of virtual functions that call it
// are overridden appropriately).
assert(MFI.isCalleeSavedInfoValid() && "Need to know CSI");
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
if (useSpillFunction(MF, CSI) || useRestoreFunction(MF, CSI))
return true;
return false;
@ -1051,9 +1081,10 @@ int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF,
bool HasExtraAlign = HRI.needsStackRealignment(MF);
bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None;
unsigned FrameSize = MFI.getStackSize();
unsigned SP = HRI.getStackRegister(), FP = HRI.getFrameRegister();
auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
unsigned FrameSize = MFI.getStackSize();
unsigned SP = HRI.getStackRegister();
unsigned FP = HRI.getFrameRegister();
unsigned AP = HMFI.getStackAlignBasePhysReg();
// It may happen that AP will be absent even if HasAlloca && HasExtraAlign
// is true. HasExtraAlign may be set because of vector spills, without
@ -1135,7 +1166,7 @@ int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF,
// there will be no SP -= FrameSize), so the frame size should not be
// added to the calculated offset.
int RealOffset = Offset;
if (!UseFP && !UseAP && HasFP)
if (!UseFP && !UseAP)
RealOffset = FrameSize+Offset;
return RealOffset;
}
@ -2402,7 +2433,7 @@ void HexagonFrameLowering::addCalleeSaveRegistersAsImpOperand(MachineInstr *MI,
/// be generated via inline code. If this function returns "true", inline
/// code will be generated. If this function returns "false", additional
/// checks are performed, which may still lead to the inline code.
bool HexagonFrameLowering::shouldInlineCSR(MachineFunction &MF,
bool HexagonFrameLowering::shouldInlineCSR(const MachineFunction &MF,
const CSIVect &CSI) const {
if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn())
return true;
@ -2432,7 +2463,7 @@ bool HexagonFrameLowering::shouldInlineCSR(MachineFunction &MF,
return false;
}
bool HexagonFrameLowering::useSpillFunction(MachineFunction &MF,
bool HexagonFrameLowering::useSpillFunction(const MachineFunction &MF,
const CSIVect &CSI) const {
if (shouldInlineCSR(MF, CSI))
return false;
@ -2445,7 +2476,7 @@ bool HexagonFrameLowering::useSpillFunction(MachineFunction &MF,
return Threshold < NumCSI;
}
bool HexagonFrameLowering::useRestoreFunction(MachineFunction &MF,
bool HexagonFrameLowering::useRestoreFunction(const MachineFunction &MF,
const CSIVect &CSI) const {
if (shouldInlineCSR(MF, CSI))
return false;

View File

@ -48,6 +48,15 @@ public:
return true;
}
/// Answers true unconditionally; \p MF is not inspected.
bool hasReservedCallFrame(const MachineFunction &MF) const override {
// We always reserve call frame as a part of the initial stack allocation.
return true;
}
/// Answers true unconditionally; \p MF is not inspected.
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override {
// Override this function to avoid calling hasFP before CSI is set
// (the default implementation calls hasFP). hasFP asserts that the
// callee-saved info is valid, so it must not run before that point.
return true;
}
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const override;
@ -94,6 +103,8 @@ private:
unsigned SP, unsigned CF) const;
void insertPrologueInBlock(MachineBasicBlock &MBB, bool PrologueStubs) const;
void insertEpilogueInBlock(MachineBasicBlock &MBB) const;
void insertAllocframe(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPt, unsigned NumBytes) const;
bool insertCSRSpillsInBlock(MachineBasicBlock &MBB, const CSIVect &CSI,
const HexagonRegisterInfo &HRI, bool &PrologueStubs) const;
bool insertCSRRestoresInBlock(MachineBasicBlock &MBB, const CSIVect &CSI,
@ -148,9 +159,9 @@ private:
void addCalleeSaveRegistersAsImpOperand(MachineInstr *MI, const CSIVect &CSI,
bool IsDef, bool IsKill) const;
bool shouldInlineCSR(MachineFunction &MF, const CSIVect &CSI) const;
bool useSpillFunction(MachineFunction &MF, const CSIVect &CSI) const;
bool useRestoreFunction(MachineFunction &MF, const CSIVect &CSI) const;
bool shouldInlineCSR(const MachineFunction &MF, const CSIVect &CSI) const;
bool useSpillFunction(const MachineFunction &MF, const CSIVect &CSI) const;
bool useRestoreFunction(const MachineFunction &MF, const CSIVect &CSI) const;
bool mayOverflowFrameOffset(MachineFunction &MF) const;
};

View File

@ -1002,51 +1002,46 @@ bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
SDValue
HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
MachineFunction &MF = DAG.getMachineFunction();
auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
switch (Node->getOpcode()) {
case ISD::INLINEASM: {
unsigned NumOps = Node->getNumOperands();
if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
--NumOps; // Ignore the flag operand.
auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
unsigned LR = HRI.getRARegister();
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
if (FuncInfo.hasClobberLR())
break;
unsigned Flags =
cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
++i; // Skip the ID value.
if (Op.getOpcode() != ISD::INLINEASM || HMFI.hasClobberLR())
return Op;
switch (InlineAsm::getKind(Flags)) {
default: llvm_unreachable("Bad flags!");
case InlineAsm::Kind_RegDef:
case InlineAsm::Kind_RegUse:
case InlineAsm::Kind_Imm:
case InlineAsm::Kind_Clobber:
case InlineAsm::Kind_Mem: {
for (; NumVals; --NumVals, ++i) {}
break;
}
case InlineAsm::Kind_RegDefEarlyClobber: {
for (; NumVals; --NumVals, ++i) {
unsigned Reg =
cast<RegisterSDNode>(Node->getOperand(i))->getReg();
unsigned NumOps = Op.getNumOperands();
if (Op.getOperand(NumOps-1).getValueType() == MVT::Glue)
--NumOps; // Ignore the flag operand.
// Check it to be lr
const HexagonRegisterInfo *QRI = Subtarget.getRegisterInfo();
if (Reg == QRI->getRARegister()) {
FuncInfo.setHasClobberLR(true);
break;
}
}
break;
}
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
++i; // Skip the ID value.
switch (InlineAsm::getKind(Flags)) {
default:
llvm_unreachable("Bad flags!");
case InlineAsm::Kind_RegUse:
case InlineAsm::Kind_Imm:
case InlineAsm::Kind_Mem:
i += NumVals;
break;
case InlineAsm::Kind_Clobber:
case InlineAsm::Kind_RegDef:
case InlineAsm::Kind_RegDefEarlyClobber: {
for (; NumVals; --NumVals, ++i) {
unsigned Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
if (Reg != LR)
continue;
HMFI.setHasClobberLR(true);
return Op;
}
break;
}
}
} // Node->getOpcode
}
return Op;
}

View File

@ -0,0 +1,91 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
target triple = "hexagon"
; FP elimination enabled.
;
; CHECK-LABEL: danny:
; CHECK: r29 = add(r29,#-[[SIZE:[0-9]+]])
; CHECK: r29 = add(r29,#[[SIZE]])
define i32 @danny(i32 %a0, i32 %a1) local_unnamed_addr #0 {
b2:
%v3 = alloca [32 x i32], align 8
%v4 = bitcast [32 x i32]* %v3 to i8*
call void @llvm.lifetime.start.p0i8(i64 128, i8* nonnull %v4) #3
br label %b5
b5: ; preds = %b5, %b2
%v6 = phi i32 [ 0, %b2 ], [ %v8, %b5 ]
%v7 = getelementptr inbounds [32 x i32], [32 x i32]* %v3, i32 0, i32 %v6
store i32 %v6, i32* %v7, align 4
%v8 = add nuw nsw i32 %v6, 1
%v9 = icmp eq i32 %v8, 32
br i1 %v9, label %b10, label %b5
b10: ; preds = %b5
%v11 = getelementptr inbounds [32 x i32], [32 x i32]* %v3, i32 0, i32 %a0
store i32 %a1, i32* %v11, align 4
br label %b12
b12: ; preds = %b12, %b10
%v13 = phi i32 [ 0, %b10 ], [ %v18, %b12 ]
%v14 = phi i32 [ 0, %b10 ], [ %v17, %b12 ]
%v15 = getelementptr inbounds [32 x i32], [32 x i32]* %v3, i32 0, i32 %v13
%v16 = load i32, i32* %v15, align 4
%v17 = add nsw i32 %v16, %v14
%v18 = add nuw nsw i32 %v13, 1
%v19 = icmp eq i32 %v18, 32
br i1 %v19, label %b20, label %b12
b20: ; preds = %b12
call void @llvm.lifetime.end.p0i8(i64 128, i8* nonnull %v4) #3
ret i32 %v17
}
; FP elimination disabled.
;
; CHECK-LABEL: sammy:
; CHECK: allocframe
; CHECK: dealloc_return
define i32 @sammy(i32 %a0, i32 %a1) local_unnamed_addr #1 {
b2:
%v3 = alloca [32 x i32], align 8
%v4 = bitcast [32 x i32]* %v3 to i8*
call void @llvm.lifetime.start.p0i8(i64 128, i8* nonnull %v4) #3
br label %b5
b5: ; preds = %b5, %b2
%v6 = phi i32 [ 0, %b2 ], [ %v8, %b5 ]
%v7 = getelementptr inbounds [32 x i32], [32 x i32]* %v3, i32 0, i32 %v6
store i32 %v6, i32* %v7, align 4
%v8 = add nuw nsw i32 %v6, 1
%v9 = icmp eq i32 %v8, 32
br i1 %v9, label %b10, label %b5
b10: ; preds = %b5
%v11 = getelementptr inbounds [32 x i32], [32 x i32]* %v3, i32 0, i32 %a0
store i32 %a1, i32* %v11, align 4
br label %b12
b12: ; preds = %b12, %b10
%v13 = phi i32 [ 0, %b10 ], [ %v18, %b12 ]
%v14 = phi i32 [ 0, %b10 ], [ %v17, %b12 ]
%v15 = getelementptr inbounds [32 x i32], [32 x i32]* %v3, i32 0, i32 %v13
%v16 = load i32, i32* %v15, align 4
%v17 = add nsw i32 %v16, %v14
%v18 = add nuw nsw i32 %v13, 1
%v19 = icmp eq i32 %v18, 32
br i1 %v19, label %b20, label %b12
b20: ; preds = %b12
call void @llvm.lifetime.end.p0i8(i64 128, i8* nonnull %v4) #3
ret i32 %v17
}
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #2
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #2
attributes #0 = { nounwind readnone "no-frame-pointer-elim"="false" "target-cpu"="hexagonv60" }
attributes #1 = { nounwind readnone "no-frame-pointer-elim"="true" "target-cpu"="hexagonv60" }
attributes #2 = { argmemonly nounwind }
attributes #3 = { nounwind }

View File

@ -1,23 +0,0 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
@num = external global i32
@acc = external global i32
@num2 = external global i32
; CHECK: allocframe
; CHECK: dealloc_return
define i32 @foo() nounwind {
entry:
%i = alloca i32, align 4
%0 = load i32, i32* @num, align 4
store i32 %0, i32* %i, align 4
%1 = load i32, i32* %i, align 4
%2 = load i32, i32* @acc, align 4
%mul = mul nsw i32 %1, %2
%3 = load i32, i32* @num2, align 4
%add = add nsw i32 %mul, %3
store i32 %add, i32* %i, align 4
%4 = load i32, i32* %i, align 4
ret i32 %4
}

View File

@ -6,7 +6,7 @@
@Reg = common global i32 0, align 4
define i32 @main() nounwind {
entry:
; CHECK: if (cmp.gt(r{{[0-9]+}},r{{[0-9]+}}.new)) jump:{{[t|nt]}} .LBB{{[0-9]+}}_{{[0-9]+}}
; CHECK: if (cmp.gt(r{{[0-9]+}}.new,r{{[0-9]+}})) jump:{{[t|nt]}} .LBB{{[0-9]+}}_{{[0-9]+}}
%Reg2 = alloca i32, align 4
%0 = load i32, i32* %Reg2, align 4
%1 = load i32, i32* @Reg, align 4