This patch implements the atomic intrinsics atomic.load.add (sub, and, or,
xor, nand), atomic.swap and atomic.cmp.swap, all in i8, i16 and i32 versions.
The intrinsics are implemented by creating pseudo-instructions, which are
then expanded in the method MipsTargetLowering::EmitInstrWithCustomInserter.

Patch by Sasa Stankovic.

llvm-svn: 132323
Bruno Cardoso Lopes 2011-05-31 02:54:07 +00:00
parent bf3c1251e0
commit 98fc4c8bbc
5 changed files with 917 additions and 6 deletions
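
As context for the lowering below, a minimal sketch of C code that exercises
these intrinsics (assuming a frontend, such as llvm-gcc, that lowers the GCC
__sync builtins to the atomic intrinsics; the function names are illustrative):

// Each builtin maps onto one of the new pseudo-instructions, which
// EmitInstrWithCustomInserter then expands into an ll/sc retry loop.
int x;
int fetch_add(int incr)    { return __sync_fetch_and_add(&x, incr); }        // ATOMIC_LOAD_ADD_I32
int swap_val(int val)      { return __sync_lock_test_and_set(&x, val); }     // ATOMIC_SWAP_I32
int cmp_swap(int o, int n) { return __sync_val_compare_and_swap(&x, o, n); } // ATOMIC_CMP_SWAP_I32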


@@ -557,11 +557,6 @@ static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) {
MachineBasicBlock *
MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
// There is no need to expand CMov instructions if target has
// conditional moves.
if (Subtarget->hasCondMov())
return BB;
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
bool isFPCmp = false;
DebugLoc dl = MI->getDebugLoc();
@@ -569,6 +564,63 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
switch (MI->getOpcode()) {
default: assert(false && "Unexpected instr type to insert");
case Mips::ATOMIC_LOAD_ADD_I8:
return EmitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu);
case Mips::ATOMIC_LOAD_ADD_I16:
return EmitAtomicBinaryPartword(MI, BB, 2, Mips::ADDu);
case Mips::ATOMIC_LOAD_ADD_I32:
return EmitAtomicBinary(MI, BB, 4, Mips::ADDu);
case Mips::ATOMIC_LOAD_AND_I8:
return EmitAtomicBinaryPartword(MI, BB, 1, Mips::AND);
case Mips::ATOMIC_LOAD_AND_I16:
return EmitAtomicBinaryPartword(MI, BB, 2, Mips::AND);
case Mips::ATOMIC_LOAD_AND_I32:
return EmitAtomicBinary(MI, BB, 4, Mips::AND);
case Mips::ATOMIC_LOAD_OR_I8:
return EmitAtomicBinaryPartword(MI, BB, 1, Mips::OR);
case Mips::ATOMIC_LOAD_OR_I16:
return EmitAtomicBinaryPartword(MI, BB, 2, Mips::OR);
case Mips::ATOMIC_LOAD_OR_I32:
return EmitAtomicBinary(MI, BB, 4, Mips::OR);
case Mips::ATOMIC_LOAD_XOR_I8:
return EmitAtomicBinaryPartword(MI, BB, 1, Mips::XOR);
case Mips::ATOMIC_LOAD_XOR_I16:
return EmitAtomicBinaryPartword(MI, BB, 2, Mips::XOR);
case Mips::ATOMIC_LOAD_XOR_I32:
return EmitAtomicBinary(MI, BB, 4, Mips::XOR);
case Mips::ATOMIC_LOAD_NAND_I8:
return EmitAtomicBinaryPartword(MI, BB, 1, 0, true);
case Mips::ATOMIC_LOAD_NAND_I16:
return EmitAtomicBinaryPartword(MI, BB, 2, 0, true);
case Mips::ATOMIC_LOAD_NAND_I32:
return EmitAtomicBinary(MI, BB, 4, 0, true);
case Mips::ATOMIC_LOAD_SUB_I8:
return EmitAtomicBinaryPartword(MI, BB, 1, Mips::SUBu);
case Mips::ATOMIC_LOAD_SUB_I16:
return EmitAtomicBinaryPartword(MI, BB, 2, Mips::SUBu);
case Mips::ATOMIC_LOAD_SUB_I32:
return EmitAtomicBinary(MI, BB, 4, Mips::SUBu);
case Mips::ATOMIC_SWAP_I8:
return EmitAtomicBinaryPartword(MI, BB, 1, 0);
case Mips::ATOMIC_SWAP_I16:
return EmitAtomicBinaryPartword(MI, BB, 2, 0);
case Mips::ATOMIC_SWAP_I32:
return EmitAtomicBinary(MI, BB, 4, 0);
case Mips::ATOMIC_CMP_SWAP_I8:
return EmitAtomicCmpSwapPartword(MI, BB, 1);
case Mips::ATOMIC_CMP_SWAP_I16:
return EmitAtomicCmpSwapPartword(MI, BB, 2);
case Mips::ATOMIC_CMP_SWAP_I32:
return EmitAtomicCmpSwap(MI, BB, 4);
case Mips::MOVT:
case Mips::MOVT_S:
case Mips::MOVT_D:
@@ -593,6 +645,11 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
break;
}
// There is no need to expand CMov instructions if target has
// conditional moves.
if (Subtarget->hasCondMov())
return BB;
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
// destination vreg to set, the condition code register to branch on, the
@@ -660,6 +717,471 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return BB;
}
// This function also handles Mips::ATOMIC_SWAP_I32 (when BinOpcode == 0), and
// Mips::ATOMIC_LOAD_NAND_I32 (when Nand == true)
MachineBasicBlock *
MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size, unsigned BinOpcode, bool Nand) const {
assert(Size == 4 && "Unsupported size for EmitAtomicBinary.");
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
unsigned Dest = MI->getOperand(0).getReg();
unsigned Ptr = MI->getOperand(1).getReg();
unsigned Incr = MI->getOperand(2).getReg();
unsigned Oldval = RegInfo.createVirtualRegister(RC);
unsigned Tmp1 = RegInfo.createVirtualRegister(RC);
unsigned Tmp2 = RegInfo.createVirtualRegister(RC);
// insert new blocks after the current block
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineFunction::iterator It = BB;
++It;
MF->insert(It, loopMBB);
MF->insert(It, exitMBB);
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
llvm::next(MachineBasicBlock::iterator(MI)),
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// thisMBB:
// ...
// sw incr, fi(sp) // store incr to stack (when BinOpcode == 0)
// fallthrough --> loopMBB
// Note: for atomic.swap (when BinOpcode == 0), storing incr to the stack
// before the loop and then reloading it in block loopMBB is necessary to
// prevent the MachineLICM pass from hoisting the "or" instruction out of
// loopMBB.
int fi;
if (BinOpcode == 0 && !Nand) {
// Get or create a temporary stack location.
MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
fi = MipsFI->getAtomicFrameIndex();
if (fi == -1) {
fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false);
MipsFI->setAtomicFrameIndex(fi);
}
BuildMI(BB, dl, TII->get(Mips::SW))
.addReg(Incr).addImm(0).addFrameIndex(fi);
}
BB->addSuccessor(loopMBB);
// loopMBB:
// ll oldval, 0(ptr)
// or dest, $0, oldval
// <binop> tmp1, oldval, incr
// sc tmp1, 0(ptr)
// beq tmp1, $0, loopMBB
BB = loopMBB;
BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addImm(0).addReg(Ptr);
BuildMI(BB, dl, TII->get(Mips::OR), Dest).addReg(Mips::ZERO).addReg(Oldval);
if (Nand) {
// and tmp2, oldval, incr
// nor tmp1, $0, tmp2
BuildMI(BB, dl, TII->get(Mips::AND), Tmp2).addReg(Oldval).addReg(Incr);
BuildMI(BB, dl, TII->get(Mips::NOR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2);
} else if (BinOpcode) {
// <binop> tmp1, oldval, incr
BuildMI(BB, dl, TII->get(BinOpcode), Tmp1).addReg(Oldval).addReg(Incr);
} else {
// lw tmp2, fi(sp) // load incr from stack
// or tmp1, $zero, tmp2
BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addImm(0).addFrameIndex(fi);
BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2);
}
BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addImm(0).addReg(Ptr);
BuildMI(BB, dl, TII->get(Mips::BEQ))
.addReg(Tmp1).addReg(Mips::ZERO).addMBB(loopMBB);
BB->addSuccessor(loopMBB);
BB->addSuccessor(exitMBB);
MI->eraseFromParent(); // The instruction is gone now.
return BB;
}
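
The loop this function builds, as a C-level sketch (a compare-and-swap loop
stands in for the ll/sc pair, which, like sc, may fail and force a retry;
shown for the add case):

// Semantic model of loopMBB: load the old value (ll), apply the binop, and
// retry from the top whenever the conditional store (sc) fails. Dest always
// receives the value that was loaded.
static int AtomicBinOpModel(volatile int *Ptr, int Incr) {
  int Oldval;
  do {
    Oldval = *Ptr;               // ll oldval, 0(ptr); or dest, $0, oldval
  } while (!__sync_bool_compare_and_swap(Ptr, Oldval, Oldval + Incr));
                                 // sc tmp1, 0(ptr); beq tmp1, $0, loopMBB
  return Oldval;
}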
MachineBasicBlock *
MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode,
bool Nand) const {
assert((Size == 1 || Size == 2) &&
"Unsupported size for EmitAtomicBinaryPartword.");
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
unsigned Dest = MI->getOperand(0).getReg();
unsigned Ptr = MI->getOperand(1).getReg();
unsigned Incr = MI->getOperand(2).getReg();
unsigned Addr = RegInfo.createVirtualRegister(RC);
unsigned Shift = RegInfo.createVirtualRegister(RC);
unsigned Mask = RegInfo.createVirtualRegister(RC);
unsigned Mask2 = RegInfo.createVirtualRegister(RC);
unsigned Newval = RegInfo.createVirtualRegister(RC);
unsigned Oldval = RegInfo.createVirtualRegister(RC);
unsigned Incr2 = RegInfo.createVirtualRegister(RC);
unsigned Tmp1 = RegInfo.createVirtualRegister(RC);
unsigned Tmp2 = RegInfo.createVirtualRegister(RC);
unsigned Tmp3 = RegInfo.createVirtualRegister(RC);
unsigned Tmp4 = RegInfo.createVirtualRegister(RC);
unsigned Tmp5 = RegInfo.createVirtualRegister(RC);
unsigned Tmp6 = RegInfo.createVirtualRegister(RC);
unsigned Tmp7 = RegInfo.createVirtualRegister(RC);
unsigned Tmp8 = RegInfo.createVirtualRegister(RC);
unsigned Tmp9 = RegInfo.createVirtualRegister(RC);
unsigned Tmp10 = RegInfo.createVirtualRegister(RC);
unsigned Tmp11 = RegInfo.createVirtualRegister(RC);
unsigned Tmp12 = RegInfo.createVirtualRegister(RC);
// insert new blocks after the current block
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineFunction::iterator It = BB;
++It;
MF->insert(It, loopMBB);
MF->insert(It, exitMBB);
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
llvm::next(MachineBasicBlock::iterator(MI)),
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// thisMBB:
// addiu tmp1,$0,-4 # 0xfffffffc
// and addr,ptr,tmp1
// andi tmp2,ptr,3
// sll shift,tmp2,3
// ori tmp3,$0,255 # 0xff
// sll mask,tmp3,shift
// nor mask2,$0,mask
// andi tmp4,incr,255
// sll incr2,tmp4,shift
// sw incr2, fi(sp) // store incr2 to stack (when BinOpcode == 0)
// Note: for atomic.swap (when BinOpcode == 0), storing incr2 to the stack
// before the loop and then reloading it in block loopMBB is necessary to
// prevent the MachineLICM pass from hoisting the "or" instruction out of
// loopMBB.
int64_t MaskImm = (Size == 1) ? 255 : 65535;
BuildMI(BB, dl, TII->get(Mips::ADDiu), Tmp1).addReg(Mips::ZERO).addImm(-4);
BuildMI(BB, dl, TII->get(Mips::AND), Addr).addReg(Ptr).addReg(Tmp1);
BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp2).addReg(Ptr).addImm(3);
BuildMI(BB, dl, TII->get(Mips::SLL), Shift).addReg(Tmp2).addImm(3);
BuildMI(BB, dl, TII->get(Mips::ORi), Tmp3).addReg(Mips::ZERO).addImm(MaskImm);
BuildMI(BB, dl, TII->get(Mips::SLL), Mask).addReg(Tmp3).addReg(Shift);
BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
if (BinOpcode != Mips::SUBu) {
BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp4).addReg(Incr).addImm(MaskImm);
BuildMI(BB, dl, TII->get(Mips::SLL), Incr2).addReg(Tmp4).addReg(Shift);
} else {
BuildMI(BB, dl, TII->get(Mips::SUBu), Tmp4).addReg(Mips::ZERO).addReg(Incr);
BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp5).addReg(Tmp4).addImm(MaskImm);
BuildMI(BB, dl, TII->get(Mips::SLL), Incr2).addReg(Tmp5).addReg(Shift);
}
int fi;
if (BinOpcode == 0 && !Nand) {
// Get or create a temporary stack location.
MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
fi = MipsFI->getAtomicFrameIndex();
if (fi == -1) {
fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false);
MipsFI->setAtomicFrameIndex(fi);
}
BuildMI(BB, dl, TII->get(Mips::SW))
.addReg(Incr2).addImm(0).addFrameIndex(fi);
}
BB->addSuccessor(loopMBB);
// loopMBB:
// ll oldval,0(addr)
// binop tmp7,oldval,incr2
// and newval,tmp7,mask
// and tmp8,oldval,mask2
// or tmp9,tmp8,newval
// sc tmp9,0(addr)
// beq tmp9,$0,loopMBB
BB = loopMBB;
BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addImm(0).addReg(Addr);
if (Nand) {
// and tmp6, oldval, incr2
// nor tmp7, $0, tmp6
BuildMI(BB, dl, TII->get(Mips::AND), Tmp6).addReg(Oldval).addReg(Incr2);
BuildMI(BB, dl, TII->get(Mips::NOR), Tmp7).addReg(Mips::ZERO).addReg(Tmp6);
} else if (BinOpcode == Mips::SUBu) {
// addu tmp7, oldval, incr2
BuildMI(BB, dl, TII->get(Mips::ADDu), Tmp7).addReg(Oldval).addReg(Incr2);
} else if (BinOpcode) {
// <binop> tmp7, oldval, incr2
BuildMI(BB, dl, TII->get(BinOpcode), Tmp7).addReg(Oldval).addReg(Incr2);
} else {
// lw tmp6, fi(sp) // load incr2 from stack
// or tmp7, $zero, tmp6
BuildMI(BB, dl, TII->get(Mips::LW), Tmp6).addImm(0).addFrameIndex(fi);
BuildMI(BB, dl, TII->get(Mips::OR), Tmp7).addReg(Mips::ZERO).addReg(Tmp6);
}
BuildMI(BB, dl, TII->get(Mips::AND), Newval).addReg(Tmp7).addReg(Mask);
BuildMI(BB, dl, TII->get(Mips::AND), Tmp8).addReg(Oldval).addReg(Mask2);
BuildMI(BB, dl, TII->get(Mips::OR), Tmp9).addReg(Tmp8).addReg(Newval);
BuildMI(BB, dl, TII->get(Mips::SC), Tmp9).addReg(Tmp9).addImm(0).addReg(Addr);
BuildMI(BB, dl, TII->get(Mips::BEQ))
.addReg(Tmp9).addReg(Mips::ZERO).addMBB(loopMBB);
BB->addSuccessor(loopMBB);
BB->addSuccessor(exitMBB);
// exitMBB:
// and tmp10,oldval,mask
// srl tmp11,tmp10,shift
// sll tmp12,tmp11,24
// sra dest,tmp12,24
BB = exitMBB;
int64_t ShiftImm = (Size == 1) ? 24 : 16;
// These are emitted in reverse order, since each is inserted at the
// beginning of exitMBB.
BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRA), Dest)
.addReg(Tmp12).addImm(ShiftImm);
BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SLL), Tmp12)
.addReg(Tmp11).addImm(ShiftImm);
BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRL), Tmp11)
.addReg(Tmp10).addReg(Shift);
BuildMI(*BB, BB->begin(), dl, TII->get(Mips::AND), Tmp10)
.addReg(Oldval).addReg(Mask);
MI->eraseFromParent(); // The instruction is gone now.
return BB;
}
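
The address and mask arithmetic in thisMBB, as a self-contained sketch
(little-endian byte numbering, matching the -march=mipsel test below): the
i8/i16 cell is updated inside its naturally aligned containing word, so only
word-sized ll/sc is ever issued.

#include <stdint.h>

static void PartwordSetup(uint32_t Ptr, unsigned Size, uint32_t *Addr,
                          uint32_t *Shift, uint32_t *Mask, uint32_t *Mask2) {
  *Addr  = Ptr & ~3u;                             // addiu tmp1,$0,-4; and
  *Shift = (Ptr & 3u) * 8;                        // andi tmp2,ptr,3; sll
  uint32_t MaskImm = (Size == 1) ? 0xff : 0xffff; // ori tmp3,$0,MaskImm
  *Mask  = MaskImm << *Shift;                     // sll mask,tmp3,shift
  *Mask2 = ~*Mask;                                // nor mask2,$0,mask
}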
MachineBasicBlock *
MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Size) const {
assert(Size == 4 && "Unsupported size for EmitAtomicCmpSwap.");
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
unsigned Dest = MI->getOperand(0).getReg();
unsigned Ptr = MI->getOperand(1).getReg();
unsigned Oldval = MI->getOperand(2).getReg();
unsigned Newval = MI->getOperand(3).getReg();
unsigned Tmp1 = RegInfo.createVirtualRegister(RC);
unsigned Tmp2 = RegInfo.createVirtualRegister(RC);
// insert new blocks after the current block
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineFunction::iterator It = BB;
++It;
MF->insert(It, loop1MBB);
MF->insert(It, loop2MBB);
MF->insert(It, exitMBB);
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
llvm::next(MachineBasicBlock::iterator(MI)),
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// Get or create a temporary stack location.
MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
int fi = MipsFI->getAtomicFrameIndex();
if (fi == -1) {
fi = MF->getFrameInfo()->CreateStackObject(Size, Size, false);
MipsFI->setAtomicFrameIndex(fi);
}
// thisMBB:
// ...
// sw newval, fi(sp) // store newval to stack
// fallthrough --> loop1MBB
// Note: storing newval to the stack before the loop and then reloading it
// in block loop2MBB is necessary to prevent the MachineLICM pass from
// hoisting the "or" instruction out of loop2MBB.
BuildMI(BB, dl, TII->get(Mips::SW))
.addReg(Newval).addImm(0).addFrameIndex(fi);
BB->addSuccessor(loop1MBB);
// loop1MBB:
// ll dest, 0(ptr)
// bne dest, oldval, exitMBB
BB = loop1MBB;
BuildMI(BB, dl, TII->get(Mips::LL), Dest).addImm(0).addReg(Ptr);
BuildMI(BB, dl, TII->get(Mips::BNE))
.addReg(Dest).addReg(Oldval).addMBB(exitMBB);
BB->addSuccessor(exitMBB);
BB->addSuccessor(loop2MBB);
// loop2MBB:
// lw tmp2, fi(sp) // load newval from stack
// or tmp1, $0, tmp2
// sc tmp1, 0(ptr)
// beq tmp1, $0, loop1MBB
BB = loop2MBB;
BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addImm(0).addFrameIndex(fi);
BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2);
BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addImm(0).addReg(Ptr);
BuildMI(BB, dl, TII->get(Mips::BEQ))
.addReg(Tmp1).addReg(Mips::ZERO).addMBB(loop1MBB);
BB->addSuccessor(loop1MBB);
BB->addSuccessor(exitMBB);
MI->eraseFromParent(); // The instruction is gone now.
return BB;
}
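
The two-block loop, modeled at the C level; the early bne exit on a value
mismatch is what distinguishes it from the binary-op loop above:

// Semantic model of loop1MBB/loop2MBB: bail out as soon as the loaded value
// differs from Oldval; otherwise retry the conditional store until it
// succeeds. Dest is the value the last ll observed.
static int CmpSwapModel(volatile int *Ptr, int Oldval, int Newval) {
  int Dest;
  do {
    Dest = *Ptr;                 // ll  dest, 0(ptr)
    if (Dest != Oldval)          // bne dest, oldval, exitMBB
      break;
  } while (!__sync_bool_compare_and_swap(Ptr, Oldval, Newval));
                                 // sc tmp1, 0(ptr); beq tmp1, $0, loop1MBB
  return Dest;
}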
MachineBasicBlock *
MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Size) const {
assert((Size == 1 || Size == 2) &&
"Unsupported size for EmitAtomicCmpSwapPartword.");
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
unsigned Dest = MI->getOperand(0).getReg();
unsigned Ptr = MI->getOperand(1).getReg();
unsigned Oldval = MI->getOperand(2).getReg();
unsigned Newval = MI->getOperand(3).getReg();
unsigned Addr = RegInfo.createVirtualRegister(RC);
unsigned Shift = RegInfo.createVirtualRegister(RC);
unsigned Mask = RegInfo.createVirtualRegister(RC);
unsigned Mask2 = RegInfo.createVirtualRegister(RC);
unsigned Oldval2 = RegInfo.createVirtualRegister(RC);
unsigned Oldval3 = RegInfo.createVirtualRegister(RC);
unsigned Oldval4 = RegInfo.createVirtualRegister(RC);
unsigned Newval2 = RegInfo.createVirtualRegister(RC);
unsigned Tmp1 = RegInfo.createVirtualRegister(RC);
unsigned Tmp2 = RegInfo.createVirtualRegister(RC);
unsigned Tmp3 = RegInfo.createVirtualRegister(RC);
unsigned Tmp4 = RegInfo.createVirtualRegister(RC);
unsigned Tmp5 = RegInfo.createVirtualRegister(RC);
unsigned Tmp6 = RegInfo.createVirtualRegister(RC);
unsigned Tmp7 = RegInfo.createVirtualRegister(RC);
unsigned Tmp8 = RegInfo.createVirtualRegister(RC);
unsigned Tmp9 = RegInfo.createVirtualRegister(RC);
// insert new blocks after the current block
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineFunction::iterator It = BB;
++It;
MF->insert(It, loop1MBB);
MF->insert(It, loop2MBB);
MF->insert(It, exitMBB);
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
llvm::next(MachineBasicBlock::iterator(MI)),
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// thisMBB:
// addiu tmp1,$0,-4 # 0xfffffffc
// and addr,ptr,tmp1
// andi tmp2,ptr,3
// sll shift,tmp2,3
// ori tmp3,$0,255 # 0xff
// sll mask,tmp3,shift
// nor mask2,$0,mask
// andi tmp4,oldval,255
// sll oldval2,tmp4,shift
// andi tmp5,newval,255
// sll newval2,tmp5,shift
int64_t MaskImm = (Size == 1) ? 255 : 65535;
BuildMI(BB, dl, TII->get(Mips::ADDiu), Tmp1).addReg(Mips::ZERO).addImm(-4);
BuildMI(BB, dl, TII->get(Mips::AND), Addr).addReg(Ptr).addReg(Tmp1);
BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp2).addReg(Ptr).addImm(3);
BuildMI(BB, dl, TII->get(Mips::SLL), Shift).addReg(Tmp2).addImm(3);
BuildMI(BB, dl, TII->get(Mips::ORi), Tmp3).addReg(Mips::ZERO).addImm(MaskImm);
BuildMI(BB, dl, TII->get(Mips::SLL), Mask).addReg(Tmp3).addReg(Shift);
BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp4).addReg(Oldval).addImm(MaskImm);
BuildMI(BB, dl, TII->get(Mips::SLL), Oldval2).addReg(Tmp4).addReg(Shift);
BuildMI(BB, dl, TII->get(Mips::ANDi), Tmp5).addReg(Newval).addImm(MaskImm);
BuildMI(BB, dl, TII->get(Mips::SLL), Newval2).addReg(Tmp5).addReg(Shift);
BB->addSuccessor(loop1MBB);
// loop1MBB:
// ll oldval3,0(addr)
// and oldval4,oldval3,mask
// bne oldval4,oldval2,exitMBB
BB = loop1MBB;
BuildMI(BB, dl, TII->get(Mips::LL), Oldval3).addImm(0).addReg(Addr);
BuildMI(BB, dl, TII->get(Mips::AND), Oldval4).addReg(Oldval3).addReg(Mask);
BuildMI(BB, dl, TII->get(Mips::BNE))
.addReg(Oldval4).addReg(Oldval2).addMBB(exitMBB);
BB->addSuccessor(exitMBB);
BB->addSuccessor(loop2MBB);
// loop2MBB:
// and tmp6,oldval3,mask2
// or tmp7,tmp6,newval2
// sc tmp7,0(addr)
// beq tmp7,$0,loop1MBB
BB = loop2MBB;
BuildMI(BB, dl, TII->get(Mips::AND), Tmp6).addReg(Oldval3).addReg(Mask2);
BuildMI(BB, dl, TII->get(Mips::OR), Tmp7).addReg(Tmp6).addReg(Newval2);
BuildMI(BB, dl, TII->get(Mips::SC), Tmp7)
.addReg(Tmp7).addImm(0).addReg(Addr);
BuildMI(BB, dl, TII->get(Mips::BEQ))
.addReg(Tmp7).addReg(Mips::ZERO).addMBB(loop1MBB);
BB->addSuccessor(loop1MBB);
BB->addSuccessor(exitMBB);
// exitMBB:
// srl tmp8,oldval4,shift
// sll tmp9,tmp8,24
// sra dest,tmp9,24
BB = exitMBB;
int64_t ShiftImm = (Size == 1) ? 24 : 16;
// These are emitted in reverse order, since each is inserted at the
// beginning of exitMBB.
BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRA), Dest)
.addReg(Tmp9).addImm(ShiftImm);
BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SLL), Tmp9)
.addReg(Tmp8).addImm(ShiftImm);
BuildMI(*BB, BB->begin(), dl, TII->get(Mips::SRL), Tmp8)
.addReg(Oldval4).addReg(Shift);
MI->eraseFromParent(); // The instruction is gone now.
return BB;
}
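
And the exit-block extraction both partword expansions share, as a sketch:
isolate the lane, shift it down, then sign-extend the i8/i16 value to i32
with an sll/sra pair.

#include <stdint.h>

static int32_t ExtractResult(uint32_t Word, uint32_t Mask, uint32_t Shift,
                             unsigned Size) {
  uint32_t Lane = (Word & Mask) >> Shift;  // and; srl
  unsigned SA = (Size == 1) ? 24 : 16;
  return (int32_t)(Lane << SA) >> SA;      // sll; sra
}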
//===----------------------------------------------------------------------===//
// Misc Lower Operation implementation
//===----------------------------------------------------------------------===//


@@ -176,6 +176,16 @@ namespace llvm {
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size, unsigned BinOpcode, bool Nand = false) const;
MachineBasicBlock *EmitAtomicBinaryPartword(MachineInstr *MI,
MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode,
bool Nand = false) const;
MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
MachineBasicBlock *BB, unsigned Size) const;
MachineBasicBlock *EmitAtomicCmpSwapPartword(MachineInstr *MI,
MachineBasicBlock *BB, unsigned Size) const;
};
}


@@ -405,6 +405,115 @@ def ATMACRO : MipsPseudo<(outs), (ins), ".set\tat", []>;
def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>;
def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc), ".cprestore\t$loc\n", []>;
let usesCustomInserter = 1 in {
def ATOMIC_LOAD_ADD_I8 : MipsPseudo<
(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
"atomic_load_add_8\t$dst, $ptr, $incr",
[(set CPURegs:$dst, (atomic_load_add_8 CPURegs:$ptr, CPURegs:$incr))]>;
def ATOMIC_LOAD_ADD_I16 : MipsPseudo<
(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
"atomic_load_add_16\t$dst, $ptr, $incr",
[(set CPURegs:$dst, (atomic_load_add_16 CPURegs:$ptr, CPURegs:$incr))]>;
def ATOMIC_LOAD_ADD_I32 : MipsPseudo<
(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
"atomic_load_add_32\t$dst, $ptr, $incr",
[(set CPURegs:$dst, (atomic_load_add_32 CPURegs:$ptr, CPURegs:$incr))]>;
def ATOMIC_LOAD_SUB_I8 : MipsPseudo<
(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
"atomic_load_sub_8\t$dst, $ptr, $incr",
[(set CPURegs:$dst, (atomic_load_sub_8 CPURegs:$ptr, CPURegs:$incr))]>;
def ATOMIC_LOAD_SUB_I16 : MipsPseudo<
(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
"atomic_load_sub_16\t$dst, $ptr, $incr",
[(set CPURegs:$dst, (atomic_load_sub_16 CPURegs:$ptr, CPURegs:$incr))]>;
def ATOMIC_LOAD_SUB_I32 : MipsPseudo<
(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
"atomic_load_sub_32\t$dst, $ptr, $incr",
[(set CPURegs:$dst, (atomic_load_sub_32 CPURegs:$ptr, CPURegs:$incr))]>;
def ATOMIC_LOAD_AND_I8 : MipsPseudo<
(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
"atomic_load_and_8\t$dst, $ptr, $incr",
[(set CPURegs:$dst, (atomic_load_and_8 CPURegs:$ptr, CPURegs:$incr))]>;
def ATOMIC_LOAD_AND_I16 : MipsPseudo<
(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
"atomic_load_and_16\t$dst, $ptr, $incr",
[(set CPURegs:$dst, (atomic_load_and_16 CPURegs:$ptr, CPURegs:$incr))]>;
def ATOMIC_LOAD_AND_I32 : MipsPseudo<
(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
"atomic_load_and_32\t$dst, $ptr, $incr",
[(set CPURegs:$dst, (atomic_load_and_32 CPURegs:$ptr, CPURegs:$incr))]>;
def ATOMIC_LOAD_OR_I8 : MipsPseudo<
(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
"atomic_load_or_8\t$dst, $ptr, $incr",
[(set CPURegs:$dst, (atomic_load_or_8 CPURegs:$ptr, CPURegs:$incr))]>;
def ATOMIC_LOAD_OR_I16 : MipsPseudo<
(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
"atomic_load_or_16\t$dst, $ptr, $incr",
[(set CPURegs:$dst, (atomic_load_or_16 CPURegs:$ptr, CPURegs:$incr))]>;
def ATOMIC_LOAD_OR_I32 : MipsPseudo<
(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
"atomic_load_or_32\t$dst, $ptr, $incr",
[(set CPURegs:$dst, (atomic_load_or_32 CPURegs:$ptr, CPURegs:$incr))]>;
def ATOMIC_LOAD_XOR_I8 : MipsPseudo<
(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
"atomic_load_xor_8\t$dst, $ptr, $incr",
[(set CPURegs:$dst, (atomic_load_xor_8 CPURegs:$ptr, CPURegs:$incr))]>;
def ATOMIC_LOAD_XOR_I16 : MipsPseudo<
(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
"atomic_load_xor_16\t$dst, $ptr, $incr",
[(set CPURegs:$dst, (atomic_load_xor_16 CPURegs:$ptr, CPURegs:$incr))]>;
def ATOMIC_LOAD_XOR_I32 : MipsPseudo<
(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
"atomic_load_xor_32\t$dst, $ptr, $incr",
[(set CPURegs:$dst, (atomic_load_xor_32 CPURegs:$ptr, CPURegs:$incr))]>;
def ATOMIC_LOAD_NAND_I8 : MipsPseudo<
(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
"atomic_load_nand_8\t$dst, $ptr, $incr",
[(set CPURegs:$dst, (atomic_load_nand_8 CPURegs:$ptr, CPURegs:$incr))]>;
def ATOMIC_LOAD_NAND_I16 : MipsPseudo<
(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
"atomic_load_nand_16\t$dst, $ptr, $incr",
[(set CPURegs:$dst, (atomic_load_nand_16 CPURegs:$ptr, CPURegs:$incr))]>;
def ATOMIC_LOAD_NAND_I32 : MipsPseudo<
(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
"atomic_load_nand_32\t$dst, $ptr, $incr",
[(set CPURegs:$dst, (atomic_load_nand_32 CPURegs:$ptr, CPURegs:$incr))]>;
def ATOMIC_SWAP_I8 : MipsPseudo<
(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$val),
"atomic_swap_8\t$dst, $ptr, $val",
[(set CPURegs:$dst, (atomic_swap_8 CPURegs:$ptr, CPURegs:$val))]>;
def ATOMIC_SWAP_I16 : MipsPseudo<
(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$val),
"atomic_swap_16\t$dst, $ptr, $val",
[(set CPURegs:$dst, (atomic_swap_16 CPURegs:$ptr, CPURegs:$val))]>;
def ATOMIC_SWAP_I32 : MipsPseudo<
(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$val),
"atomic_swap_32\t$dst, $ptr, $val",
[(set CPURegs:$dst, (atomic_swap_32 CPURegs:$ptr, CPURegs:$val))]>;
def ATOMIC_CMP_SWAP_I8 : MipsPseudo<
(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval),
"atomic_cmp_swap_8\t$dst, $ptr, $oldval, $newval",
[(set CPURegs:$dst,
(atomic_cmp_swap_8 CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval))]>;
def ATOMIC_CMP_SWAP_I16 : MipsPseudo<
(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval),
"atomic_cmp_swap_16\t$dst, $ptr, $oldval, $newval",
[(set CPURegs:$dst,
(atomic_cmp_swap_16 CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval))]>;
def ATOMIC_CMP_SWAP_I32 : MipsPseudo<
(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval),
"atomic_cmp_swap_32\t$dst, $ptr, $oldval, $newval",
[(set CPURegs:$dst,
(atomic_cmp_swap_32 CPURegs:$ptr, CPURegs:$oldval, CPURegs:$newval))]>;
}
//===----------------------------------------------------------------------===//
// Instruction definition
//===----------------------------------------------------------------------===//
@@ -459,6 +568,14 @@ def SB : StoreM<0x28, "sb", truncstorei8>;
def SH : StoreM<0x29, "sh", truncstorei16>;
def SW : StoreM<0x2b, "sw", store>;
/// Load-linked, Store-conditional
let hasDelaySlot = 1 in
def LL : FI<0x30, (outs CPURegs:$dst), (ins mem:$addr),
"ll\t$dst, $addr", [], IILoad>;
let Constraints = "$src = $dst" in
def SC : FI<0x38, (outs CPURegs:$dst), (ins CPURegs:$src, mem:$addr),
"sc\t$src, $addr", [], IIStore>;
/// Jump and Branch Instructions
def J : JumpFJ<0x02, "j">;
def JR : JumpFR<0x00, 0x08, "jr">;


@@ -48,11 +48,17 @@ private:
std::pair<int, int> InArgFIRange, OutArgFIRange;
int GPFI; // Index of the frame object for restoring $gp
unsigned MaxCallFrameSize;
/// AtomicFrameIndex - To implement atomic.swap and atomic.cmp.swap
/// intrinsics, it is necessary to use a temporary stack location.
/// This field holds the frame index of this location.
int AtomicFrameIndex;
public:
MipsFunctionInfo(MachineFunction& MF)
: SRetReturnReg(0), GlobalBaseReg(0),
VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)),
OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), MaxCallFrameSize(0)
OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), MaxCallFrameSize(0),
AtomicFrameIndex(-1)
{}
bool isInArgFI(int FI) const {
@@ -86,6 +92,9 @@ public:
unsigned getMaxCallFrameSize() const { return MaxCallFrameSize; }
void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; }
int getAtomicFrameIndex() const { return AtomicFrameIndex; }
void setAtomicFrameIndex(int Index) { AtomicFrameIndex = Index; }
};
} // end of namespace llvm


@@ -0,0 +1,253 @@
; RUN: llc -march=mipsel -mcpu=mips2 < %s | FileCheck %s
declare i32 @llvm.atomic.load.add.i32.p0i32(i32* nocapture, i32) nounwind
declare i32 @llvm.atomic.load.nand.i32.p0i32(i32* nocapture, i32) nounwind
declare i32 @llvm.atomic.swap.i32.p0i32(i32* nocapture, i32) nounwind
declare i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* nocapture, i32, i32) nounwind
declare i8 @llvm.atomic.load.add.i8.p0i8(i8* nocapture, i8) nounwind
declare i8 @llvm.atomic.load.sub.i8.p0i8(i8* nocapture, i8) nounwind
declare i8 @llvm.atomic.load.nand.i8.p0i8(i8* nocapture, i8) nounwind
declare i8 @llvm.atomic.swap.i8.p0i8(i8* nocapture, i8) nounwind
declare i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* nocapture, i8, i8) nounwind
@x = common global i32 0, align 4
define i32 @AtomicLoadAdd32(i32 %incr) nounwind {
entry:
%0 = call i32 @llvm.atomic.load.add.i32.p0i32(i32* @x, i32 %incr)
ret i32 %0
; CHECK: AtomicLoadAdd32:
; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp)
; CHECK: $[[BB0:[A-Z_0-9]+]]:
; CHECK: ll $[[R1:[0-9]+]], 0($[[R0]])
; CHECK: or $2, $zero, $[[R1]]
; CHECK: addu $[[R2:[0-9]+]], $[[R1]], $4
; CHECK: sc $[[R2]], 0($[[R0]])
; CHECK: beq $[[R2]], $zero, $[[BB0]]
}
define i32 @AtomicLoadNand32(i32 %incr) nounwind {
entry:
%0 = call i32 @llvm.atomic.load.nand.i32.p0i32(i32* @x, i32 %incr)
ret i32 %0
; CHECK: AtomicLoadNand32:
; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp)
; CHECK: $[[BB0:[A-Z_0-9]+]]:
; CHECK: ll $[[R1:[0-9]+]], 0($[[R0]])
; CHECK: or $2, $zero, $[[R1]]
; CHECK: and $[[R1]], $[[R1]], $4
; CHECK: nor $[[R2:[0-9]+]], $zero, $[[R1]]
; CHECK: sc $[[R2]], 0($[[R0]])
; CHECK: beq $[[R2]], $zero, $[[BB0]]
}
define i32 @AtomicSwap32(i32 %oldval) nounwind {
entry:
%0 = call i32 @llvm.atomic.swap.i32.p0i32(i32* @x, i32 %oldval)
ret i32 %0
; CHECK: AtomicSwap32:
; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp)
; CHECK: sw $4, [[OFFSET:[0-9]+]]($sp)
; CHECK: $[[BB0:[A-Z_0-9]+]]:
; CHECK: ll $[[R1:[0-9]+]], 0($[[R0]])
; CHECK: or $2, $zero, $[[R1]]
; CHECK: lw $[[R2:[0-9]+]], [[OFFSET]]($sp)
; CHECK: or $[[R3:[0-9]+]], $zero, $[[R2]]
; CHECK: sc $[[R3]], 0($[[R0]])
; CHECK: beq $[[R3]], $zero, $[[BB0]]
}
define i32 @AtomicCmpSwap32(i32 %oldval, i32 %newval) nounwind {
entry:
%0 = call i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* @x, i32 %oldval, i32 %newval)
ret i32 %0
; CHECK: AtomicCmpSwap32:
; CHECK: lw $[[R0:[0-9]+]], %got(x)($gp)
; CHECK: sw $5, [[OFFSET:[0-9]+]]($sp)
; CHECK: $[[BB0:[A-Z_0-9]+]]:
; CHECK: ll $2, 0($[[R0]])
; CHECK: bne $2, $4, $[[BB1:[A-Z_0-9]+]]
; CHECK: lw $[[R1:[0-9]+]], [[OFFSET]]($sp)
; CHECK: or $[[R2:[0-9]+]], $zero, $[[R1]]
; CHECK: sc $[[R2]], 0($[[R0]])
; CHECK: beq $[[R2]], $zero, $[[BB0]]
; CHECK: $[[BB1]]:
}
@y = common global i8 0, align 1
define signext i8 @AtomicLoadAdd8(i8 signext %incr) nounwind {
entry:
%0 = call i8 @llvm.atomic.load.add.i8.p0i8(i8* @y, i8 %incr)
ret i8 %0
; CHECK: AtomicLoadAdd8:
; CHECK: lw $[[R0:[0-9]+]], %got(y)($gp)
; CHECK: addiu $[[R1:[0-9]+]], $zero, -4
; CHECK: and $[[R2:[0-9]+]], $[[R0]], $[[R1]]
; CHECK: andi $[[R3:[0-9]+]], $[[R0]], 3
; CHECK: sll $[[R4:[0-9]+]], $[[R3]], 3
; CHECK: ori $[[R5:[0-9]+]], $zero, 255
; CHECK: sll $[[R6:[0-9]+]], $[[R5]], $[[R4]]
; CHECK: nor $[[R7:[0-9]+]], $zero, $[[R6]]
; CHECK: andi $[[R8:[0-9]+]], $4, 255
; CHECK: sll $[[R9:[0-9]+]], $[[R8]], $[[R4]]
; CHECK: $[[BB0:[A-Z_0-9]+]]:
; CHECK: ll $[[R10:[0-9]+]], 0($[[R2]])
; CHECK: addu $[[R11:[0-9]+]], $[[R10]], $[[R9]]
; CHECK: and $[[R12:[0-9]+]], $[[R11]], $[[R6]]
; CHECK: and $[[R13:[0-9]+]], $[[R10]], $[[R7]]
; CHECK: or $[[R14:[0-9]+]], $[[R13]], $[[R12]]
; CHECK: sc $[[R14]], 0($[[R2]])
; CHECK: beq $[[R14]], $zero, $[[BB0]]
; CHECK: and $[[R15:[0-9]+]], $[[R10]], $[[R6]]
; CHECK: srl $[[R16:[0-9]+]], $[[R15]], $[[R4]]
; CHECK: sll $[[R17:[0-9]+]], $[[R16]], 24
; CHECK: sra $2, $[[R17]], 24
}
define signext i8 @AtomicLoadSub8(i8 signext %incr) nounwind {
entry:
%0 = call i8 @llvm.atomic.load.sub.i8.p0i8(i8* @y, i8 %incr)
ret i8 %0
; CHECK: AtomicLoadSub8:
; CHECK: lw $[[R0:[0-9]+]], %got(y)($gp)
; CHECK: addiu $[[R1:[0-9]+]], $zero, -4
; CHECK: and $[[R2:[0-9]+]], $[[R0]], $[[R1]]
; CHECK: andi $[[R3:[0-9]+]], $[[R0]], 3
; CHECK: sll $[[R4:[0-9]+]], $[[R3]], 3
; CHECK: ori $[[R5:[0-9]+]], $zero, 255
; CHECK: sll $[[R6:[0-9]+]], $[[R5]], $[[R4]]
; CHECK: nor $[[R7:[0-9]+]], $zero, $[[R6]]
; CHECK: subu $[[R18:[0-9]+]], $zero, $4
; CHECK: andi $[[R8:[0-9]+]], $[[R18]], 255
; CHECK: sll $[[R9:[0-9]+]], $[[R8]], $[[R4]]
; CHECK: $[[BB0:[A-Z_0-9]+]]:
; CHECK: ll $[[R10:[0-9]+]], 0($[[R2]])
; CHECK: addu $[[R11:[0-9]+]], $[[R10]], $[[R9]]
; CHECK: and $[[R12:[0-9]+]], $[[R11]], $[[R6]]
; CHECK: and $[[R13:[0-9]+]], $[[R10]], $[[R7]]
; CHECK: or $[[R14:[0-9]+]], $[[R13]], $[[R12]]
; CHECK: sc $[[R14]], 0($[[R2]])
; CHECK: beq $[[R14]], $zero, $[[BB0]]
; CHECK: and $[[R15:[0-9]+]], $[[R10]], $[[R6]]
; CHECK: srl $[[R16:[0-9]+]], $[[R15]], $[[R4]]
; CHECK: sll $[[R17:[0-9]+]], $[[R16]], 24
; CHECK: sra $2, $[[R17]], 24
}
define signext i8 @AtomicLoadNand8(i8 signext %incr) nounwind {
entry:
%0 = call i8 @llvm.atomic.load.nand.i8.p0i8(i8* @y, i8 %incr)
ret i8 %0
; CHECK: AtomicLoadNand8:
; CHECK: lw $[[R0:[0-9]+]], %got(y)($gp)
; CHECK: addiu $[[R1:[0-9]+]], $zero, -4
; CHECK: and $[[R2:[0-9]+]], $[[R0]], $[[R1]]
; CHECK: andi $[[R3:[0-9]+]], $[[R0]], 3
; CHECK: sll $[[R4:[0-9]+]], $[[R3]], 3
; CHECK: ori $[[R5:[0-9]+]], $zero, 255
; CHECK: sll $[[R6:[0-9]+]], $[[R5]], $[[R4]]
; CHECK: nor $[[R7:[0-9]+]], $zero, $[[R6]]
; CHECK: andi $[[R8:[0-9]+]], $4, 255
; CHECK: sll $[[R9:[0-9]+]], $[[R8]], $[[R4]]
; CHECK: $[[BB0:[A-Z_0-9]+]]:
; CHECK: ll $[[R10:[0-9]+]], 0($[[R2]])
; CHECK: and $[[R18:[0-9]+]], $[[R10]], $[[R9]]
; CHECK: nor $[[R11:[0-9]+]], $zero, $[[R18]]
; CHECK: and $[[R12:[0-9]+]], $[[R11]], $[[R6]]
; CHECK: and $[[R13:[0-9]+]], $[[R10]], $[[R7]]
; CHECK: or $[[R14:[0-9]+]], $[[R13]], $[[R12]]
; CHECK: sc $[[R14]], 0($[[R2]])
; CHECK: beq $[[R14]], $zero, $[[BB0]]
; CHECK: and $[[R15:[0-9]+]], $[[R10]], $[[R6]]
; CHECK: srl $[[R16:[0-9]+]], $[[R15]], $[[R4]]
; CHECK: sll $[[R17:[0-9]+]], $[[R16]], 24
; CHECK: sra $2, $[[R17]], 24
}
define signext i8 @AtomicSwap8(i8 signext %oldval) nounwind {
entry:
%0 = call i8 @llvm.atomic.swap.i8.p0i8(i8* @y, i8 %oldval)
ret i8 %0
; CHECK: AtomicSwap8:
; CHECK: lw $[[R0:[0-9]+]], %got(y)($gp)
; CHECK: addiu $[[R1:[0-9]+]], $zero, -4
; CHECK: and $[[R2:[0-9]+]], $[[R0]], $[[R1]]
; CHECK: andi $[[R3:[0-9]+]], $[[R0]], 3
; CHECK: sll $[[R4:[0-9]+]], $[[R3]], 3
; CHECK: ori $[[R5:[0-9]+]], $zero, 255
; CHECK: sll $[[R6:[0-9]+]], $[[R5]], $[[R4]]
; CHECK: nor $[[R7:[0-9]+]], $zero, $[[R6]]
; CHECK: andi $[[R8:[0-9]+]], $4, 255
; CHECK: sll $[[R9:[0-9]+]], $[[R8]], $[[R4]]
; CHECK: sw $[[R9]], [[OFFSET:[0-9]+]]($sp)
; CHECK: $[[BB0:[A-Z_0-9]+]]:
; CHECK: ll $[[R10:[0-9]+]], 0($[[R2]])
; CHECK: lw $[[R18:[0-9]+]], [[OFFSET]]($sp)
; CHECK: or $[[R11:[0-9]+]], $zero, $[[R18]]
; CHECK: and $[[R12:[0-9]+]], $[[R11]], $[[R6]]
; CHECK: and $[[R13:[0-9]+]], $[[R10]], $[[R7]]
; CHECK: or $[[R14:[0-9]+]], $[[R13]], $[[R12]]
; CHECK: sc $[[R14]], 0($[[R2]])
; CHECK: beq $[[R14]], $zero, $[[BB0]]
; CHECK: and $[[R15:[0-9]+]], $[[R10]], $[[R6]]
; CHECK: srl $[[R16:[0-9]+]], $[[R15]], $[[R4]]
; CHECK: sll $[[R17:[0-9]+]], $[[R16]], 24
; CHECK: sra $2, $[[R17]], 24
}
define signext i8 @AtomicCmpSwap8(i8 signext %oldval, i8 signext %newval) nounwind {
entry:
%0 = call i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* @y, i8 %oldval, i8 %newval)
ret i8 %0
; CHECK: AtomicCmpSwap8:
; CHECK: lw $[[R0:[0-9]+]], %got(y)($gp)
; CHECK: addiu $[[R1:[0-9]+]], $zero, -4
; CHECK: and $[[R2:[0-9]+]], $[[R0]], $[[R1]]
; CHECK: andi $[[R3:[0-9]+]], $[[R0]], 3
; CHECK: sll $[[R4:[0-9]+]], $[[R3]], 3
; CHECK: ori $[[R5:[0-9]+]], $zero, 255
; CHECK: sll $[[R6:[0-9]+]], $[[R5]], $[[R4]]
; CHECK: nor $[[R7:[0-9]+]], $zero, $[[R6]]
; CHECK: andi $[[R8:[0-9]+]], $4, 255
; CHECK: sll $[[R9:[0-9]+]], $[[R8]], $[[R4]]
; CHECK: andi $[[R10:[0-9]+]], $5, 255
; CHECK: sll $[[R11:[0-9]+]], $[[R10]], $[[R4]]
; CHECK: $[[BB0:[A-Z_0-9]+]]:
; CHECK: ll $[[R12:[0-9]+]], 0($[[R2]])
; CHECK: and $[[R13:[0-9]+]], $[[R12]], $[[R6]]
; CHECK: bne $[[R13]], $[[R9]], $[[BB1:[A-Z_0-9]+]]
; CHECK: and $[[R14:[0-9]+]], $[[R12]], $[[R7]]
; CHECK: or $[[R15:[0-9]+]], $[[R14]], $[[R11]]
; CHECK: sc $[[R15]], 0($[[R2]])
; CHECK: beq $[[R15]], $zero, $[[BB0]]
; CHECK: $[[BB1]]:
; CHECK: srl $[[R16:[0-9]+]], $[[R13]], $[[R4]]
; CHECK: sll $[[R17:[0-9]+]], $[[R16]], 24
; CHECK: sra $2, $[[R17]], 24
}