parent
147700b8b4
commit
3abdbf1cad
|
@ -143,6 +143,7 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
|
||||||
EmitFCInstr(MI, OS);
|
EmitFCInstr(MI, OS);
|
||||||
} else if (MI.getOpcode() == AMDGPU::RETURN ||
|
} else if (MI.getOpcode() == AMDGPU::RETURN ||
|
||||||
MI.getOpcode() == AMDGPU::FETCH_CLAUSE ||
|
MI.getOpcode() == AMDGPU::FETCH_CLAUSE ||
|
||||||
|
MI.getOpcode() == AMDGPU::ALU_CLAUSE ||
|
||||||
MI.getOpcode() == AMDGPU::BUNDLE ||
|
MI.getOpcode() == AMDGPU::BUNDLE ||
|
||||||
MI.getOpcode() == AMDGPU::KILL) {
|
MI.getOpcode() == AMDGPU::KILL) {
|
||||||
return;
|
return;
|
||||||
|
@ -255,7 +256,7 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
|
||||||
case AMDGPU::CF_ALU:
|
case AMDGPU::CF_ALU:
|
||||||
case AMDGPU::CF_ALU_PUSH_BEFORE: {
|
case AMDGPU::CF_ALU_PUSH_BEFORE: {
|
||||||
uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
|
uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
|
||||||
EmitByte(INSTR_CFALU, OS);
|
EmitByte(INSTR_NATIVE, OS);
|
||||||
Emit(Inst, OS);
|
Emit(Inst, OS);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -294,7 +295,9 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
EmitALUInstr(MI, Fixups, OS);
|
uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
|
||||||
|
EmitByte(INSTR_NATIVE, OS);
|
||||||
|
Emit(Inst, OS);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -165,6 +165,97 @@ private:
|
||||||
return ClauseFile(MIb, ClauseContent);
|
return ClauseFile(MIb, ClauseContent);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Assigns a literal slot to every ALU_LITERAL_X register operand of \p MI.
///
/// For each such operand the instruction's IMM operand is read; if its value
/// is already present in \p Lits the operand is rewritten to the literal
/// register of that slot, otherwise the value is appended to \p Lits and the
/// operand is rewritten to the register of the newly created slot.
///
/// \param MI   Instruction whose register operands are rewritten in place.
/// \param Lits Literal values collected so far; may grow, and must never hold
///             more than four entries (one per literal register).
void getLiteral(MachineInstr *MI, std::vector<unsigned> &Lits) const {
  // Slot index -> literal register; slot N of Lits maps to LiteralRegs[N].
  static const unsigned LiteralRegs[] = {
    AMDGPU::ALU_LITERAL_X,
    AMDGPU::ALU_LITERAL_Y,
    AMDGPU::ALU_LITERAL_Z,
    AMDGPU::ALU_LITERAL_W
  };
  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg())
      continue;
    if (MO.getReg() != AMDGPU::ALU_LITERAL_X)
      continue;
    unsigned ImmIdx = TII->getOperandIdx(MI->getOpcode(), R600Operands::IMM);
    // Lits stores 32-bit values, so truncate the 64-bit immediate *before*
    // searching. Comparing the untruncated int64_t against the stored unsigned
    // entries would never match for values outside [0, 2^32) and would
    // allocate a duplicate slot each time.
    const unsigned Imm =
        static_cast<unsigned>(MI->getOperand(ImmIdx).getImm());
    std::vector<unsigned>::iterator It =
        std::find(Lits.begin(), Lits.end(), Imm);
    if (It != Lits.end()) {
      // Value already has a slot: reuse it.
      unsigned Index = It - Lits.begin();
      MO.setReg(LiteralRegs[Index]);
    } else {
      assert(Lits.size() < 4 && "Too many literals in Instruction Group");
      MO.setReg(LiteralRegs[Lits.size()]);
      Lits.push_back(Imm);
    }
  }
}
|
||||||
|
|
||||||
|
/// Builds one LITERALS instruction per pair of entries in \p Literals and
/// returns an iterator to the last instruction built (or \p InsertPos
/// unchanged when \p Literals is empty). An odd trailing entry is paired
/// with 0.
///
/// NOTE(review): BuildMI is given the parent block pointer and a debug
/// location only; \p InsertPos itself is not passed as an insertion point.
/// Presumably this places the new instructions at the end of the block --
/// confirm that is what callers expect.
MachineBasicBlock::iterator insertLiterals(
    MachineBasicBlock::iterator InsertPos,
    const std::vector<unsigned> &Literals) const {
  MachineBasicBlock *Parent = InsertPos->getParent();
  const unsigned NumLiterals = Literals.size();
  for (unsigned Idx = 0; Idx < NumLiterals; Idx += 2) {
    const unsigned Low = Literals[Idx];
    unsigned High = 0;
    if (Idx + 1 < NumLiterals)
      High = Literals[Idx + 1];
    // The debug location is taken from whatever InsertPos currently points
    // at, i.e. from the previously built LITERALS after the first iteration.
    InsertPos = BuildMI(Parent, InsertPos->getDebugLoc(),
                        TII->get(AMDGPU::LITERALS))
                    .addImm(Low)
                    .addImm(High);
  }
  return InsertPos;
}
|
||||||
|
|
||||||
|
/// Collects the ALU instructions that follow the clause head at \p I into a
/// ClauseFile.
///
/// Starting after the head, trivial instructions are skipped; the scan stops
/// at the first instruction that is neither a bundle nor an ALU instruction.
/// Bundles are dissolved: each bundled instruction is unbundled, its
/// internal-read flags are cleared, and the bundle header is erased. After
/// every instruction (or dissolved bundle) the literals it uses are emitted
/// as LITERALS instructions, two values per instruction, and appended to the
/// clause content as well.
///
/// \param MBB Block being processed.
/// \param I   In: the clause head. Out: the first instruction that is not
///            part of the clause (possibly the end of \p MBB).
/// \returns the clause head paired with the collected instructions.
ClauseFile
MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
    const {
  MachineBasicBlock::iterator ClauseHead = I;
  std::vector<MachineInstr *> ClauseContent;
  I++;
  for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
    if (IsTrivialInst(I)) {
      ++I;
      continue;
    }
    if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
      break;
    // Literals are gathered per instruction group (one bundle or one
    // stand-alone instruction).
    std::vector<unsigned> Literals;
    if (I->isBundle()) {
      MachineInstr *DeleteMI = I;
      MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
      while (++BI != E && BI->isBundledWithPred()) {
        BI->unbundleFromPred();
        for (unsigned i = 0, e = BI->getNumOperands(); i != e; ++i) {
          MachineOperand &MO = BI->getOperand(i);
          if (MO.isReg() && MO.isInternalRead())
            MO.setIsInternalRead(false);
        }
        getLiteral(BI, Literals);
        ClauseContent.push_back(BI);
      }
      // Advance past the bundle before erasing its header so I stays valid.
      I = BI;
      DeleteMI->eraseFromParent();
    } else {
      getLiteral(I, Literals);
      ClauseContent.push_back(I);
      I++;
    }
    for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
      unsigned literal0 = Literals[i];
      unsigned literal2 = (i + 1 < e)?Literals[i + 1]:0;
      // I has already been advanced and may be the end of the block; the end
      // iterator must not be dereferenced, so fall back to the clause head's
      // debug location in that case.
      MachineInstr *MILit = BuildMI(MBB, I,
          (I != E) ? I->getDebugLoc() : ClauseHead->getDebugLoc(),
          TII->get(AMDGPU::LITERALS))
          .addImm(literal0)
          .addImm(literal2);
      ClauseContent.push_back(MILit);
    }
  }
  // NOTE(review): if no instruction was collected, size() - 1 wraps around;
  // presumably a clause head is always followed by at least one ALU
  // instruction -- confirm. Operand 7 presumably is the count field encoded
  // as (count - 1) -- confirm against the CF_ALU definition.
  ClauseHead->getOperand(7).setImm(ClauseContent.size() - 1);
  return ClauseFile(ClauseHead, ClauseContent);
}
|
||||||
|
|
||||||
void
|
void
|
||||||
EmitFetchClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
|
EmitFetchClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
|
||||||
unsigned &CfCount) {
|
unsigned &CfCount) {
|
||||||
|
@ -178,6 +269,19 @@ private:
|
||||||
CfCount += 2 * Clause.second.size();
|
CfCount += 2 * Clause.second.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
|
||||||
|
unsigned &CfCount) {
|
||||||
|
CounterPropagateAddr(Clause.first, CfCount);
|
||||||
|
MachineBasicBlock *BB = Clause.first->getParent();
|
||||||
|
BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE))
|
||||||
|
.addImm(CfCount);
|
||||||
|
for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
|
||||||
|
BB->splice(InsertPos, BB, Clause.second[i]);
|
||||||
|
}
|
||||||
|
CfCount += Clause.second.size();
|
||||||
|
}
|
||||||
|
|
||||||
/// Adds \p Addr to the immediate stored in operand 0 of \p MI.
void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const {
  MachineOperand &AddrOperand = MI->getOperand(0);
  AddrOperand.setImm(Addr + AddrOperand.getImm());
}
|
||||||
|
@ -234,7 +338,7 @@ public:
|
||||||
getHWInstrDesc(CF_CALL_FS));
|
getHWInstrDesc(CF_CALL_FS));
|
||||||
CfCount++;
|
CfCount++;
|
||||||
}
|
}
|
||||||
std::vector<ClauseFile> FetchClauses;
|
std::vector<ClauseFile> FetchClauses, AluClauses;
|
||||||
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
|
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
|
||||||
I != E;) {
|
I != E;) {
|
||||||
if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
|
if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
|
||||||
|
@ -252,6 +356,8 @@ public:
|
||||||
MaxStack = std::max(MaxStack, CurrentStack);
|
MaxStack = std::max(MaxStack, CurrentStack);
|
||||||
hasPush = true;
|
hasPush = true;
|
||||||
case AMDGPU::CF_ALU:
|
case AMDGPU::CF_ALU:
|
||||||
|
I = MI;
|
||||||
|
AluClauses.push_back(MakeALUClause(MBB, I));
|
||||||
case AMDGPU::EG_ExportBuf:
|
case AMDGPU::EG_ExportBuf:
|
||||||
case AMDGPU::EG_ExportSwz:
|
case AMDGPU::EG_ExportSwz:
|
||||||
case AMDGPU::R600_ExportBuf:
|
case AMDGPU::R600_ExportBuf:
|
||||||
|
@ -362,6 +468,8 @@ public:
|
||||||
}
|
}
|
||||||
for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
|
for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
|
||||||
EmitFetchClause(I, FetchClauses[i], CfCount);
|
EmitFetchClause(I, FetchClauses[i], CfCount);
|
||||||
|
for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
|
||||||
|
EmitALUClause(I, AluClauses[i], CfCount);
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -944,6 +944,23 @@ def FETCH_CLAUSE : AMDGPUInst <(outs),
|
||||||
let Inst = num;
|
let Inst = num;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ALU_CLAUSE: instruction with no outputs and a single i32 immediate operand
// ($addr), printed as "ALU clause starting at $addr:". It declares an 8-bit
// encoding field `Inst` that is assigned from the 8-bit field `num`.
// NOTE(review): `num` does not correspond to any operand named in the `ins`
// list ($addr) -- presumably carried over from FETCH_CLAUSE; confirm the
// encoding is intentionally left unbound.
def ALU_CLAUSE : AMDGPUInst <(outs),
|
||||||
|
(ins i32imm:$addr), "ALU clause starting at $addr:", [] > {
|
||||||
|
field bits<8> Inst;
|
||||||
|
bits<8> num;
|
||||||
|
let Inst = num;
|
||||||
|
}
|
||||||
|
|
||||||
|
// LITERALS: instruction with no outputs and two LITERAL operands, printed as
// "$literal1, $literal2". Its 64-bit encoding packs the two 32-bit values:
// $literal1 occupies bits 31-0 and $literal2 occupies bits 63-32.
def LITERALS : AMDGPUInst <(outs),
|
||||||
|
(ins LITERAL:$literal1, LITERAL:$literal2), "$literal1, $literal2", [] > {
|
||||||
|
field bits<64> Inst;
|
||||||
|
bits<32> literal1;
|
||||||
|
bits<32> literal2;
|
||||||
|
|
||||||
|
let Inst{31-0} = literal1;
|
||||||
|
let Inst{63-32} = literal2;
|
||||||
|
}
|
||||||
|
|
||||||
def PAD : AMDGPUInst <(outs), (ins), "PAD", [] > {
|
def PAD : AMDGPUInst <(outs), (ins), "PAD", [] > {
|
||||||
field bits<64> Inst;
|
field bits<64> Inst;
|
||||||
}
|
}
|
||||||
|
|
|
@ -88,7 +88,10 @@ def NEG_ONE : R600Reg<"-1.0", 249>;
|
||||||
def ONE_INT : R600Reg<"1", 250>;
|
def ONE_INT : R600Reg<"1", 250>;
|
||||||
def HALF : R600Reg<"0.5", 252>;
|
def HALF : R600Reg<"0.5", 252>;
|
||||||
def NEG_HALF : R600Reg<"-0.5", 252>;
|
def NEG_HALF : R600Reg<"-0.5", 252>;
|
||||||
def ALU_LITERAL_X : R600Reg<"literal.x", 253>;
|
def ALU_LITERAL_X : R600RegWithChan<"literal.x", 253, "X">;
|
||||||
|
// Literal registers for channels Y, Z and W. They share hardware index 253
// with ALU_LITERAL_X and differ only in channel, so each gets its own
// channel-suffixed name (previously all three were named "literal.x").
def ALU_LITERAL_Y : R600RegWithChan<"literal.y", 253, "Y">;
def ALU_LITERAL_Z : R600RegWithChan<"literal.z", 253, "Z">;
def ALU_LITERAL_W : R600RegWithChan<"literal.w", 253, "W">;
|
||||||
def PV_X : R600RegWithChan<"PV.x", 254, "X">;
|
def PV_X : R600RegWithChan<"PV.x", 254, "X">;
|
||||||
def PV_Y : R600RegWithChan<"PV.y", 254, "Y">;
|
def PV_Y : R600RegWithChan<"PV.y", 254, "Y">;
|
||||||
def PV_Z : R600RegWithChan<"PV.z", 254, "Z">;
|
def PV_Z : R600RegWithChan<"PV.z", 254, "Z">;
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
;CHECK: ALU
|
;CHECK: ALU
|
||||||
;CHECK: ALU
|
;CHECK: ALU
|
||||||
;CHECK-NOT: ALU
|
;CHECK-NOT: ALU
|
||||||
|
;CHECK: CF_END
|
||||||
|
|
||||||
define void @main() #0 {
|
define void @main() #0 {
|
||||||
main_body:
|
main_body:
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
|
|
||||||
; CHECK: @loop_ge
|
; CHECK: @loop_ge
|
||||||
; CHECK: LOOP_START_DX10
|
; CHECK: LOOP_START_DX10
|
||||||
; CHECK: PRED_SET
|
; CHECK: ALU_PUSH_BEFORE
|
||||||
; CHECK-NEXT: JUMP
|
; CHECK-NEXT: JUMP
|
||||||
; CHECK-NEXT: LOOP_BREAK
|
; CHECK-NEXT: LOOP_BREAK
|
||||||
define void @loop_ge(i32 addrspace(1)* nocapture %out, i32 %iterations) nounwind {
|
define void @loop_ge(i32 addrspace(1)* nocapture %out, i32 %iterations) nounwind {
|
||||||
|
|
|
@ -46,11 +46,11 @@ ENDIF:
|
||||||
|
|
||||||
; CHECK: @nested_if
|
; CHECK: @nested_if
|
||||||
; CHECK: ALU_PUSH_BEFORE
|
; CHECK: ALU_PUSH_BEFORE
|
||||||
; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec
|
|
||||||
; CHECK: JUMP
|
; CHECK: JUMP
|
||||||
|
; CHECK: POP
|
||||||
|
; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec
|
||||||
; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
|
; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
|
||||||
; CHECK: LSHL T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
|
; CHECK: LSHL T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
|
||||||
; CHECK: POP
|
|
||||||
define void @nested_if(i32 addrspace(1)* %out, i32 %in) {
|
define void @nested_if(i32 addrspace(1)* %out, i32 %in) {
|
||||||
entry:
|
entry:
|
||||||
%0 = icmp sgt i32 %in, 0
|
%0 = icmp sgt i32 %in, 0
|
||||||
|
@ -73,12 +73,12 @@ ENDIF:
|
||||||
|
|
||||||
; CHECK: @nested_if_else
|
; CHECK: @nested_if_else
|
||||||
; CHECK: ALU_PUSH_BEFORE
|
; CHECK: ALU_PUSH_BEFORE
|
||||||
; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec
|
|
||||||
; CHECK: JUMP
|
; CHECK: JUMP
|
||||||
|
; CHECK: POP
|
||||||
|
; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec
|
||||||
; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
|
; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred,
|
||||||
; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
|
; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
|
||||||
; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
|
; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel
|
||||||
; CHECK: POP
|
|
||||||
define void @nested_if_else(i32 addrspace(1)* %out, i32 %in) {
|
define void @nested_if_else(i32 addrspace(1)* %out, i32 %in) {
|
||||||
entry:
|
entry:
|
||||||
%0 = icmp sgt i32 %in, 0
|
%0 = icmp sgt i32 %in, 0
|
||||||
|
|
Loading…
Reference in New Issue