R600: Consolidate sub register indices.

Use sub0-15 everywhere.

Patch by: Michel Dänzer

Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
llvm-svn: 174610
This commit is contained in:
Tom Stellard 2013-02-07 14:02:37 +00:00
parent e06163a9a6
commit 9355b22180
8 changed files with 73 additions and 93 deletions

View File

@ -196,8 +196,8 @@ class Vector_Build <ValueType vecType, RegisterClass vectorClass,
(vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
(elemType elemClass:$z), (elemType elemClass:$w))),
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
(vecType (IMPLICIT_DEF)), elemClass:$x, sel_x), elemClass:$y, sel_y),
elemClass:$z, sel_z), elemClass:$w, sel_w)
(vecType (IMPLICIT_DEF)), elemClass:$x, sub0), elemClass:$y, sub1),
elemClass:$z, sub2), elemClass:$w, sub3)
>;
// bitconvert pattern

View File

@ -51,22 +51,22 @@ unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
unsigned AMDGPURegisterInfo::getIndirectSubReg(unsigned IndirectIndex) const {
switch(IndirectIndex) {
case 0: return AMDGPU::indirect_0;
case 1: return AMDGPU::indirect_1;
case 2: return AMDGPU::indirect_2;
case 3: return AMDGPU::indirect_3;
case 4: return AMDGPU::indirect_4;
case 5: return AMDGPU::indirect_5;
case 6: return AMDGPU::indirect_6;
case 7: return AMDGPU::indirect_7;
case 8: return AMDGPU::indirect_8;
case 9: return AMDGPU::indirect_9;
case 10: return AMDGPU::indirect_10;
case 11: return AMDGPU::indirect_11;
case 12: return AMDGPU::indirect_12;
case 13: return AMDGPU::indirect_13;
case 14: return AMDGPU::indirect_14;
case 15: return AMDGPU::indirect_15;
case 0: return AMDGPU::sub0;
case 1: return AMDGPU::sub1;
case 2: return AMDGPU::sub2;
case 3: return AMDGPU::sub3;
case 4: return AMDGPU::sub4;
case 5: return AMDGPU::sub5;
case 6: return AMDGPU::sub6;
case 7: return AMDGPU::sub7;
case 8: return AMDGPU::sub8;
case 9: return AMDGPU::sub9;
case 10: return AMDGPU::sub10;
case 11: return AMDGPU::sub11;
case 12: return AMDGPU::sub12;
case 13: return AMDGPU::sub13;
case 14: return AMDGPU::sub14;
case 15: return AMDGPU::sub15;
default: llvm_unreachable("indirect index out of range");
}
}

View File

@ -12,14 +12,9 @@
//===----------------------------------------------------------------------===//
let Namespace = "AMDGPU" in {
def sel_x : SubRegIndex;
def sel_y : SubRegIndex;
def sel_z : SubRegIndex;
def sel_w : SubRegIndex;
foreach Index = 0-15 in {
def indirect_#Index : SubRegIndex;
def sub#Index : SubRegIndex;
}
def INDIRECT_BASE_ADDR : Register <"INDIRECT_BASE_ADDR">;

View File

@ -578,13 +578,13 @@ class ExportBufWord1 {
multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
def : Pat<(int_R600_store_pixel_depth R600_Reg32:$reg),
(ExportInst
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x),
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
0, 61, 0, 7, 7, 7, cf_inst, 0)
>;
def : Pat<(int_R600_store_pixel_stencil R600_Reg32:$reg),
(ExportInst
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sel_x),
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), R600_Reg32:$reg, sub0),
0, 61, 7, 0, 7, 7, cf_inst, 0)
>;
@ -1868,25 +1868,25 @@ def : Pat <
(SETNE_DX10 R600_Reg32:$src0, R600_Reg32:$src1)
>;
def : Extract_Element <f32, v4f32, R600_Reg128, 0, sel_x>;
def : Extract_Element <f32, v4f32, R600_Reg128, 1, sel_y>;
def : Extract_Element <f32, v4f32, R600_Reg128, 2, sel_z>;
def : Extract_Element <f32, v4f32, R600_Reg128, 3, sel_w>;
def : Extract_Element <f32, v4f32, R600_Reg128, 0, sub0>;
def : Extract_Element <f32, v4f32, R600_Reg128, 1, sub1>;
def : Extract_Element <f32, v4f32, R600_Reg128, 2, sub2>;
def : Extract_Element <f32, v4f32, R600_Reg128, 3, sub3>;
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sel_x>;
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sel_y>;
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sel_z>;
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sel_w>;
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 0, sub0>;
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 1, sub1>;
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 2, sub2>;
def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 3, sub3>;
def : Extract_Element <i32, v4i32, R600_Reg128, 0, sel_x>;
def : Extract_Element <i32, v4i32, R600_Reg128, 1, sel_y>;
def : Extract_Element <i32, v4i32, R600_Reg128, 2, sel_z>;
def : Extract_Element <i32, v4i32, R600_Reg128, 3, sel_w>;
def : Extract_Element <i32, v4i32, R600_Reg128, 0, sub0>;
def : Extract_Element <i32, v4i32, R600_Reg128, 1, sub1>;
def : Extract_Element <i32, v4i32, R600_Reg128, 2, sub2>;
def : Extract_Element <i32, v4i32, R600_Reg128, 3, sub3>;
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sel_x>;
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sel_y>;
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sel_z>;
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sel_w>;
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 0, sub0>;
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>;
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>;
def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sub3>;
def : Vector_Build <v4f32, R600_Reg128, f32, R600_Reg32>;
def : Vector_Build <v4i32, R600_Reg128, i32, R600_Reg32>;

View File

@ -84,10 +84,10 @@ const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const {
switch (Channel) {
default: assert(!"Invalid channel index"); return 0;
case 0: return AMDGPU::sel_x;
case 1: return AMDGPU::sel_y;
case 2: return AMDGPU::sel_z;
case 3: return AMDGPU::sel_w;
case 0: return AMDGPU::sub0;
case 1: return AMDGPU::sub1;
case 2: return AMDGPU::sub2;
case 3: return AMDGPU::sub3;
}
}

View File

@ -19,7 +19,7 @@ class R600RegWithChan <string name, bits<9> sel, string chan> :
class R600Reg_128<string n, list<Register> subregs, bits<16> encoding> :
RegisterWithSubRegs<n, subregs> {
let Namespace = "AMDGPU";
let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
let SubRegIndices = [sub0, sub1, sub2, sub3];
let HWEncoding = encoding;
}
@ -126,9 +126,8 @@ class IndirectSuper<string n, list<Register> subregs> :
RegisterWithSubRegs<n, subregs> {
let Namespace = "AMDGPU";
let SubRegIndices =
[indirect_0,indirect_1,indirect_2,indirect_3,indirect_4,indirect_5,indirect_6,
indirect_7,indirect_8,indirect_9,indirect_10,indirect_11,indirect_12,
indirect_13,indirect_14,indirect_15];
[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15];
}
def IndirectSuperReg : IndirectSuper<"Indirect",

View File

@ -1215,15 +1215,15 @@ def CLAMP_SI : CLAMP<VReg_32>;
def FABS_SI : FABS<VReg_32>;
def FNEG_SI : FNEG<VReg_32>;
def : Extract_Element <f32, v4f32, VReg_128, 0, sel_x>;
def : Extract_Element <f32, v4f32, VReg_128, 1, sel_y>;
def : Extract_Element <f32, v4f32, VReg_128, 2, sel_z>;
def : Extract_Element <f32, v4f32, VReg_128, 3, sel_w>;
def : Extract_Element <f32, v4f32, VReg_128, 0, sub0>;
def : Extract_Element <f32, v4f32, VReg_128, 1, sub1>;
def : Extract_Element <f32, v4f32, VReg_128, 2, sub2>;
def : Extract_Element <f32, v4f32, VReg_128, 3, sub3>;
def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sel_x>;
def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sel_y>;
def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sel_z>;
def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sel_w>;
def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sub0>;
def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sub1>;
def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sub2>;
def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sub3>;
def : Vector_Build <v4f32, VReg_128, f32, VReg_32>;
def : Vector_Build <v4i32, SReg_128, i32, SReg_32>;
@ -1338,22 +1338,22 @@ def : Pat <
def : Pat <
(int_AMDGPU_cube VReg_128:$src),
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
(V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
(EXTRACT_SUBREG VReg_128:$src, sel_y),
(EXTRACT_SUBREG VReg_128:$src, sel_z),
0, 0, 0, 0), sel_x),
(V_CUBESC_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
(EXTRACT_SUBREG VReg_128:$src, sel_y),
(EXTRACT_SUBREG VReg_128:$src, sel_z),
0, 0, 0, 0), sel_y),
(V_CUBEMA_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
(EXTRACT_SUBREG VReg_128:$src, sel_y),
(EXTRACT_SUBREG VReg_128:$src, sel_z),
0, 0, 0, 0), sel_z),
(V_CUBEID_F32 (EXTRACT_SUBREG VReg_128:$src, sel_x),
(EXTRACT_SUBREG VReg_128:$src, sel_y),
(EXTRACT_SUBREG VReg_128:$src, sel_z),
0, 0, 0, 0), sel_w)
(V_CUBETC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
(EXTRACT_SUBREG VReg_128:$src, sub1),
(EXTRACT_SUBREG VReg_128:$src, sub2),
0, 0, 0, 0), sub0),
(V_CUBESC_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
(EXTRACT_SUBREG VReg_128:$src, sub1),
(EXTRACT_SUBREG VReg_128:$src, sub2),
0, 0, 0, 0), sub1),
(V_CUBEMA_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
(EXTRACT_SUBREG VReg_128:$src, sub1),
(EXTRACT_SUBREG VReg_128:$src, sub2),
0, 0, 0, 0), sub2),
(V_CUBEID_F32 (EXTRACT_SUBREG VReg_128:$src, sub0),
(EXTRACT_SUBREG VReg_128:$src, sub1),
(EXTRACT_SUBREG VReg_128:$src, sub2),
0, 0, 0, 0), sub3)
>;
/********** ================== **********/

View File

@ -1,18 +1,4 @@
let Namespace = "AMDGPU" in {
def low : SubRegIndex;
def high : SubRegIndex;
def sub0 : SubRegIndex;
def sub1 : SubRegIndex;
def sub2 : SubRegIndex;
def sub3 : SubRegIndex;
def sub4 : SubRegIndex;
def sub5 : SubRegIndex;
def sub6 : SubRegIndex;
def sub7 : SubRegIndex;
}
class SIReg <string n, bits<16> encoding = 0> : Register<n> {
let Namespace = "AMDGPU";
let HWEncoding = encoding;
@ -20,7 +6,7 @@ class SIReg <string n, bits<16> encoding = 0> : Register<n> {
class SI_64 <string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> {
let Namespace = "AMDGPU";
let SubRegIndices = [low, high];
let SubRegIndices = [sub0, sub1];
let HWEncoding = encoding;
}
@ -73,12 +59,12 @@ def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
(add (sequence "SGPR%u", 0, 101))>;
// SGPR 64-bit registers
def SGPR_64 : RegisterTuples<[low, high],
def SGPR_64 : RegisterTuples<[sub0, sub1],
[(add (decimate SGPR_32, 2)),
(add(decimate (rotl SGPR_32, 1), 2))]>;
// SGPR 128-bit registers
def SGPR_128 : RegisterTuples<[sel_x, sel_y, sel_z, sel_w],
def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
[(add (decimate SGPR_32, 4)),
(add (decimate (rotl SGPR_32, 1), 4)),
(add (decimate (rotl SGPR_32, 2), 4)),
@ -104,12 +90,12 @@ def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
(add (sequence "VGPR%u", 0, 255))>;
// VGPR 64-bit registers
def VGPR_64 : RegisterTuples<[low, high],
def VGPR_64 : RegisterTuples<[sub0, sub1],
[(add VGPR_32),
(add (rotl VGPR_32, 1))]>;
// VGPR 128-bit registers
def VGPR_128 : RegisterTuples<[sel_x, sel_y, sel_z, sel_w],
def VGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
[(add VGPR_32),
(add (rotl VGPR_32, 1)),
(add (rotl VGPR_32, 2)),