AMDGPU: Add encoding for carryless add/sub instructions
llvm-svn: 308639
This commit is contained in:
parent
f65c5ac9c9
commit
c37fe66ec5
|
@ -79,6 +79,12 @@ def FeatureFlatScratchInsts : SubtargetFeature<"flat-scratch-insts",
|
||||||
"Have scratch_* flat memory instructions"
|
"Have scratch_* flat memory instructions"
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
// Subtarget feature "add-no-carry-insts". Enabling it sets the subtarget's
// AddNoCarryInsts attribute to "true".
def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts",
|
||||||
|
"AddNoCarryInsts",
|
||||||
|
"true",
|
||||||
|
"Have VALU add/sub instructions without carry out"
|
||||||
|
>;
|
||||||
|
|
||||||
def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access",
|
def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access",
|
||||||
"UnalignedBufferAccess",
|
"UnalignedBufferAccess",
|
||||||
"true",
|
"true",
|
||||||
|
@ -464,7 +470,8 @@ def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9",
|
||||||
FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode,
|
FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode,
|
||||||
FeatureFastFMAF32, FeatureDPP,
|
FeatureFastFMAF32, FeatureDPP,
|
||||||
FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
|
FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
|
||||||
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts
|
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
|
||||||
|
FeatureAddNoCarryInsts
|
||||||
]
|
]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
@ -681,6 +688,12 @@ def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
|
||||||
def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">,
|
def HasFlatGlobalInsts : Predicate<"Subtarget->hasFlatGlobalInsts()">,
|
||||||
AssemblerPredicate<"FeatureFlatGlobalInsts">;
|
AssemblerPredicate<"FeatureFlatGlobalInsts">;
|
||||||
|
|
||||||
|
// True when the subtarget has the VALU add/sub instructions without carry out
// (FeatureAddNoCarryInsts). The C++ condition string must call the subtarget
// getter by its declared name, hasAddNoCarry(); the assembler side keys off
// the feature bit directly.
def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarry()">,
|
||||||
|
AssemblerPredicate<"FeatureAddNoCarryInsts">;
|
||||||
|
|
||||||
|
// Negation of HasAddNoCarryInsts, for definitions that only apply when the
// no-carry add/sub instructions are absent.
def NotHasAddNoCarryInsts : Predicate<"!Subtarget->hasAddNoCarry()">,
|
||||||
|
AssemblerPredicate<"!FeatureAddNoCarryInsts">;
|
||||||
|
|
||||||
def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
|
def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">,
|
||||||
AssemblerPredicate<"Feature16BitInsts">;
|
AssemblerPredicate<"Feature16BitInsts">;
|
||||||
def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
|
def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
|
||||||
|
|
|
@ -167,6 +167,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||||
FlatInstOffsets(false),
|
FlatInstOffsets(false),
|
||||||
FlatGlobalInsts(false),
|
FlatGlobalInsts(false),
|
||||||
FlatScratchInsts(false),
|
FlatScratchInsts(false),
|
||||||
|
AddNoCarryInsts(false),
|
||||||
|
|
||||||
R600ALUInst(false),
|
R600ALUInst(false),
|
||||||
CaymanISA(false),
|
CaymanISA(false),
|
||||||
|
|
|
@ -159,6 +159,7 @@ protected:
|
||||||
bool FlatInstOffsets;
|
bool FlatInstOffsets;
|
||||||
bool FlatGlobalInsts;
|
bool FlatGlobalInsts;
|
||||||
bool FlatScratchInsts;
|
bool FlatScratchInsts;
|
||||||
|
bool AddNoCarryInsts;
|
||||||
bool R600ALUInst;
|
bool R600ALUInst;
|
||||||
bool CaymanISA;
|
bool CaymanISA;
|
||||||
bool CFALUBug;
|
bool CFALUBug;
|
||||||
|
@ -419,6 +420,10 @@ public:
|
||||||
return FlatScratchInsts;
|
return FlatScratchInsts;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// \returns the AddNoCarryInsts flag, i.e. whether the subtarget has VALU
/// add/sub instructions without carry out.
bool hasAddNoCarry() const {
|
||||||
|
return AddNoCarryInsts;
|
||||||
|
}
|
||||||
|
|
||||||
bool isMesaKernel(const MachineFunction &MF) const {
|
bool isMesaKernel(const MachineFunction &MF) const {
|
||||||
return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction()->getCallingConv());
|
return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction()->getCallingConv());
|
||||||
}
|
}
|
||||||
|
|
|
@ -1300,8 +1300,43 @@ def : IntMed3Pat<V_MED3_U16, umax, umax_oneuse, umin_oneuse, i16>;
|
||||||
// Assembler aliases
|
// Assembler aliases
|
||||||
//============================================================================//
|
//============================================================================//
|
||||||
|
|
||||||
|
// Defines four assembler aliases for the mnemonic Inst, all requiring
// HasAddNoCarryInsts:
//   "Inst $vdst, $src0, $src1"         -> Inst32NC (src1 must be a VGPR)
//   "Inst $vdst, $src0, $src1"         -> Inst64NC (both sources VCSrc_b32)
//   "Inst $vdst, vcc, $src0, $src1"    -> Inst32CO (carry out written as vcc)
//   "Inst $vdst, $sdst, $src0, $src1"  -> Inst64CO (carry out in an SReg_64)
// NC / CO presumably stand for "no carry" / "carry out" -- inferred from the
// instructions passed in by the users of this multiclass.
multiclass NoCarryAlias<string Inst,
|
||||||
|
Instruction Inst32NC, Instruction Inst64NC,
|
||||||
|
Instruction Inst32CO, Instruction Inst64CO> {
|
||||||
|
def : InstAlias<Inst#" $vdst, $src0, $src1",
|
||||||
|
(Inst32NC VGPR_32:$vdst, VSrc_b32:$src0, VGPR_32:$src1), 1000>,
|
||||||
|
Requires<[HasAddNoCarryInsts]>;
|
||||||
|
|
||||||
|
def : InstAlias<Inst#" $vdst, $src0, $src1",
|
||||||
|
(Inst64NC VGPR_32:$vdst, VCSrc_b32:$src0, VCSrc_b32:$src1), -10>,
|
||||||
|
Requires<[HasAddNoCarryInsts]>;
|
||||||
|
|
||||||
|
def : InstAlias<Inst#" $vdst, vcc, $src0, $src1",
|
||||||
|
(Inst32CO VGPR_32:$vdst, VSrc_b32:$src0, VGPR_32:$src1), 1000>,
|
||||||
|
Requires<[HasAddNoCarryInsts]>;
|
||||||
|
|
||||||
|
def : InstAlias<Inst#" $vdst, $sdst, $src0, $src1",
|
||||||
|
(Inst64CO VGPR_32:$vdst, SReg_64:$sdst, VSrc_b32:$src0, VGPR_32:$src1), -10>,
|
||||||
|
Requires<[HasAddNoCarryInsts]>;
|
||||||
|
}
|
||||||
|
|
||||||
|
// gfx9 made a mess of add instruction names. The existing add
|
||||||
|
// instructions had _co added to the names, and their old names were
|
||||||
|
// repurposed to a version without carry out.
|
||||||
|
let Predicates = [HasAddNoCarryInsts] in {
|
||||||
|
defm : NoCarryAlias<"v_add_u32", V_ADD_U32_e32_vi, V_ADD_U32_e64_vi,
|
||||||
|
V_ADD_I32_e32_vi, V_ADD_I32_e64_vi>;
|
||||||
|
defm : NoCarryAlias<"v_sub_u32", V_SUB_U32_e32_vi, V_SUB_U32_e64_vi,
|
||||||
|
V_SUB_I32_e32_vi, V_SUB_I32_e64_vi>;
|
||||||
|
defm : NoCarryAlias<"v_subrev_u32",
|
||||||
|
V_SUBREV_U32_e32_vi, V_SUBREV_U32_e64_vi,
|
||||||
|
V_SUBREV_I32_e32_vi, V_SUBREV_I32_e64_vi>;
|
||||||
|
}
|
||||||
|
|
||||||
|
let Predicates = [NotHasAddNoCarryInsts] in {
|
||||||
def : MnemonicAlias<"v_add_u32", "v_add_i32">;
|
def : MnemonicAlias<"v_add_u32", "v_add_i32">;
|
||||||
def : MnemonicAlias<"v_sub_u32", "v_sub_i32">;
|
def : MnemonicAlias<"v_sub_u32", "v_sub_i32">;
|
||||||
def : MnemonicAlias<"v_subrev_u32", "v_subrev_i32">;
|
def : MnemonicAlias<"v_subrev_u32", "v_subrev_i32">;
|
||||||
|
}
|
||||||
|
|
||||||
} // End isGCN predicate
|
} // End isGCN predicate
|
||||||
|
|
|
@ -375,6 +375,14 @@ defm V_SUBREV_I32 : VOP2bInst <"v_subrev_i32", VOP2b_I32_I1_I32_I32, null_frag,
|
||||||
defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1>;
|
defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1>;
|
||||||
defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1>;
|
defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1>;
|
||||||
defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32">;
|
defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32">;
|
||||||
|
|
||||||
|
|
||||||
|
// Add/sub/subrev instructions using the VOP_I32_I32_I32 profile, defined only
// for subtargets with HasAddNoCarryInsts. V_SUBREV_U32 passes "v_sub_u32" as
// its last argument -- presumably the name of the op it is the reverse of;
// confirm against the VOP2Inst definition.
let SubtargetPredicate = HasAddNoCarryInsts in {
|
||||||
|
defm V_ADD_U32 : VOP2Inst <"v_add_u32", VOP_I32_I32_I32>;
|
||||||
|
defm V_SUB_U32 : VOP2Inst <"v_sub_u32", VOP_I32_I32_I32>;
|
||||||
|
defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32, null_frag, "v_sub_u32">;
|
||||||
|
}
|
||||||
|
|
||||||
} // End isCommutable = 1
|
} // End isCommutable = 1
|
||||||
|
|
||||||
// These are special and do not read the exec mask.
|
// These are special and do not read the exec mask.
|
||||||
|
@ -833,3 +841,9 @@ def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
|
||||||
def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;
|
def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;
|
||||||
|
|
||||||
} // End SubtargetPredicate = isVI
|
} // End SubtargetPredicate = isVI
|
||||||
|
|
||||||
|
// e32/e64 real encodings for the add/sub/subrev instructions guarded by
// HasAddNoCarryInsts, using opcodes 0x34, 0x35 and 0x36.
let SubtargetPredicate = HasAddNoCarryInsts in {
|
||||||
|
defm V_ADD_U32 : VOP2_Real_e32e64_vi <0x34>;
|
||||||
|
defm V_SUB_U32 : VOP2_Real_e32e64_vi <0x35>;
|
||||||
|
defm V_SUBREV_U32 : VOP2_Real_e32e64_vi <0x36>;
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,104 @@
|
||||||
|
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||||
|
// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji -show-encoding %s | FileCheck -check-prefixes=GCN,VI %s
|
||||||
|
|
||||||
|
// RUN: not llvm-mc -arch=amdgcn -mcpu=fiji %s 2>&1 | FileCheck -check-prefixes=ERR-SICIVI %s
|
||||||
|
// RUN: not llvm-mc -arch=amdgcn -mcpu=bonaire %s 2>&1 | FileCheck -check-prefixes=ERR-SICIVI %s
|
||||||
|
// FIXME: pre-gfx9 errors should be more useful
|
||||||
|
|
||||||
|
|
||||||
|
// FIXME: These should parse to VOP2 encoding
|
||||||
|
v_add_u32 v1, v2, v3
|
||||||
|
// GFX9: v_add_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x34,0xd1,0x02,0x07,0x02,0x00]
|
||||||
|
// ERR-SICIVI: :15: error: invalid operand for instruction
|
||||||
|
|
||||||
|
v_add_u32 v1, v2, s1
|
||||||
|
// GFX9: v_add_u32_e64 v1, v2, s1 ; encoding: [0x01,0x00,0x34,0xd1,0x02,0x03,0x00,0x00]
|
||||||
|
// ERR-SICIVI: :15: error: invalid operand for instruction
|
||||||
|
|
||||||
|
v_add_u32 v1, s1, v2
|
||||||
|
// GFX9: v_add_u32_e64 v1, s1, v2 ; encoding: [0x01,0x00,0x34,0xd1,0x01,0x04,0x02,0x00]
|
||||||
|
// ERR-SICIVI: :15: error: invalid operand for instruction
|
||||||
|
|
||||||
|
v_add_u32 v1, 4.0, v2
|
||||||
|
// GFX9: v_add_u32_e64 v1, 4.0, v2 ; encoding: [0x01,0x00,0x34,0xd1,0xf6,0x04,0x02,0x00]
|
||||||
|
// ERR-SICIVI: :15: error: invalid operand for instruction
|
||||||
|
|
||||||
|
v_add_u32 v1, v2, 4.0
|
||||||
|
// GFX9: v_add_u32_e64 v1, v2, 4.0 ; encoding: [0x01,0x00,0x34,0xd1,0x02,0xed,0x01,0x00]
|
||||||
|
// ERR-SICIVI: :15: error: invalid operand for instruction
|
||||||
|
|
||||||
|
v_add_u32_e32 v1, v2, v3
|
||||||
|
// GFX9: v_add_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x68]
|
||||||
|
// ERR-SICIVI: :19: error: invalid operand for instruction
|
||||||
|
|
||||||
|
v_add_u32_e32 v1, s1, v3
|
||||||
|
// GFX9: v_add_u32_e32 v1, s1, v3 ; encoding: [0x01,0x06,0x02,0x68]
|
||||||
|
// ERR-SICIVI: :19: error: invalid operand for instruction
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
v_sub_u32 v1, v2, v3
|
||||||
|
// GFX9: v_sub_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x35,0xd1,0x02,0x07,0x02,0x00]
|
||||||
|
// ERR-SICIVI: :15: error: invalid operand for instruction
|
||||||
|
|
||||||
|
v_sub_u32 v1, v2, s1
|
||||||
|
// GFX9: v_sub_u32_e64 v1, v2, s1 ; encoding: [0x01,0x00,0x35,0xd1,0x02,0x03,0x00,0x00]
|
||||||
|
// ERR-SICIVI: :15: error: invalid operand for instruction
|
||||||
|
|
||||||
|
v_sub_u32 v1, s1, v2
|
||||||
|
// GFX9: v_sub_u32_e64 v1, s1, v2 ; encoding: [0x01,0x00,0x35,0xd1,0x01,0x04,0x02,0x00]
|
||||||
|
// ERR-SICIVI: :15: error: invalid operand for instruction
|
||||||
|
|
||||||
|
v_sub_u32 v1, 4.0, v2
|
||||||
|
// GFX9: v_sub_u32_e64 v1, 4.0, v2 ; encoding: [0x01,0x00,0x35,0xd1,0xf6,0x04,0x02,0x00]
|
||||||
|
// ERR-SICIVI: :15: error: invalid operand for instruction
|
||||||
|
|
||||||
|
v_sub_u32 v1, v2, 4.0
|
||||||
|
// GFX9: v_sub_u32_e64 v1, v2, 4.0 ; encoding: [0x01,0x00,0x35,0xd1,0x02,0xed,0x01,0x00]
|
||||||
|
// ERR-SICIVI: :15: error: invalid operand for instruction
|
||||||
|
|
||||||
|
v_sub_u32_e32 v1, v2, v3
|
||||||
|
// GFX9: v_sub_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x6a]
|
||||||
|
// ERR-SICIVI: :19: error: invalid operand for instruction
|
||||||
|
|
||||||
|
v_sub_u32_e32 v1, s1, v3
|
||||||
|
// GFX9: v_sub_u32_e32 v1, s1, v3 ; encoding: [0x01,0x06,0x02,0x6a]
|
||||||
|
// ERR-SICIVI: :19: error: invalid operand for instruction
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
v_subrev_u32 v1, v2, v3
|
||||||
|
// GFX9: v_subrev_u32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x36,0xd1,0x02,0x07,0x02,0x00]
|
||||||
|
// ERR-SICIVI: :18: error: invalid operand for instruction
|
||||||
|
|
||||||
|
v_subrev_u32 v1, v2, s1
|
||||||
|
// GFX9: v_subrev_u32_e64 v1, v2, s1 ; encoding: [0x01,0x00,0x36,0xd1,0x02,0x03,0x00,0x00]
|
||||||
|
// ERR-SICIVI: :18: error: invalid operand for instruction
|
||||||
|
|
||||||
|
v_subrev_u32 v1, s1, v2
|
||||||
|
// GFX9: v_subrev_u32_e64 v1, s1, v2 ; encoding: [0x01,0x00,0x36,0xd1,0x01,0x04,0x02,0x00]
|
||||||
|
// ERR-SICIVI: :18: error: invalid operand for instruction
|
||||||
|
|
||||||
|
v_subrev_u32 v1, 4.0, v2
|
||||||
|
// GFX9: v_subrev_u32_e64 v1, 4.0, v2 ; encoding: [0x01,0x00,0x36,0xd1,0xf6,0x04,0x02,0x00]
|
||||||
|
// ERR-SICIVI: :18: error: invalid operand for instruction
|
||||||
|
|
||||||
|
v_subrev_u32 v1, v2, 4.0
|
||||||
|
// GFX9: v_subrev_u32_e64 v1, v2, 4.0 ; encoding: [0x01,0x00,0x36,0xd1,0x02,0xed,0x01,0x00]
|
||||||
|
// ERR-SICIVI: :18: error: invalid operand for instruction
|
||||||
|
|
||||||
|
v_subrev_u32_e32 v1, v2, v3
|
||||||
|
// GFX9: v_subrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x6c]
|
||||||
|
// ERR-SICIVI: :22: error: invalid operand for instruction
|
||||||
|
|
||||||
|
v_subrev_u32_e32 v1, s1, v3
|
||||||
|
// GFX9: v_subrev_u32_e32 v1, s1, v3 ; encoding: [0x01,0x06,0x02,0x6c]
|
||||||
|
// ERR-SICIVI: :22: error: invalid operand for instruction
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
v_add_u32 v1, vcc, v2, v3
|
||||||
|
// GCN: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
|
||||||
|
|
||||||
|
v_add_u32 v1, s[0:1], v2, v3
|
||||||
|
// GCN: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
|
Loading…
Reference in New Issue