[AMDGPU] gfx1010 wave32 clang support
Differential Revision: https://reviews.llvm.org/D63209 llvm-svn: 363341
This commit is contained in:
parent
aca017e802
commit
8a8131a3f6
|
@ -2401,6 +2401,10 @@ AMDGPU
|
|||
CU wavefront execution mode is used if enabled and WGP wavefront execution mode
|
||||
is used if disabled (AMDGPU only)
|
||||
|
||||
.. option:: -mwavefrontsize64, -mno-wavefrontsize64
|
||||
|
||||
Wavefront size 64 is used if enabled and wavefront size 32 if disabled (AMDGPU only)
|
||||
|
||||
.. option:: -mxnack, -mno-xnack
|
||||
|
||||
Enable XNACK (AMDGPU only)
|
||||
|
|
|
@ -2216,6 +2216,11 @@ def mcumode : Flag<["-"], "mcumode">, Group<m_amdgpu_Features_Group>,
|
|||
def mno_cumode : Flag<["-"], "mno-cumode">, Group<m_amdgpu_Features_Group>,
|
||||
HelpText<"WGP wavefront execution mode is used (AMDGPU only)">;
|
||||
|
||||
def mwavefrontsize64 : Flag<["-"], "mwavefrontsize64">,
|
||||
Group<m_Group>, HelpText<"Wavefront size 64 is used">;
|
||||
def mno_wavefrontsize64 : Flag<["-"], "mno-wavefrontsize64">,
|
||||
Group<m_Group>, HelpText<"Wavefront size 32 is used">;
|
||||
|
||||
def faltivec : Flag<["-"], "faltivec">, Group<f_Group>, Flags<[DriverOption]>;
|
||||
def fno_altivec : Flag<["-"], "fno-altivec">, Group<f_Group>, Flags<[DriverOption]>;
|
||||
def maltivec : Flag<["-"], "maltivec">, Group<m_ppc_Features_Group>;
|
||||
|
|
|
@ -12736,11 +12736,27 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
|
|||
case AMDGPU::BI__builtin_amdgcn_uicmp:
|
||||
case AMDGPU::BI__builtin_amdgcn_uicmpl:
|
||||
case AMDGPU::BI__builtin_amdgcn_sicmp:
|
||||
case AMDGPU::BI__builtin_amdgcn_sicmpl:
|
||||
return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp);
|
||||
case AMDGPU::BI__builtin_amdgcn_sicmpl: {
|
||||
llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
|
||||
llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
|
||||
llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
|
||||
|
||||
// FIXME-GFX10: How should 32 bit mask be handled?
|
||||
Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
|
||||
{ Builder.getInt64Ty(), Src0->getType() });
|
||||
return Builder.CreateCall(F, { Src0, Src1, Src2 });
|
||||
}
|
||||
case AMDGPU::BI__builtin_amdgcn_fcmp:
|
||||
case AMDGPU::BI__builtin_amdgcn_fcmpf:
|
||||
return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp);
|
||||
case AMDGPU::BI__builtin_amdgcn_fcmpf: {
|
||||
llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
|
||||
llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
|
||||
llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
|
||||
|
||||
// FIXME-GFX10: How should 32 bit mask be handled?
|
||||
Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
|
||||
{ Builder.getInt64Ty(), Src0->getType() });
|
||||
return Builder.CreateCall(F, { Src0, Src1, Src2 });
|
||||
}
|
||||
case AMDGPU::BI__builtin_amdgcn_class:
|
||||
case AMDGPU::BI__builtin_amdgcn_classf:
|
||||
case AMDGPU::BI__builtin_amdgcn_classh:
|
||||
|
|
|
@ -41,6 +41,17 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
|
|||
if (const Arg *dAbi = Args.getLastArg(options::OPT_mamdgpu_debugger_abi))
|
||||
D.Diag(diag::err_drv_clang_unsupported) << dAbi->getAsString(Args);
|
||||
|
||||
if (Args.getLastArg(options::OPT_mwavefrontsize64)) {
|
||||
Features.push_back("-wavefrontsize16");
|
||||
Features.push_back("-wavefrontsize32");
|
||||
Features.push_back("+wavefrontsize64");
|
||||
}
|
||||
if (Args.getLastArg(options::OPT_mno_wavefrontsize64)) {
|
||||
Features.push_back("-wavefrontsize16");
|
||||
Features.push_back("+wavefrontsize32");
|
||||
Features.push_back("-wavefrontsize64");
|
||||
}
|
||||
|
||||
handleTargetFeaturesGroup(
|
||||
Args, Features, options::OPT_m_amdgpu_Features_Group);
|
||||
}
|
||||
|
|
|
@ -316,15 +316,21 @@ void HIPToolChain::addClangTargetOptions(
|
|||
else
|
||||
FlushDenormalControlBC = "oclc_daz_opt_off.amdgcn.bc";
|
||||
|
||||
llvm::StringRef WaveFrontSizeBC;
|
||||
if (stoi(GFXVersion) < 1000)
|
||||
WaveFrontSizeBC = "oclc_wavefrontsize64_on.amdgcn.bc";
|
||||
else
|
||||
WaveFrontSizeBC = "oclc_wavefrontsize64_off.amdgcn.bc";
|
||||
|
||||
BCLibs.append({"hip.amdgcn.bc", "opencl.amdgcn.bc", "ocml.amdgcn.bc",
|
||||
"ockl.amdgcn.bc", "oclc_finite_only_off.amdgcn.bc",
|
||||
FlushDenormalControlBC,
|
||||
"oclc_correctly_rounded_sqrt_on.amdgcn.bc",
|
||||
"oclc_unsafe_math_off.amdgcn.bc", ISAVerBC});
|
||||
"oclc_unsafe_math_off.amdgcn.bc", ISAVerBC,
|
||||
WaveFrontSizeBC});
|
||||
}
|
||||
for (auto Lib : BCLibs)
|
||||
addBCLib(getDriver(), DriverArgs, CC1Args, LibraryPaths, Lib);
|
||||
|
||||
}
|
||||
|
||||
llvm::opt::DerivedArgList *
|
||||
|
|
|
@ -224,28 +224,28 @@ void test_lerp(global int* out, int a, int b, int c)
|
|||
}
|
||||
|
||||
// CHECK-LABEL: @test_sicmp_i32
|
||||
// CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
|
||||
// CHECK: call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 32)
|
||||
void test_sicmp_i32(global ulong* out, int a, int b)
|
||||
{
|
||||
*out = __builtin_amdgcn_sicmp(a, b, 32);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_uicmp_i32
|
||||
// CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
|
||||
// CHECK: call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 32)
|
||||
void test_uicmp_i32(global ulong* out, uint a, uint b)
|
||||
{
|
||||
*out = __builtin_amdgcn_uicmp(a, b, 32);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_sicmp_i64
|
||||
// CHECK: call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 38)
|
||||
// CHECK: call i64 @llvm.amdgcn.icmp.i64.i64(i64 %a, i64 %b, i32 38)
|
||||
void test_sicmp_i64(global ulong* out, long a, long b)
|
||||
{
|
||||
*out = __builtin_amdgcn_sicmpl(a, b, 39-1);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_uicmp_i64
|
||||
// CHECK: call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 35)
|
||||
// CHECK: call i64 @llvm.amdgcn.icmp.i64.i64(i64 %a, i64 %b, i32 35)
|
||||
void test_uicmp_i64(global ulong* out, ulong a, ulong b)
|
||||
{
|
||||
*out = __builtin_amdgcn_uicmpl(a, b, 30+5);
|
||||
|
@ -287,14 +287,14 @@ void test_readlane(global int* out, int a, int b)
|
|||
}
|
||||
|
||||
// CHECK-LABEL: @test_fcmp_f32
|
||||
// CHECK: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 5)
|
||||
// CHECK: call i64 @llvm.amdgcn.fcmp.i64.f32(float %a, float %b, i32 5)
|
||||
void test_fcmp_f32(global ulong* out, float a, float b)
|
||||
{
|
||||
*out = __builtin_amdgcn_fcmpf(a, b, 5);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_fcmp_f64
|
||||
// CHECK: call i64 @llvm.amdgcn.fcmp.f64(double %a, double %b, i32 6)
|
||||
// CHECK: call i64 @llvm.amdgcn.fcmp.i64.f64(double %a, double %b, i32 6)
|
||||
void test_fcmp_f64(global ulong* out, double a, double b)
|
||||
{
|
||||
*out = __builtin_amdgcn_fcmp(a, b, 3+3);
|
||||
|
|
|
@ -24,6 +24,12 @@
|
|||
// RUN: %clang -### -target amdgcn -mcpu=gfx700 -mno-sram-ecc %s 2>&1 | FileCheck --check-prefix=NO-SRAM-ECC %s
|
||||
// NO-SRAM-ECC: "-target-feature" "-sram-ecc"
|
||||
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefix=WAVE64 %s
|
||||
// WAVE64: "-target-feature" "-wavefrontsize16" "-target-feature" "-wavefrontsize32" "-target-feature" "+wavefrontsize64"
|
||||
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=NO-WAVE64 %s
|
||||
// NO-WAVE64: "-target-feature" "-wavefrontsize16" "-target-feature" "+wavefrontsize32" "-target-feature" "-wavefrontsize64"
|
||||
|
||||
// RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mcumode %s 2>&1 | FileCheck --check-prefix=CUMODE %s
|
||||
// CUMODE: "-target-feature" "+cumode"
|
||||
|
||||
|
|
Loading…
Reference in New Issue