From 8a8131a3f64637217683871bc8f5f1864dccdc6d Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Thu, 13 Jun 2019 23:47:59 +0000 Subject: [PATCH] [AMDGPU] gfx1010 wave32 clang support Differential Revision: https://reviews.llvm.org/D63209 llvm-svn: 363341 --- clang/docs/ClangCommandLineReference.rst | 4 ++++ clang/include/clang/Driver/Options.td | 5 +++++ clang/lib/CodeGen/CGBuiltin.cpp | 24 +++++++++++++++++---- clang/lib/Driver/ToolChains/AMDGPU.cpp | 11 ++++++++++ clang/lib/Driver/ToolChains/HIP.cpp | 10 +++++++-- clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 12 +++++------ clang/test/Driver/amdgpu-features.c | 6 ++++++ 7 files changed, 60 insertions(+), 12 deletions(-) diff --git a/clang/docs/ClangCommandLineReference.rst b/clang/docs/ClangCommandLineReference.rst index 5a34c9606f79..30ac27f95f9d 100644 --- a/clang/docs/ClangCommandLineReference.rst +++ b/clang/docs/ClangCommandLineReference.rst @@ -2401,6 +2401,10 @@ AMDGPU CU wavefront execution mode is used if enabled and WGP wavefront execution mode is used if disabled (AMDGPU only) +.. option:: -mwavefrontsize64, -mno-wavefrontsize64 + +Wavefront size 64 is used if enabled and wavefront size 32 if disabled (AMDGPU only) + .. option:: -mxnack, -mno-xnack Enable XNACK (AMDGPU only) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 90b6092b1bae..65c1721daa8b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2216,6 +2216,11 @@ def mcumode : Flag<["-"], "mcumode">, Group, def mno_cumode : Flag<["-"], "mno-cumode">, Group, HelpText<"WGP wavefront execution mode is used (AMDGPU only)">; +def mwavefrontsize64 : Flag<["-"], "mwavefrontsize64">, + Group, HelpText<"Wavefront size 64 is used">; +def mno_wavefrontsize64 : Flag<["-"], "mno-wavefrontsize64">, + Group, HelpText<"Wavefront size 32 is used">; + def faltivec : Flag<["-"], "faltivec">, Group, Flags<[DriverOption]>; def fno_altivec : Flag<["-"], "fno-altivec">, Group, Flags<[DriverOption]>; def maltivec : Flag<["-"], "maltivec">, Group; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 81f3e5664aa5..287e691870c2 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -12736,11 +12736,27 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_uicmp: case AMDGPU::BI__builtin_amdgcn_uicmpl: case AMDGPU::BI__builtin_amdgcn_sicmp: - case AMDGPU::BI__builtin_amdgcn_sicmpl: - return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp); + case AMDGPU::BI__builtin_amdgcn_sicmpl: { + llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); + llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); + + // FIXME-GFX10: How should 32 bit mask be handled? + Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp, + { Builder.getInt64Ty(), Src0->getType() }); + return Builder.CreateCall(F, { Src0, Src1, Src2 }); + } case AMDGPU::BI__builtin_amdgcn_fcmp: - case AMDGPU::BI__builtin_amdgcn_fcmpf: - return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp); + case AMDGPU::BI__builtin_amdgcn_fcmpf: { + llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); + llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); + + // FIXME-GFX10: How should 32 bit mask be handled? + Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp, + { Builder.getInt64Ty(), Src0->getType() }); + return Builder.CreateCall(F, { Src0, Src1, Src2 }); + } case AMDGPU::BI__builtin_amdgcn_class: case AMDGPU::BI__builtin_amdgcn_classf: case AMDGPU::BI__builtin_amdgcn_classh: diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 7f6ddabb2ac1..df4e7ee202bf 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -41,6 +41,17 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D, if (const Arg *dAbi = Args.getLastArg(options::OPT_mamdgpu_debugger_abi)) D.Diag(diag::err_drv_clang_unsupported) << dAbi->getAsString(Args); + if (Args.getLastArg(options::OPT_mwavefrontsize64)) { + Features.push_back("-wavefrontsize16"); + Features.push_back("-wavefrontsize32"); + Features.push_back("+wavefrontsize64"); + } + if (Args.getLastArg(options::OPT_mno_wavefrontsize64)) { + Features.push_back("-wavefrontsize16"); + Features.push_back("+wavefrontsize32"); + Features.push_back("-wavefrontsize64"); + } + handleTargetFeaturesGroup( Args, Features, options::OPT_m_amdgpu_Features_Group); } diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp index 0afd062d4fab..a60485ab03b7 100644 --- a/clang/lib/Driver/ToolChains/HIP.cpp +++ b/clang/lib/Driver/ToolChains/HIP.cpp @@ -316,15 +316,21 @@ void HIPToolChain::addClangTargetOptions( else FlushDenormalControlBC = "oclc_daz_opt_off.amdgcn.bc"; + llvm::StringRef WaveFrontSizeBC; + if (stoi(GFXVersion) < 1000) + WaveFrontSizeBC = "oclc_wavefrontsize64_on.amdgcn.bc"; + else + WaveFrontSizeBC = "oclc_wavefrontsize64_off.amdgcn.bc"; + BCLibs.append({"hip.amdgcn.bc", "opencl.amdgcn.bc", "ocml.amdgcn.bc", "ockl.amdgcn.bc", "oclc_finite_only_off.amdgcn.bc", FlushDenormalControlBC, "oclc_correctly_rounded_sqrt_on.amdgcn.bc", - "oclc_unsafe_math_off.amdgcn.bc", ISAVerBC}); + "oclc_unsafe_math_off.amdgcn.bc", ISAVerBC, + WaveFrontSizeBC}); } for (auto Lib : BCLibs) addBCLib(getDriver(), DriverArgs, CC1Args, LibraryPaths, Lib); - } llvm::opt::DerivedArgList * diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 6b7ea52dab0b..bd7fe78fc96f 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -224,28 +224,28 @@ void test_lerp(global int* out, int a, int b, int c) } // CHECK-LABEL: @test_sicmp_i32 -// CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32) +// CHECK: call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 32) void test_sicmp_i32(global ulong* out, int a, int b) { *out = __builtin_amdgcn_sicmp(a, b, 32); } // CHECK-LABEL: @test_uicmp_i32 -// CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32) +// CHECK: call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 32) void test_uicmp_i32(global ulong* out, uint a, uint b) { *out = __builtin_amdgcn_uicmp(a, b, 32); } // CHECK-LABEL: @test_sicmp_i64 -// CHECK: call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 38) +// CHECK: call i64 @llvm.amdgcn.icmp.i64.i64(i64 %a, i64 %b, i32 38) void test_sicmp_i64(global ulong* out, long a, long b) { *out = __builtin_amdgcn_sicmpl(a, b, 39-1); } // CHECK-LABEL: @test_uicmp_i64 -// CHECK: call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 35) +// CHECK: call i64 @llvm.amdgcn.icmp.i64.i64(i64 %a, i64 %b, i32 35) void test_uicmp_i64(global ulong* out, ulong a, ulong b) { *out = __builtin_amdgcn_uicmpl(a, b, 30+5); @@ -287,14 +287,14 @@ void test_readlane(global int* out, int a, int b) } // CHECK-LABEL: @test_fcmp_f32 -// CHECK: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 5) +// CHECK: call i64 @llvm.amdgcn.fcmp.i64.f32(float %a, float %b, i32 5) void test_fcmp_f32(global ulong* out, float a, float b) { *out = __builtin_amdgcn_fcmpf(a, b, 5); } // CHECK-LABEL: @test_fcmp_f64 -// CHECK: call i64 @llvm.amdgcn.fcmp.f64(double %a, double %b, i32 6) +// CHECK: call i64 @llvm.amdgcn.fcmp.i64.f64(double %a, double %b, i32 6) void test_fcmp_f64(global ulong* out, double a, double b) { *out = __builtin_amdgcn_fcmp(a, b, 3+3); diff --git a/clang/test/Driver/amdgpu-features.c b/clang/test/Driver/amdgpu-features.c index 333f03a98670..f2de25f6730e 100644 --- a/clang/test/Driver/amdgpu-features.c +++ b/clang/test/Driver/amdgpu-features.c @@ -24,6 +24,12 @@ // RUN: %clang -### -target amdgcn -mcpu=gfx700 -mno-sram-ecc %s 2>&1 | FileCheck --check-prefix=NO-SRAM-ECC %s // NO-SRAM-ECC: "-target-feature" "-sram-ecc" +// RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefix=WAVE64 %s +// WAVE64: "-target-feature" "-wavefrontsize16" "-target-feature" "-wavefrontsize32" "-target-feature" "+wavefrontsize64" + +// RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=NO-WAVE64 %s +// NO-WAVE64: "-target-feature" "-wavefrontsize16" "-target-feature" "+wavefrontsize32" "-target-feature" "-wavefrontsize64" + // RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mcumode %s 2>&1 | FileCheck --check-prefix=CUMODE %s // CUMODE: "-target-feature" "+cumode"