From 1607a37308cc6993f86ccef1122bb3daab327d5d Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Sat, 9 Feb 2019 00:34:41 +0000 Subject: [PATCH] [AMDGPU] Split dot-insts feature Differential Revision: https://reviews.llvm.org/D57972 llvm-svn: 353588 --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 14 +++++----- clang/lib/Basic/Targets/AMDGPU.cpp | 3 +- clang/test/CodeGenOpenCL/amdgpu-features.cl | 2 +- .../builtins-amdgcn-dl-insts-err.cl | 28 +++++++++---------- 4 files changed, 24 insertions(+), 23 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index fa3db618be7c..20592c7bf25f 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -145,13 +145,13 @@ TARGET_BUILTIN(__builtin_amdgcn_fmed3h, "hhhh", "nc", "gfx9-insts") // Deep learning builtins. //===----------------------------------------------------------------------===// -TARGET_BUILTIN(__builtin_amdgcn_fdot2, "fV2hV2hfIb", "nc", "dot-insts") -TARGET_BUILTIN(__builtin_amdgcn_sdot2, "SiV2SsV2SsSiIb", "nc", "dot-insts") -TARGET_BUILTIN(__builtin_amdgcn_udot2, "UiV2UsV2UsUiIb", "nc", "dot-insts") -TARGET_BUILTIN(__builtin_amdgcn_sdot4, "SiSiSiSiIb", "nc", "dot-insts") -TARGET_BUILTIN(__builtin_amdgcn_udot4, "UiUiUiUiIb", "nc", "dot-insts") -TARGET_BUILTIN(__builtin_amdgcn_sdot8, "SiSiSiSiIb", "nc", "dot-insts") -TARGET_BUILTIN(__builtin_amdgcn_udot8, "UiUiUiUiIb", "nc", "dot-insts") +TARGET_BUILTIN(__builtin_amdgcn_fdot2, "fV2hV2hfIb", "nc", "dot2-insts") +TARGET_BUILTIN(__builtin_amdgcn_sdot2, "SiV2SsV2SsSiIb", "nc", "dot2-insts") +TARGET_BUILTIN(__builtin_amdgcn_udot2, "UiV2UsV2UsUiIb", "nc", "dot2-insts") +TARGET_BUILTIN(__builtin_amdgcn_sdot4, "SiSiSiSiIb", "nc", "dot1-insts") +TARGET_BUILTIN(__builtin_amdgcn_udot4, "UiUiUiUiIb", "nc", "dot2-insts") +TARGET_BUILTIN(__builtin_amdgcn_sdot8, "SiSiSiSiIb", "nc", "dot1-insts") +TARGET_BUILTIN(__builtin_amdgcn_udot8, "UiUiUiUiIb", "nc", "dot2-insts") //===----------------------------------------------------------------------===// // Special builtins. diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index b04d894855ea..7120f6a820d7 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -136,7 +136,8 @@ bool AMDGPUTargetInfo::initFeatureMap( switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { case GK_GFX906: Features["dl-insts"] = true; - Features["dot-insts"] = true; + Features["dot1-insts"] = true; + Features["dot2-insts"] = true; LLVM_FALLTHROUGH; case GK_GFX909: case GK_GFX904: diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl index 7aac4d3a3685..78bdc610617c 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -11,7 +11,7 @@ // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx601 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX601 %s // GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime,+vi-insts" -// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime,+vi-insts" +// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+gfx9-insts,+s-memrealtime,+vi-insts" // GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+fp32-denormals,+fp64-fp16-denormals,+s-memrealtime,+vi-insts" // GFX700: "target-features"="+ci-insts,+fp64-fp16-denormals,-fp32-denormals" // GFX600: "target-features"="+fp64-fp16-denormals,-fp32-denormals" diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl index e2c03a471baa..a82fcbd758db 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-dl-insts-err.cl @@ -12,24 +12,24 @@ kernel void builtins_amdgcn_dl_insts_err( half2 v2hA, half2 v2hB, float fC, short2 v2ssA, short2 v2ssB, int siA, int siB, int siC, ushort2 v2usA, ushort2 v2usB, uint uiA, uint uiB, uint uiC) { - fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, false); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot-insts}} - fOut[1] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, true); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot-insts}} + fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, false); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot2-insts}} + fOut[1] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, true); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot2-insts}} - siOut[0] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, false); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot-insts}} - siOut[1] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, true); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot-insts}} + siOut[0] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, false); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot2-insts}} + siOut[1] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, true); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot2-insts}} - uiOut[0] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, false); // expected-error {{'__builtin_amdgcn_udot2' needs target feature dot-insts}} - uiOut[1] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, true); // expected-error {{'__builtin_amdgcn_udot2' needs target feature dot-insts}} + uiOut[0] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, false); // expected-error {{'__builtin_amdgcn_udot2' needs target feature dot2-insts}} + uiOut[1] = __builtin_amdgcn_udot2(v2usA, v2usB, uiC, true); // expected-error {{'__builtin_amdgcn_udot2' needs target feature dot2-insts}} - siOut[2] = __builtin_amdgcn_sdot4(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot-insts}} - siOut[3] = __builtin_amdgcn_sdot4(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot-insts}} + siOut[2] = __builtin_amdgcn_sdot4(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot1-insts}} + siOut[3] = __builtin_amdgcn_sdot4(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot1-insts}} - uiOut[2] = __builtin_amdgcn_udot4(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot-insts}} - uiOut[3] = __builtin_amdgcn_udot4(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot-insts}} + uiOut[2] = __builtin_amdgcn_udot4(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot2-insts}} + uiOut[3] = __builtin_amdgcn_udot4(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot2-insts}} - siOut[4] = __builtin_amdgcn_sdot8(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot-insts}} - siOut[5] = __builtin_amdgcn_sdot8(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot-insts}} + siOut[4] = __builtin_amdgcn_sdot8(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot1-insts}} + siOut[5] = __builtin_amdgcn_sdot8(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot1-insts}} - uiOut[4] = __builtin_amdgcn_udot8(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot-insts}} - uiOut[5] = __builtin_amdgcn_udot8(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot-insts}} + uiOut[4] = __builtin_amdgcn_udot8(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot2-insts}} + uiOut[5] = __builtin_amdgcn_udot8(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot2-insts}} }