[AMDGPU] Add gfx940 target

This is target definition only.

Differential Revision: https://reviews.llvm.org/D120688
This commit is contained in:
Stanislav Mekhanoshin 2022-02-28 14:20:12 -08:00
parent 59262d2d4b
commit 2e2e64df4a
31 changed files with 329 additions and 22 deletions

View File

@ -85,6 +85,7 @@ enum class CudaArch {
GFX909,
GFX90a,
GFX90c,
GFX940,
GFX1010,
GFX1011,
GFX1012,

View File

@ -113,6 +113,7 @@ static const CudaArchToStringMap arch_names[] = {
GFX(909), // gfx909
GFX(90a), // gfx90a
GFX(90c), // gfx90c
GFX(940), // gfx940
GFX(1010), // gfx1010
GFX(1011), // gfx1011
GFX(1012), // gfx1012

View File

@ -227,6 +227,9 @@ bool AMDGPUTargetInfo::initFeatureMap(
Features["s-memrealtime"] = true;
Features["s-memtime-inst"] = true;
break;
case GK_GFX940:
Features["gfx940-insts"] = true;
LLVM_FALLTHROUGH;
case GK_GFX90A:
Features["gfx90a-insts"] = true;
LLVM_FALLTHROUGH;

View File

@ -205,6 +205,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case CudaArch::GFX909:
case CudaArch::GFX90a:
case CudaArch::GFX90c:
case CudaArch::GFX940:
case CudaArch::GFX1010:
case CudaArch::GFX1011:
case CudaArch::GFX1012:

View File

@ -3916,6 +3916,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(
case CudaArch::GFX909:
case CudaArch::GFX90a:
case CudaArch::GFX90c:
case CudaArch::GFX940:
case CudaArch::GFX1010:
case CudaArch::GFX1011:
case CudaArch::GFX1012:

View File

@ -25,6 +25,7 @@
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx909 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX909 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx90a -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX90A %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx90c -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX90C %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx940 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX940 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1010 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1011 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1011 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1012 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1012 %s
@ -58,6 +59,7 @@
// GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX90A: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX940: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"

View File

@ -107,6 +107,7 @@
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx909 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx909
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx90a
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx90c %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx90c
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx940
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1010
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1011
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1012

View File

@ -92,6 +92,7 @@
// RUN: %clang -### -target amdgcn -mcpu=gfx909 %s 2>&1 | FileCheck --check-prefix=GFX909 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefix=GFX90A %s
// RUN: %clang -### -target amdgcn -mcpu=gfx90c %s 2>&1 | FileCheck --check-prefix=GFX90C %s
// RUN: %clang -### -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefix=GFX940 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefix=GFX1010 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefix=GFX1011 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefix=GFX1012 %s
@ -126,6 +127,7 @@
// GFX909: "-target-cpu" "gfx909"
// GFX90A: "-target-cpu" "gfx90a"
// GFX90C: "-target-cpu" "gfx90c"
// GFX940: "-target-cpu" "gfx940"
// GFX1010: "-target-cpu" "gfx1010"
// GFX1011: "-target-cpu" "gfx1011"
// GFX1012: "-target-cpu" "gfx1012"

View File

@ -29,6 +29,8 @@
// RUN: | FileCheck -check-prefix OK %s
// RUN: %clang -### -x hip -target x86_64-linux-gnu --cuda-gpu-arch=gfx90a -c %s 2>&1 \
// RUN: | FileCheck -check-prefix OK %s
// RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=gfx940 -c %s 2>&1 \
// RUN: | FileCheck -check-prefix OK %s
// RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \
// RUN: | FileCheck -check-prefix OK %s

View File

@ -29,7 +29,7 @@
// RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX
// NVPTX: error: unknown target CPU 'not-a-cpu'
// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035{{$}}
// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035{{$}}
// RUN: not %clang_cc1 -triple r600--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix R600
// R600: error: unknown target CPU 'not-a-cpu'
@ -37,7 +37,7 @@
// RUN: not %clang_cc1 -triple amdgcn--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AMDGCN
// AMDGCN: error: unknown target CPU 'not-a-cpu'
// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035{{$}}
// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035{{$}}
// RUN: not %clang_cc1 -triple wasm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix WEBASM
// WEBASM: error: unknown target CPU 'not-a-cpu'

View File

@ -374,6 +374,13 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following
- Ryzen 3 Pro 4350G
- Ryzen 3 Pro 4350GE
``gfx940`` ``amdgcn`` dGPU - sramecc - Architected *TBA*
- tgsplit flat
- xnack scratch .. TODO::
- Packed
work-item Add product
IDs names.
**GCN GFX10.1 (RDNA 1)** [AMD-GCN-GFX10-RDNA1]_
-----------------------------------------------------------------------------------------------------------------------
``gfx1010`` ``amdgcn`` dGPU - cumode - Absolute - *rocm-amdhsa* - Radeon RX 5700
@ -1224,7 +1231,7 @@ The AMDGPU backend uses the following ELF header:
``EF_AMDGPU_MACH_AMDGCN_GFX1035`` 0x03d ``gfx1035``
``EF_AMDGPU_MACH_AMDGCN_GFX1034`` 0x03e ``gfx1034``
``EF_AMDGPU_MACH_AMDGCN_GFX90A`` 0x03f ``gfx90a``
*reserved* 0x040 Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX940`` 0x040 ``gfx940``
*reserved* 0x041 Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX1013`` 0x042 ``gfx1013``
*reserved* 0x043 Reserved.
@ -3866,7 +3873,7 @@ The fields used by CP for code objects before V3 also match those specified in
bytes
383:352 4 bytes COMPUTE_PGM_RSRC3 GFX6-GFX9
Reserved, must be 0.
GFX90A
GFX90A, GFX940
Compute Shader (CS)
program settings used by
CP to set up
@ -3960,7 +3967,7 @@ The fields used by CP for code objects before V3 also match those specified in
GFX6-GFX9
- vgprs_used 0..256
- max(0, ceil(vgprs_used / 4) - 1)
GFX90A
GFX90A, GFX940
- vgprs_used 0..512
- vgprs_used = align(arch_vgprs, 4)
+ acc_vgprs
@ -4408,7 +4415,7 @@ The fields used by CP for code objects before V3 also match those specified in
..
.. table:: compute_pgm_rsrc3 for GFX90A
.. table:: compute_pgm_rsrc3 for GFX90A, GFX940
:name: amdgpu-amdhsa-compute_pgm_rsrc3-gfx90a-table
======= ======= =============================== ===========================================================================
@ -12268,7 +12275,8 @@ terminated by an ``.end_amdhsa_kernel`` directive.
``.amdhsa_user_sgpr_count`` 0 GFX6-GFX10 Controls USER_SGPR_COUNT in COMPUTE_PGM_RSRC2
:ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`
``.amdhsa_user_sgpr_private_segment_buffer`` 0 GFX6-GFX10 Controls ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER in
:ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
(except :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
GFX940)
``.amdhsa_user_sgpr_dispatch_ptr`` 0 GFX6-GFX10 Controls ENABLE_SGPR_DISPATCH_PTR in
:ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
``.amdhsa_user_sgpr_queue_ptr`` 0 GFX6-GFX10 Controls ENABLE_SGPR_QUEUE_PTR in
@ -12278,7 +12286,8 @@ terminated by an ``.end_amdhsa_kernel`` directive.
``.amdhsa_user_sgpr_dispatch_id`` 0 GFX6-GFX10 Controls ENABLE_SGPR_DISPATCH_ID in
:ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
``.amdhsa_user_sgpr_flat_scratch_init`` 0 GFX6-GFX10 Controls ENABLE_SGPR_FLAT_SCRATCH_INIT in
:ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
(except :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
GFX940)
``.amdhsa_user_sgpr_private_segment_size`` 0 GFX6-GFX10 Controls ENABLE_SGPR_PRIVATE_SEGMENT_SIZE in
:ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
``.amdhsa_wavefront_size32`` Target GFX10 Controls ENABLE_WAVEFRONT_SIZE32 in
@ -12286,6 +12295,9 @@ terminated by an ``.end_amdhsa_kernel`` directive.
Specific
(wavefrontsize64)
``.amdhsa_system_sgpr_private_segment_wavefront_offset`` 0 GFX6-GFX10 Controls ENABLE_PRIVATE_SEGMENT in
(except :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`.
GFX940)
``.amdhsa_enable_private_segment`` 0 GFX940 Controls ENABLE_PRIVATE_SEGMENT in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`.
``.amdhsa_system_sgpr_workgroup_id_x`` 1 GFX6-GFX10 Controls ENABLE_SGPR_WORKGROUP_ID_X in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`.
@ -12305,15 +12317,15 @@ terminated by an ``.end_amdhsa_kernel`` directive.
``.amdhsa_next_free_sgpr`` Required GFX6-GFX10 Maximum SGPR number explicitly referenced, plus one.
Used to calculate GRANULATED_WAVEFRONT_SGPR_COUNT in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`.
``.amdhsa_accum_offset`` Required GFX90A Offset of a first AccVGPR in the unified register file.
Used to calculate ACCUM_OFFSET in
``.amdhsa_accum_offset`` Required GFX90A, Offset of a first AccVGPR in the unified register file.
GFX940 Used to calculate ACCUM_OFFSET in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx90a-table`.
``.amdhsa_reserve_vcc`` 1 GFX6-GFX10 Whether the kernel may use the special VCC SGPR.
Used to calculate GRANULATED_WAVEFRONT_SGPR_COUNT in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`.
``.amdhsa_reserve_flat_scratch`` 1 GFX7-GFX10 Whether the kernel may use flat instructions to access
scratch memory. Used to calculate
GRANULATED_WAVEFRONT_SGPR_COUNT in
(except scratch memory. Used to calculate
GFX940) GRANULATED_WAVEFRONT_SGPR_COUNT in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`.
``.amdhsa_reserve_xnack_mask`` Target GFX8-GFX10 Whether the kernel may trigger XNACK replay.
Feature Used to calculate GRANULATED_WAVEFRONT_SGPR_COUNT in
@ -12341,8 +12353,8 @@ terminated by an ``.end_amdhsa_kernel`` directive.
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`.
``.amdhsa_fp16_overflow`` 0 GFX9-GFX10 Controls FP16_OVFL in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`.
``.amdhsa_tg_split`` Target GFX90A Controls TG_SPLIT in
Feature :ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx90a-table`.
``.amdhsa_tg_split`` Target GFX90A, Controls TG_SPLIT in
Feature GFX940 :ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx90a-table`.
Specific
(tgsplit)
``.amdhsa_workgroup_processor_mode`` Target GFX10 Controls ENABLE_WGP_MODE in

View File

@ -754,7 +754,7 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX1035 = 0x03d,
EF_AMDGPU_MACH_AMDGCN_GFX1034 = 0x03e,
EF_AMDGPU_MACH_AMDGCN_GFX90A = 0x03f,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X40 = 0x040,
EF_AMDGPU_MACH_AMDGCN_GFX940 = 0x040,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X41 = 0x041,
EF_AMDGPU_MACH_AMDGCN_GFX1013 = 0x042,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X43 = 0x043,

View File

@ -86,6 +86,7 @@ enum GPUKind : uint32_t {
GK_GFX909 = 65,
GK_GFX90A = 66,
GK_GFX90C = 67,
GK_GFX940 = 68,
GK_GFX1010 = 71,
GK_GFX1011 = 72,

View File

@ -459,6 +459,8 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
return "gfx90a";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C:
return "gfx90c";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940:
return "gfx940";
// AMDGCN GFX10.
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010:

View File

@ -563,6 +563,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX909, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX90C, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX940, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1011, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1012, EF_AMDGPU_MACH);

View File

@ -104,6 +104,7 @@ constexpr GPUInfo AMDGCNGPUs[] = {
{{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
{{"gfx90a"}, {"gfx90a"}, GK_GFX90A, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx90c"}, {"gfx90c"}, GK_GFX90C, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
{{"gfx940"}, {"gfx940"}, GK_GFX940, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
{{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
{{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
@ -217,6 +218,7 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
case GK_GFX909: return {9, 0, 9};
case GK_GFX90A: return {9, 0, 10};
case GK_GFX90C: return {9, 0, 12};
case GK_GFX940: return {9, 4, 0};
case GK_GFX1010: return {10, 1, 0};
case GK_GFX1011: return {10, 1, 1};
case GK_GFX1012: return {10, 1, 2};

View File

@ -313,6 +313,12 @@ def FeatureGFX90AInsts : SubtargetFeature<"gfx90a-insts",
"Additional instructions for GFX90A+"
>;
def FeatureGFX940Insts : SubtargetFeature<"gfx940-insts",
"GFX940Insts",
"true",
"Additional instructions for GFX940+"
>;
def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
"GFX10Insts",
"true",
@ -1016,6 +1022,30 @@ def FeatureISAVersion9_0_C : FeatureSet<
FeatureMadMacF32Insts,
FeatureImageGather4D16Bug]>;
def FeatureISAVersion9_4_0 : FeatureSet<
[FeatureGFX9,
FeatureGFX90AInsts,
FeatureGFX940Insts,
FeatureFmaMixInsts,
FeatureLDSBankCount32,
FeatureDLInsts,
FeatureDot1Insts,
FeatureDot2Insts,
FeatureDot3Insts,
FeatureDot4Insts,
FeatureDot5Insts,
FeatureDot6Insts,
FeatureDot7Insts,
Feature64BitDPP,
FeaturePackedFP32Ops,
FeatureMAIInsts,
FeaturePkFmacF16Inst,
FeatureAtomicFaddInsts,
FeatureSupportsSRAMECC,
FeaturePackedTID,
FeatureArchitectedFlatScratch,
FullRate64Ops]>;
// TODO: Organize more features into groups.
def FeatureGroup {
// Bugs present on gfx10.1.
@ -1293,12 +1323,22 @@ def isGFX8GFX9NotGFX90A :
AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX90AInsts))>;
def isGFX90AOnly :
Predicate<"Subtarget->hasGFX90AInsts()">,
AssemblerPredicate<(all_of FeatureGFX90AInsts)>;
Predicate<"Subtarget->hasGFX90AInsts() && !Subtarget->hasGFX940Insts()">,
AssemblerPredicate<(all_of FeatureGFX90AInsts, (not FeatureGFX940Insts))>;
def isGFX908orGFX90A :
Predicate<"Subtarget->hasMAIInsts()">,
AssemblerPredicate<(all_of FeatureMAIInsts)>;
Predicate<"Subtarget->hasMAIInsts() && !Subtarget->hasGFX940Insts()">,
AssemblerPredicate<(all_of FeatureMAIInsts, (not FeatureGFX940Insts))>;
def isGFX940Plus :
Predicate<"Subtarget->hasGFX940Insts()">,
AssemblerPredicate<(all_of FeatureGFX940Insts)>;
def isGFX8GFX9NotGFX940 :
Predicate<"!Subtarget->hasGFX940Insts() &&"
"(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
" Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">,
AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX940Insts))>;
def isGFX8GFX9 :
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
@ -1327,7 +1367,7 @@ def HasD16LoadStore : Predicate<"Subtarget->hasD16LoadStore()">,
AssemblerPredicate<(all_of FeatureGFX9Insts)>;
def HasFlatScratchSTMode : Predicate<"Subtarget->hasFlatScratchSTMode()">,
AssemblerPredicate<(any_of FeatureGFX10_3Insts)>;
AssemblerPredicate<(any_of FeatureGFX10_3Insts, FeatureGFX940Insts)>;
def HasGFX10_AEncoding : Predicate<"Subtarget->hasGFX10_AEncoding()">,
AssemblerPredicate<(all_of FeatureGFX10_AEncoding)>;

View File

@ -216,6 +216,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
GFX8Insts(false),
GFX9Insts(false),
GFX90AInsts(false),
GFX940Insts(false),
GFX10Insts(false),
GFX10_3Insts(false),
GFX7GFX8GFX9Insts(false),

View File

@ -192,6 +192,10 @@ def : ProcessorModel<"gfx90c", SIQuarterSpeedModel,
FeatureISAVersion9_0_C.Features
>;
def : ProcessorModel<"gfx940", SIDPFullSpeedModel,
FeatureISAVersion9_4_0.Features
>;
//===----------------------------------------------------------------------===//
// GCN GFX10.
//===----------------------------------------------------------------------===//

View File

@ -101,6 +101,7 @@ protected:
bool GFX8Insts;
bool GFX9Insts;
bool GFX90AInsts;
bool GFX940Insts;
bool GFX10Insts;
bool GFX10_3Insts;
bool GFX7GFX8GFX9Insts;
@ -559,7 +560,7 @@ public:
// The ST addressing mode means no registers are used, either VGPR or SGPR,
// but only immediate offset is swizzled and added to the FLAT scratch base.
bool hasFlatScratchSTMode() const {
return hasFlatScratchInsts() && hasGFX10_3Insts();
return hasFlatScratchInsts() && (hasGFX10_3Insts() || hasGFX940Insts());
}
bool hasScalarFlatScratchInsts() const {
@ -962,6 +963,10 @@ public:
bool hasPackedTID() const { return HasPackedTID; }
// GFX940 is a derivation to GFX90A. hasGFX940Insts() being true implies that
// hasGFX90AInsts is also true.
bool hasGFX940Insts() const { return GFX940Insts; }
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs
/// SGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;

View File

@ -106,6 +106,7 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A: AK = GK_GFX90A; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: AK = GK_GFX940; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
@ -169,6 +170,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
case GK_GFX90A: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A;
case GK_GFX90C: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C;
case GK_GFX940: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940;
case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;

View File

@ -71,6 +71,9 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx90c < %s | FileCheck --check-prefixes=V3-GFX90C-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx90c -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX90C-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx90c -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX90C-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx940 < %s | FileCheck --check-prefixes=V3-GFX940-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx940 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX940-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx940 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX940-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1010 < %s | FileCheck --check-prefixes=V3-GFX1010-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1010 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1010-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1010-XNACK %s
@ -163,6 +166,9 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c < %s | FileCheck --check-prefixes=GFX90C %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=-xnack < %s | FileCheck --check-prefixes=GFX90C-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=+xnack < %s | FileCheck --check-prefixes=GFX90C-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck --check-prefixes=GFX940 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX940-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX940-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX1010 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX1010-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX1010-XNACK %s
@ -216,6 +222,8 @@
; V3-GFX909-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909+xnack"
; V3-GFX90C-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c"
; V3-GFX90C-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c+xnack"
; V3-GFX940-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940+sram-ecc"
; V3-GFX940-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940+xnack+sram-ecc"
; V3-GFX1010-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010"
; V3-GFX1010-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack"
; V3-GFX1011-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011"
@ -282,6 +290,9 @@
; GFX90C: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c"
; GFX90C-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c:xnack-"
; GFX90C-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c:xnack+"
; GFX940: .amdgcn_target "amdgcn-amd-amdhsa--gfx940"
; GFX940-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940:xnack-"
; GFX940-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940:xnack+"
; GFX1010: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010"
; GFX1010-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack-"
; GFX1010-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack+"

View File

@ -54,6 +54,7 @@
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx909 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX909 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx90a < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX90A %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx90c < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX90C %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx940 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX940 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1010 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1010 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1011 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1011 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1012 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1012 %s
@ -115,6 +116,7 @@
; GFX909: EF_AMDGPU_MACH_AMDGCN_GFX909 (0x31)
; GFX90A: EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)
; GFX90C: EF_AMDGPU_MACH_AMDGCN_GFX90C (0x32)
; GFX940: EF_AMDGPU_MACH_AMDGCN_GFX940 (0x40)
; GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33)
; GFX1011: EF_AMDGPU_MACH_AMDGCN_GFX1011 (0x34)
; GFX1012: EF_AMDGPU_MACH_AMDGCN_GFX1012 (0x35)

View File

@ -9,6 +9,9 @@
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx90a < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX90A %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx90a < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX90A %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx940 < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX940 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx940 -mattr=+sramecc < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX940 %s
; NO-SRAM-ECC-GFX906: Flags [
; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_FEATURE_XNACK_V3 (0x100)
; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
@ -36,6 +39,11 @@
; SRAM-ECC-GFX90A: EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)
; SRAM-ECC-GFX90A: ]
; SRAM-ECC-GFX940: Flags [
; SRAM-ECC-GFX940: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
; SRAM-ECC-GFX940: EF_AMDGPU_MACH_AMDGCN_GFX940 (0x40)
; SRAM-ECC-GFX940: ]
define amdgpu_kernel void @elf_header() {
ret void
}

View File

@ -1,6 +1,9 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=-xnack --amdhsa-code-object-version=2 < %s 2>&1 | FileCheck --check-prefix=GFX90C-VALID %s
; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c --amdhsa-code-object-version=2 < %s 2>&1 | FileCheck --check-prefix=GFX90C-ERROR %s
; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 --amdhsa-code-object-version=2 < %s 2>&1 | FileCheck --check-prefix=GFX940-ERROR %s
; GFX90C-VALID: .hsa_code_object_isa 9,0,12,"AMD","AMDGPU"
; GFX90C-VALID: .amd_amdgpu_isa "amdgcn-amd-amdhsa--gfx90c"
; GFX90C-ERROR: LLVM ERROR: AMD GPU code object V2 does not support processor gfx90c with XNACK being ON or ANY
; GFX940-ERROR: LLVM ERROR: AMD GPU code object V2 does not support processor gfx940

View File

@ -0,0 +1,176 @@
// RUN: llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck --check-prefix=ASM %s
// RUN: llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=obj < %s > %t
// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
// big endian not supported
// XFAIL: powerpc-, powerpc64-, s390x, mips-, mips64-, sparc
// READOBJ: Section Headers
// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000080 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64
// READOBJ: Relocation section '.rela.rodata' at offset
// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries:
// READOBJ-DAG: {{[0-9]+}}: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete
// READOBJ-DAG: {{[0-9]+}}: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd
// READOBJ-DAG: {{[0-9]+}}: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal
// READOBJ-DAG: {{[0-9]+}}: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd
// OBJDUMP: Contents of section .rodata
// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
// minimal
// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000
// complete
// OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000
// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000100
// OBJDUMP-NEXT: 0070 01510104 130f007f 5e000000 00000000
.text
// ASM: .text
.amdgcn_target "amdgcn-amd-amdhsa--gfx940+xnack+sram-ecc"
// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx940+xnack+sram-ecc"
.p2align 8
.type minimal,@function
minimal:
s_endpgm
.p2align 8
.type complete,@function
complete:
s_endpgm
.rodata
// ASM: .rodata
// Test that only specifying required directives is allowed, and that defaulted
// values are omitted.
.p2align 6
.amdhsa_kernel minimal
.amdhsa_next_free_vgpr 0
.amdhsa_next_free_sgpr 0
.amdhsa_accum_offset 4
.end_amdhsa_kernel
// ASM: .amdhsa_kernel minimal
// ASM: .amdhsa_next_free_vgpr 0
// ASM-NEXT: .amdhsa_next_free_sgpr 0
// ASM-NEXT: .amdhsa_accum_offset 4
// ASM: .amdhsa_tg_split 0
// ASM: .end_amdhsa_kernel
// Test that we can specify all available directives with non-default values.
.p2align 6
.amdhsa_kernel complete
.amdhsa_group_segment_fixed_size 1
.amdhsa_private_segment_fixed_size 1
.amdhsa_user_sgpr_dispatch_ptr 1
.amdhsa_user_sgpr_queue_ptr 1
.amdhsa_user_sgpr_kernarg_segment_ptr 1
.amdhsa_user_sgpr_dispatch_id 1
.amdhsa_user_sgpr_private_segment_size 1
.amdhsa_enable_private_segment 1
.amdhsa_system_sgpr_workgroup_id_x 0
.amdhsa_system_sgpr_workgroup_id_y 1
.amdhsa_system_sgpr_workgroup_id_z 1
.amdhsa_system_sgpr_workgroup_info 1
.amdhsa_system_vgpr_workitem_id 1
.amdhsa_next_free_vgpr 9
.amdhsa_next_free_sgpr 27
.amdhsa_accum_offset 4
.amdhsa_reserve_vcc 0
.amdhsa_float_round_mode_32 1
.amdhsa_float_round_mode_16_64 1
.amdhsa_float_denorm_mode_32 1
.amdhsa_float_denorm_mode_16_64 0
.amdhsa_dx10_clamp 0
.amdhsa_ieee_mode 0
.amdhsa_fp16_overflow 1
.amdhsa_tg_split 1
.amdhsa_exception_fp_ieee_invalid_op 1
.amdhsa_exception_fp_denorm_src 1
.amdhsa_exception_fp_ieee_div_zero 1
.amdhsa_exception_fp_ieee_overflow 1
.amdhsa_exception_fp_ieee_underflow 1
.amdhsa_exception_fp_ieee_inexact 1
.amdhsa_exception_int_div_zero 1
.end_amdhsa_kernel
// ASM: .amdhsa_kernel complete
// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
// ASM-NEXT: .amdhsa_kernarg_size 0
// ASM-NEXT: .amdhsa_user_sgpr_count 9
// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
// ASM-NEXT: .amdhsa_enable_private_segment 1
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1
// ASM-NEXT: .amdhsa_next_free_vgpr 9
// ASM-NEXT: .amdhsa_next_free_sgpr 27
// ASM-NEXT: .amdhsa_accum_offset 4
// ASM-NEXT: .amdhsa_reserve_vcc 0
// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
// ASM-NEXT: .amdhsa_float_round_mode_32 1
// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0
// ASM-NEXT: .amdhsa_dx10_clamp 0
// ASM-NEXT: .amdhsa_ieee_mode 0
// ASM-NEXT: .amdhsa_fp16_overflow 1
// ASM-NEXT: .amdhsa_tg_split 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
// ASM-NEXT: .amdhsa_exception_int_div_zero 1
// ASM-NEXT: .end_amdhsa_kernel
.section .foo
.byte .amdgcn.gfx_generation_number
// ASM: .byte 9
.byte .amdgcn.next_free_vgpr
// ASM: .byte 0
.byte .amdgcn.next_free_sgpr
// ASM: .byte 0
v_mov_b32_e32 v7, s10
.byte .amdgcn.next_free_vgpr
// ASM: .byte 8
.byte .amdgcn.next_free_sgpr
// ASM: .byte 11
.set .amdgcn.next_free_vgpr, 0
.set .amdgcn.next_free_sgpr, 0
.byte .amdgcn.next_free_vgpr
// ASM: .byte 0
.byte .amdgcn.next_free_sgpr
// ASM: .byte 0
v_mov_b32_e32 v16, s3
.byte .amdgcn.next_free_vgpr
// ASM: .byte 17
.byte .amdgcn.next_free_sgpr
// ASM: .byte 4

View File

@ -150,6 +150,10 @@
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX90C | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX90C %s
# RUN: obj2yaml %t.o.AMDGCN_GFX90C | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX90C %s
# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX940/' %s | yaml2obj -o %t.o.AMDGCN_GFX940
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX940 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX940 %s
# RUN: obj2yaml %t.o.AMDGCN_GFX940 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX940 %s
# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX1010/' %s | yaml2obj -o %t.o.AMDGCN_GFX1010
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1010 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1010 %s
# RUN: obj2yaml %t.o.AMDGCN_GFX1010 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1010 %s
@ -321,6 +325,9 @@
# ELF-AMDGCN-GFX90C: EF_AMDGPU_MACH_AMDGCN_GFX90C (0x32)
# YAML-AMDGCN-GFX90C: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX90C ]
# ELF-AMDGCN-GFX940: EF_AMDGPU_MACH_AMDGCN_GFX940 (0x40)
# YAML-AMDGCN-GFX940: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX940 ]
# ELF-AMDGCN-GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33)
# YAML-AMDGCN-GFX1010: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1010 ]

View File

@ -58,6 +58,11 @@ define amdgpu_kernel void @test_kernel() {
; ----------------------------------GFX9---------------------------------------
;
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx940 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
; RUN: diff %t-specify.txt %t-detect.txt
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx90c %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt

View File

@ -196,6 +196,15 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90C
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90C -DFLAG_VALUE=0x32
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX940
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX940 -DFLAG_VALUE=0x40
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX940
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX940 -DFLAG_VALUE=0x40
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX940
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX940 -DFLAG_VALUE=0x40
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010 -DFLAG_VALUE=0x33

View File

@ -1527,6 +1527,7 @@ const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion3[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90A),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90C),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX940),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012),
@ -1581,6 +1582,7 @@ const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90A),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90C),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX940),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012),

View File

@ -96,7 +96,7 @@ foreach(sm ${nvptx_sm_list})
endif()
endforeach()
set(amdgpu_mcpus gfx700 gfx701 gfx801 gfx803 gfx900 gfx902 gfx906 gfx908 gfx90a gfx90c gfx1010 gfx1030 gfx1031)
set(amdgpu_mcpus gfx700 gfx701 gfx801 gfx803 gfx900 gfx902 gfx906 gfx908 gfx90a gfx90c gfx940 gfx1010 gfx1030 gfx1031)
if (DEFINED LIBOMPTARGET_AMDGCN_GFXLIST)
set(amdgpu_mcpus ${LIBOMPTARGET_AMDGCN_GFXLIST})
endif()