[CodeGen] Teach X86_64ABIInfo about AVX512.

As specified in the SysV AVX512 ABI drafts. It follows the same scheme
as AVX2: 

    Arguments of type __m512 are split into eight eightbyte chunks.
    The least significant one belongs to class SSE and all the others
    to class SSEUP.

This also means we change the OpenMP SIMD default alignment on AVX512.

Based on r240337.
Differential Revision: http://reviews.llvm.org/D9894

llvm-svn: 240338
This commit is contained in:
Ahmed Bougacha 2015-06-22 21:31:43 +00:00
parent d39a4151b3
commit 0b938284da
4 changed files with 93 additions and 5 deletions

View File

@ -2249,7 +2249,9 @@ public:
bool handleTargetFeatures(std::vector<std::string> &Features,
DiagnosticsEngine &Diags) override;
StringRef getABI() const override {
if (getTriple().getArch() == llvm::Triple::x86_64 && SSELevel >= AVX)
if (getTriple().getArch() == llvm::Triple::x86_64 && SSELevel >= AVX512F)
return "avx512";
else if (getTriple().getArch() == llvm::Triple::x86_64 && SSELevel >= AVX)
return "avx";
else if (getTriple().getArch() == llvm::Triple::x86 &&
MMX3DNowLevel == NoMMX3DNow)

View File

@ -1394,12 +1394,15 @@ namespace {
/// The AVX ABI level for X86 targets.
enum class X86AVXABILevel {
None,
AVX
AVX,
AVX512
};
/// \p returns the size in bits of the largest (native) vector for \p AVXLevel.
static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) {
switch (AVXLevel) {
case X86AVXABILevel::AVX512:
return 512;
case X86AVXABILevel::AVX:
return 256;
case X86AVXABILevel::None:
@ -1956,6 +1959,9 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
// Note that per 3.5.7 of AMD64-ABI, 256-bit args are only passed in
// registers if they are "named", i.e. not part of the "..." of a
// variadic function.
//
// Similarly, per 3.2.3. of the AVX512 draft, 512-bits ("named") args are
// split into eight eightbyte chunks, one SSE and seven SSEUP.
Lo = SSE;
Hi = SSEUp;
}
@ -7211,7 +7217,8 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
case llvm::Triple::x86_64: {
StringRef ABI = getTarget().getABI();
X86AVXABILevel AVXLevel = (ABI == "avx" ? X86AVXABILevel::AVX :
X86AVXABILevel AVXLevel = (ABI == "avx512" ? X86AVXABILevel::AVX512 :
ABI == "avx" ? X86AVXABILevel::AVX :
X86AVXABILevel::None);
switch (Triple.getOS()) {

View File

@ -1,7 +1,9 @@
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | \
// RUN: FileCheck %s -check-prefix=CHECK -check-prefix=SSE
// RUN: FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=NO-AVX512
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx | \
// RUN: FileCheck %s -check-prefix=CHECK -check-prefix=AVX
// RUN: FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=NO-AVX512
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx512f | \
// RUN: FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX512
#include <stdarg.h>
// CHECK-LABEL: define signext i8 @f0()
@ -458,3 +460,77 @@ void test54() {
}
// AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
// AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}})
typedef float __m512 __attribute__ ((__vector_size__ (64)));
typedef struct {
__m512 m;
} s512;
s512 x55;
__m512 x56;
// Even on AVX512, aggregates of size larger than four eightbytes have class
// MEMORY (AVX512 draft 0.3 3.2.3p2 Rule 1).
//
// CHECK: declare void @f55(%struct.s512* byval align 64)
void f55(s512 x);
// However, __m512 has type SSE/SSEUP on AVX512.
//
// AVX512: declare void @f56(<16 x float>)
// NO-AVX512: declare void @f56(<16 x float>* byval align 64)
void f56(__m512 x);
void f57() { f55(x55); f56(x56); }
// Like for __m128 on AVX, check that the struct below is passed
// in the same way regardless of AVX512 being used.
//
// CHECK: declare void @f58(%struct.t256* byval align 32)
typedef struct t256 {
__m256 m;
__m256 n;
} two256;
extern void f58(two256 s);
void f59(two256 s) {
f58(s);
}
// CHECK: declare void @f60(%struct.sat256* byval align 32)
typedef struct at256 {
__m256 array[2];
} Atwo256;
typedef struct sat256 {
Atwo256 x;
} SAtwo256;
extern void f60(SAtwo256 s);
void f61(SAtwo256 s) {
f60(s);
}
// AVX512: @f62_helper(i32 0, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
void f62_helper(int, ...);
__m512 x62;
void f62() {
f62_helper(0, x62, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
}
// Like for __m256 on AVX, we always pass __m512 in memory, and don't
// need to use the register save area.
//
// AVX512-LABEL: define void @f63
// AVX512-NOT: br i1
// AVX512: ret void
void f63(__m512 *m, __builtin_va_list argList) {
*m = __builtin_va_arg(argList, __m512);
}
// AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
// AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}})
void f64_helper(__m512, ...);
__m512 x64;
void f64() {
f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
}

View File

@ -1,5 +1,6 @@
// RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86
// RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -target-feature +avx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX
// RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -target-feature +avx512f -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX512
// RUN: %clang_cc1 -fopenmp -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC
// RUN: %clang_cc1 -fopenmp -triple powerpc64-unknown-unknown -target-abi elfv1-qpx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC-QPX
@ -16,6 +17,7 @@ void h1(float *c, float *a, double b[], int size)
// X86-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15
// X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31
// X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63
// PPC-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15
// PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15
@ -25,6 +27,7 @@ void h1(float *c, float *a, double b[], int size)
// X86-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15
// X86-AVX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31
// X86-AVX512-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63
// PPC-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15
// PPC-QPX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31