AMD family 17h (znver1) enablement

Summary:
This patch enables the following
1. AMD family 17h architecture using "znver1" tune flag (-march, -mcpu).
2. ISAs that are enabled for "znver1" architecture.
3. Checks ADX isa from cpuid to identify "znver1" flag when -march=native is used.
4. ISAs FMA4, XOP are disabled as they are dropped from amdfam17.
5. For the time being, it uses the btver2 scheduler model.
6. Test file is updated to check this flag.

This item is linked to clang review item https://reviews.llvm.org/D28018

Patch by Ganesh Gopalasubramanian

Reviewers: RKSimon, craig.topper

Subscribers: vprasad, RKSimon, ashutosh.nema, llvm-commits

Differential Revision: https://reviews.llvm.org/D28017

llvm-svn: 291543
This commit is contained in:
Craig Topper 2017-01-10 06:01:16 +00:00
parent e6663d376e
commit d55b83128b
6 changed files with 60 additions and 0 deletions

View File

@ -111,6 +111,7 @@ enum ProcessorTypes {
AMDATHLON,
AMDFAM14H,
AMDFAM16H,
AMDFAM17H,
CPU_TYPE_MAX
};
@ -149,6 +150,7 @@ enum ProcessorSubtypes {
AMD_BTVER2,
AMDFAM15H_BDVER3,
AMDFAM15H_BDVER4,
AMDFAM17H_ZNVER1,
CPU_SUBTYPE_MAX
};
@ -742,6 +744,14 @@ static void getAMDProcessorTypeAndSubtype(unsigned int Family,
}
*Subtype = AMD_BTVER2;
break; // "btver2"
case 23:
*Type = AMDFAM17H;
if (Features & (1 << FEATURE_ADX)) {
*Subtype = AMDFAM17H_ZNVER1;
break; // "znver1"
}
*Subtype = AMD_BTVER1;
break;
default:
break; // "generic"
}
@ -950,6 +960,15 @@ StringRef sys::getHostCPUName() {
default:
return "amdfam16";
}
case AMDFAM17H:
switch (Subtype) {
case AMD_BTVER1:
return "btver1";
case AMDFAM17H_ZNVER1:
return "znver1";
default:
return "amdfam17";
}
default:
return "generic";
}

View File

@ -760,6 +760,42 @@ def : Proc<"bdver4", [
FeatureMWAITX
]>;
// TODO: The scheduler model falls to BTVER2 model.
// The znver1 model has to be put in place.
// Zen
def: ProcessorModel<"znver1", BtVer2Model, [
FeatureADX,
FeatureAES,
FeatureAVX2,
FeatureBMI,
FeatureBMI2,
FeatureCLFLUSHOPT,
FeatureCMPXCHG16B,
FeatureF16C,
FeatureFMA,
FeatureFSGSBase,
FeatureFXSR,
FeatureFastLZCNT,
FeatureLAHFSAHF,
FeatureLZCNT,
FeatureMMX,
FeatureMOVBE,
FeatureMWAITX,
FeaturePCLMUL,
FeaturePOPCNT,
FeaturePRFCHW,
FeatureRDRAND,
FeatureRDSEED,
FeatureSHA,
FeatureSMAP,
FeatureSSE4A,
FeatureSlowSHLD,
FeatureX87,
FeatureXSAVE,
FeatureXSAVEC,
FeatureXSAVEOPT,
FeatureXSAVES]>;
def : Proc<"geode", [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>;
def : Proc<"winchip-c6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;

View File

@ -33,3 +33,4 @@
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty

View File

@ -3,6 +3,8 @@
; Eg: zext(or(setcc(cmp), setcc(cmp))) -> shr(or(lzcnt, lzcnt))
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=NOFASTLZCNT %s
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 -mattr=-fast-lzcnt | FileCheck --check-prefix=NOFASTLZCNT %s
; Test one 32-bit input, output is 32-bit, no transformations expected.
define i32 @test_zext_cmp0(i32 %a) {

View File

@ -46,6 +46,7 @@
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver2 2>&1 | FileCheck %s --check-prefix=FAST
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3 2>&1 | FileCheck %s --check-prefix=FAST
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=FAST
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=FAST
; Other chips with slow unaligned memory accesses

View File

@ -17,6 +17,7 @@
; RUN: llc < %s -march=x86-64 -mcpu=bdver2 | FileCheck %s
; RUN: llc < %s -march=x86-64 -mcpu=bdver3 | FileCheck %s
; RUN: llc < %s -march=x86-64 -mcpu=bdver4 | FileCheck %s
; RUN: llc < %s -march=x86-64 -mcpu=znver1 | FileCheck %s
; Verify that for the X86_64 processors that are known to have poor latency
; double precision shift instructions we do not generate 'shld' or 'shrd'