From a18ae9bd70c2f8e3d0ca8778ac92a4b30c3d1aa4 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sat, 17 Oct 2015 13:23:38 +0000
Subject: [PATCH] [CostModel] Fixed AVX integer shift costs

Targets with AVX but without AVX2 were incorrectly reporting costs of 256-bit integer shifts.

llvm-svn: 250611
---
 .../lib/Target/X86/X86TargetTransformInfo.cpp | 48 ++++++++++++++-----
 .../CostModel/X86/vshift-ashr-cost.ll         | 28 +++++------
 .../CostModel/X86/vshift-lshr-cost.ll         | 24 +++++-----
 .../Analysis/CostModel/X86/vshift-shl-cost.ll | 16 +++----
 4 files changed, 70 insertions(+), 46 deletions(-)

diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 32e0f986695d..c0a12a82a32d 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -248,19 +248,31 @@ int X86TTIImpl::getArithmeticInstrCost(
     // custom.
     // Constant splats are cheaper for the following instructions.
     { ISD::SHL,  MVT::v16i8,  1 }, // psllw.
+    { ISD::SHL,  MVT::v32i8,  2 }, // psllw.
     { ISD::SHL,  MVT::v8i16,  1 }, // psllw.
+    { ISD::SHL,  MVT::v16i16, 2 }, // psllw.
     { ISD::SHL,  MVT::v4i32,  1 }, // pslld
+    { ISD::SHL,  MVT::v8i32,  2 }, // pslld
     { ISD::SHL,  MVT::v2i64,  1 }, // psllq.
+    { ISD::SHL,  MVT::v4i64,  2 }, // psllq.
 
     { ISD::SRL,  MVT::v16i8,  1 }, // psrlw.
+    { ISD::SRL,  MVT::v32i8,  2 }, // psrlw.
     { ISD::SRL,  MVT::v8i16,  1 }, // psrlw.
+    { ISD::SRL,  MVT::v16i16, 2 }, // psrlw.
     { ISD::SRL,  MVT::v4i32,  1 }, // psrld.
+    { ISD::SRL,  MVT::v8i32,  2 }, // psrld.
     { ISD::SRL,  MVT::v2i64,  1 }, // psrlq.
+    { ISD::SRL,  MVT::v4i64,  2 }, // psrlq.
 
     { ISD::SRA,  MVT::v16i8,  4 }, // psrlw, pand, pxor, psubb.
+    { ISD::SRA,  MVT::v32i8,  8 }, // psrlw, pand, pxor, psubb.
     { ISD::SRA,  MVT::v8i16,  1 }, // psraw.
+    { ISD::SRA,  MVT::v16i16, 2 }, // psraw.
     { ISD::SRA,  MVT::v4i32,  1 }, // psrad.
+    { ISD::SRA,  MVT::v8i32,  2 }, // psrad.
     { ISD::SRA,  MVT::v2i64,  4 }, // 2 x psrad + shuffle.
+    { ISD::SRA,  MVT::v4i64,  8 }, // 2 x psrad + shuffle.
 
     { ISD::SDIV, MVT::v8i16,  6 }, // pmulhw sequence
     { ISD::UDIV, MVT::v8i16,  6 }, // pmulhuw sequence
@@ -282,15 +294,22 @@ int X86TTIImpl::getArithmeticInstrCost(
   if (ISD == ISD::SHL &&
       Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) {
     EVT VT = LT.second;
+    // Vector shift left by non uniform constant can be lowered
+    // into vector multiply (pmullw/pmulld).
     if ((VT == MVT::v8i16 && ST->hasSSE2()) ||
         (VT == MVT::v4i32 && ST->hasSSE41()))
-      // Vector shift left by non uniform constant can be lowered
-      // into vector multiply (pmullw/pmulld).
       return LT.first;
+
+    // v16i16 and v8i32 shifts by non-uniform constants are lowered into a
+    // sequence of extract + two vector multiply + insert.
+    if ((VT == MVT::v8i32 || VT == MVT::v16i16) &&
+       (ST->hasAVX() && !ST->hasAVX2()))
+      ISD = ISD::MUL;
+
+    // A vector shift left by non uniform constant is converted
+    // into a vector multiply; the new multiply is eventually
+    // lowered into a sequence of shuffles and 2 x pmuludq.
     if (VT == MVT::v4i32 && ST->hasSSE2())
-      // A vector shift left by non uniform constant is converted
-      // into a vector multiply; the new multiply is eventually
-      // lowered into a sequence of shuffles and 2 x pmuludq.
       ISD = ISD::MUL;
   }
 
@@ -304,20 +323,31 @@ int X86TTIImpl::getArithmeticInstrCost(
     // used for vectorization and we don't want to make vectorized code worse
     // than scalar code.
     { ISD::SHL,  MVT::v16i8,    26 }, // cmpgtb sequence.
+    { ISD::SHL,  MVT::v32i8,  2*26 }, // cmpgtb sequence.
     { ISD::SHL,  MVT::v8i16,    32 }, // cmpgtb sequence.
+    { ISD::SHL,  MVT::v16i16, 2*32 }, // cmpgtb sequence.
     { ISD::SHL,  MVT::v4i32,   2*5 }, // We optimized this using mul.
+    { ISD::SHL,  MVT::v8i32, 2*2*5 }, // We optimized this using mul.
     { ISD::SHL,  MVT::v2i64,     4 }, // splat+shuffle sequence.
-    { ISD::SHL,  MVT::v4i64,     8 }, // splat+shuffle sequence.
+    { ISD::SHL,  MVT::v4i64,   2*4 }, // splat+shuffle sequence.
 
     { ISD::SRL,  MVT::v16i8,    26 }, // cmpgtb sequence.
+    { ISD::SRL,  MVT::v32i8,  2*26 }, // cmpgtb sequence.
     { ISD::SRL,  MVT::v8i16,    32 }, // cmpgtb sequence.
+    { ISD::SRL,  MVT::v16i16, 2*32 }, // cmpgtb sequence.
     { ISD::SRL,  MVT::v4i32,    16 }, // Shift each lane + blend.
+    { ISD::SRL,  MVT::v8i32,  2*16 }, // Shift each lane + blend.
     { ISD::SRL,  MVT::v2i64,     4 }, // splat+shuffle sequence.
+    { ISD::SRL,  MVT::v4i64,   2*4 }, // splat+shuffle sequence.
 
     { ISD::SRA,  MVT::v16i8,    54 }, // unpacked cmpgtb sequence.
+    { ISD::SRA,  MVT::v32i8,  2*54 }, // unpacked cmpgtb sequence.
     { ISD::SRA,  MVT::v8i16,    32 }, // cmpgtb sequence.
+    { ISD::SRA,  MVT::v16i16, 2*32 }, // cmpgtb sequence.
     { ISD::SRA,  MVT::v4i32,    16 }, // Shift each lane + blend.
+    { ISD::SRA,  MVT::v8i32,  2*16 }, // Shift each lane + blend.
     { ISD::SRA,  MVT::v2i64,    12 }, // srl/xor/sub sequence.
+    { ISD::SRA,  MVT::v4i64,  2*12 }, // srl/xor/sub sequence.
 
     // It is not a good idea to vectorize division. We have to scalarize it and
     // in the process we will often end up having to spilling regular
@@ -363,12 +393,6 @@ int X86TTIImpl::getArithmeticInstrCost(
   if (ST->hasAVX() && !ST->hasAVX2()) {
     EVT VT = LT.second;
 
-    // v16i16 and v8i32 shifts by non-uniform constants are lowered into a
-    // sequence of extract + two vector multiply + insert.
-    if (ISD == ISD::SHL && (VT == MVT::v8i32 || VT == MVT::v16i16) &&
-        Op2Info == TargetTransformInfo::OK_NonUniformConstantValue)
-      ISD = ISD::MUL;
-
     int Idx = CostTableLookup(AVX1CostTable, ISD, VT);
     if (Idx != -1)
       return LT.first * AVX1CostTable[Idx].Cost;
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
index cb1aa60f034c..a0d07d7b6ec0 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
@@ -26,7 +26,7 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64':
 ; SSE2: Found an estimated cost of 24 for instruction:   %shift
 ; SSE41: Found an estimated cost of 24 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 24 for instruction:   %shift
 ; AVX2: Found an estimated cost of 4 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = ashr <4 x i64> %a, %b
@@ -49,7 +49,7 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
 ; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 32 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -72,7 +72,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
 ; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 64 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = ashr <16 x i16> %a, %b
@@ -94,7 +94,7 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
 ; SSE2: Found an estimated cost of 108 for instruction:   %shift
 ; SSE41: Found an estimated cost of 108 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 108 for instruction:   %shift
 ; AVX2: Found an estimated cost of 24 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = ashr <32 x i8> %a, %b
@@ -121,7 +121,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i64':
 ; SSE2: Found an estimated cost of 24 for instruction:   %shift
 ; SSE41: Found an estimated cost of 24 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 24 for instruction:   %shift
 ; AVX2: Found an estimated cost of 4 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -146,7 +146,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
 ; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 32 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -171,7 +171,7 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
 ; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 64 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -195,7 +195,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
 ; SSE2: Found an estimated cost of 108 for instruction:   %shift
 ; SSE41: Found an estimated cost of 108 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 108 for instruction:   %shift
 ; AVX2: Found an estimated cost of 24 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -222,7 +222,7 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64':
 ; SSE2: Found an estimated cost of 24 for instruction:   %shift
 ; SSE41: Found an estimated cost of 24 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 24 for instruction:   %shift
 ; AVX2: Found an estimated cost of 4 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = ashr <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
@@ -245,7 +245,7 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
 ; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 32 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -268,7 +268,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
 ; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 64 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
@@ -290,7 +290,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
 ; SSE2: Found an estimated cost of 108 for instruction:   %shift
 ; SSE41: Found an estimated cost of 108 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 108 for instruction:   %shift
 ; AVX2: Found an estimated cost of 24 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
@@ -316,7 +316,7 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i64':
 ; SSE2: Found an estimated cost of 8 for instruction:   %shift
 ; SSE41: Found an estimated cost of 8 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 8 for instruction:   %shift
 ; AVX2: Found an estimated cost of 4 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
@@ -384,7 +384,7 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8':
 ; SSE2: Found an estimated cost of 8 for instruction:   %shift
 ; SSE41: Found an estimated cost of 8 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 8 for instruction:   %shift
 ; AVX2: Found an estimated cost of 24 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
index 08b752b778e2..a686b4368f21 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
@@ -27,7 +27,7 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64':
 ; SSE2: Found an estimated cost of 8 for instruction:   %shift
 ; SSE41: Found an estimated cost of 8 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 8 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -51,7 +51,7 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
 ; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 32 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -74,7 +74,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
 ; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 64 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = lshr <16 x i16> %a, %b
@@ -96,7 +96,7 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
 ; SSE2: Found an estimated cost of 52 for instruction:   %shift
 ; SSE41: Found an estimated cost of 52 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 52 for instruction:   %shift
 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = lshr <32 x i8> %a, %b
@@ -124,7 +124,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i64':
 ; SSE2: Found an estimated cost of 8 for instruction:   %shift
 ; SSE41: Found an estimated cost of 8 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 8 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -150,7 +150,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
 ; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 32 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -175,7 +175,7 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
 ; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 64 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -199,7 +199,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
 ; SSE2: Found an estimated cost of 52 for instruction:   %shift
 ; SSE41: Found an estimated cost of 52 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 52 for instruction:   %shift
 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -227,7 +227,7 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64':
 ; SSE2: Found an estimated cost of 8 for instruction:   %shift
 ; SSE41: Found an estimated cost of 8 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 8 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -251,7 +251,7 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
 ; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 32 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -274,7 +274,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
 ; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 64 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
@@ -296,7 +296,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
 ; SSE2: Found an estimated cost of 52 for instruction:   %shift
 ; SSE41: Found an estimated cost of 52 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 52 for instruction:   %shift
 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
index 116828c9964c..85ca5a5a7f32 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
@@ -52,7 +52,7 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
 ; SSE2: Found an estimated cost of 20 for instruction:   %shift
 ; SSE41: Found an estimated cost of 20 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 20 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -75,7 +75,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
 ; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 64 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %shift = shl <16 x i16> %a, %b
@@ -97,7 +97,7 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
 ; SSE2: Found an estimated cost of 52 for instruction:   %shift
 ; SSE41: Found an estimated cost of 52 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 52 for instruction:   %shift
 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %shift = shl <32 x i8> %a, %b
@@ -151,7 +151,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
 ; SSE2: Found an estimated cost of 20 for instruction:   %shift
 ; SSE41: Found an estimated cost of 20 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 20 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -176,7 +176,7 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
 ; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 64 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -200,7 +200,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
 ; SSE2: Found an estimated cost of 52 for instruction:   %shift
 ; SSE41: Found an estimated cost of 52 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 52 for instruction:   %shift
 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -298,7 +298,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
 ; SSE2: Found an estimated cost of 52 for instruction:   %shift
 ; SSE41: Found an estimated cost of 52 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 52 for instruction:   %shift
 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
@@ -325,7 +325,7 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i64':
 ; SSE2: Found an estimated cost of 2 for instruction:   %shift
 ; SSE41: Found an estimated cost of 2 for instruction:   %shift
-; AVX: Found an estimated cost of 8 for instruction:   %shift
+; AVX: Found an estimated cost of 2 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift