From bb3f931123e9a13ce66e27b25406570d8e3c36b8 Mon Sep 17 00:00:00 2001 From: Vincent Lejeune Date: Wed, 31 Jul 2013 19:32:07 +0000 Subject: [PATCH] R600: Avoid more than 4 literals in the same instruction group at scheduling llvm-svn: 187515 --- llvm/lib/Target/R600/R600InstrInfo.cpp | 5 ++ llvm/test/CodeGen/R600/max-literals.ll | 68 ++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 llvm/test/CodeGen/R600/max-literals.ll diff --git a/llvm/lib/Target/R600/R600InstrInfo.cpp b/llvm/lib/Target/R600/R600InstrInfo.cpp index 7ef3d85123a1..812675a61d8c 100644 --- a/llvm/lib/Target/R600/R600InstrInfo.cpp +++ b/llvm/lib/Target/R600/R600InstrInfo.cpp @@ -563,6 +563,7 @@ bool R600InstrInfo::fitsConstReadLimitations(const std::vector &MIs) const { std::vector Consts; + SmallSet Literals; for (unsigned i = 0, n = MIs.size(); i < n; i++) { MachineInstr *MI = MIs[i]; if (!isALUInstr(MI->getOpcode())) @@ -573,6 +574,10 @@ R600InstrInfo::fitsConstReadLimitations(const std::vector &MIs) for (unsigned j = 0, e = Srcs.size(); j < e; j++) { std::pair Src = Srcs[j]; + if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X) + Literals.insert(Src.second); + if (Literals.size() > 4) + return false; if (Src.first->getReg() == AMDGPU::ALU_CONST) Consts.push_back(Src.second); if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) || diff --git a/llvm/test/CodeGen/R600/max-literals.ll b/llvm/test/CodeGen/R600/max-literals.ll new file mode 100644 index 000000000000..c31b7c06bbed --- /dev/null +++ b/llvm/test/CodeGen/R600/max-literals.ll @@ -0,0 +1,68 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +; CHECK: @main +; CHECK: ADD * + +define void @main() #0 { +main_body: + %0 = call float @llvm.R600.load.input(i32 4) + %1 = call float @llvm.R600.load.input(i32 5) + %2 = call float @llvm.R600.load.input(i32 6) + %3 = call float @llvm.R600.load.input(i32 7) + %4 = call float @llvm.R600.load.input(i32 8) + %5 = fadd float %0, 2.0 + %6 = fadd float %1, 3.0 + %7 
= fadd float %2, 4.0 + %8 = fadd float %3, 5.0 + %9 = bitcast float %4 to i32 + %10 = mul i32 %9, 6 + %11 = bitcast i32 %10 to float + %12 = insertelement <4 x float> undef, float %5, i32 0 + %13 = insertelement <4 x float> %12, float %6, i32 1 + %14 = insertelement <4 x float> %13, float %7, i32 2 + %15 = insertelement <4 x float> %14, float %8, i32 3 + %16 = insertelement <4 x float> %15, float %11, i32 3 + + %17 = call float @llvm.AMDGPU.dp4(<4 x float> %15,<4 x float> %16) + %18 = insertelement <4 x float> undef, float %17, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %18, i32 0, i32 2) + ret void +} + +; CHECK: @main2 +; CHECK-NOT: ADD * + +define void @main2() #0 { +main_body: + %0 = call float @llvm.R600.load.input(i32 4) + %1 = call float @llvm.R600.load.input(i32 5) + %2 = call float @llvm.R600.load.input(i32 6) + %3 = call float @llvm.R600.load.input(i32 7) + %4 = call float @llvm.R600.load.input(i32 8) + %5 = fadd float %0, 2.0 + %6 = fadd float %1, 3.0 + %7 = fadd float %2, 4.0 + %8 = fadd float %3, 2.0 + %9 = bitcast float %4 to i32 + %10 = mul i32 %9, 6 + %11 = bitcast i32 %10 to float + %12 = insertelement <4 x float> undef, float %5, i32 0 + %13 = insertelement <4 x float> %12, float %6, i32 1 + %14 = insertelement <4 x float> %13, float %7, i32 2 + %15 = insertelement <4 x float> %14, float %8, i32 3 + %16 = insertelement <4 x float> %15, float %11, i32 3 + + %17 = call float @llvm.AMDGPU.dp4(<4 x float> %15,<4 x float> %16) + %18 = insertelement <4 x float> undef, float %17, i32 0 + call void @llvm.R600.store.swizzle(<4 x float> %18, i32 0, i32 2) + ret void +} + +; Function Attrs: readnone +declare float @llvm.R600.load.input(i32) #1 +declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 + +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) + +attributes #0 = { "ShaderType"="1" } +attributes #1 = { readnone }