From 7900334dd530416b70fb04c8abb6f8c2c65da86d Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 14 Apr 2016 21:58:07 +0000 Subject: [PATCH] AMDGPU: Fold bitcasts of scalar constants to vectors This cleans up some messes since the individual scalar components can be CSEed. llvm-svn: 266376 --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 34 ++++++++ llvm/test/CodeGen/AMDGPU/fceil64.ll | 4 +- llvm/test/CodeGen/AMDGPU/sdivrem64.ll | 78 +++++++++---------- llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll | 10 +-- llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll | 7 +- 5 files changed, 83 insertions(+), 50 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 75afce0534b3..0d325a323474 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -399,6 +399,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM, setTargetDAGCombine(ISD::FADD); setTargetDAGCombine(ISD::FSUB); + setTargetDAGCombine(ISD::BITCAST); + setBooleanContents(ZeroOrNegativeOneBooleanContent); setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); @@ -2547,6 +2549,38 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, switch(N->getOpcode()) { default: break; + case ISD::BITCAST: { + EVT DestVT = N->getValueType(0); + if (DestVT.getSizeInBits() != 64 && !DestVT.isVector()) + break; + + // Fold bitcasts of constants. 
+ // + // v2i32 (bitcast i64:k) -> build_vector lo_32(k), hi_32(k) + // TODO: Generalize and move to DAGCombiner + SDValue Src = N->getOperand(0); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src)) { + assert(Src.getValueType() == MVT::i64); + SDLoc SL(N); + uint64_t CVal = C->getZExtValue(); + return DAG.getNode(ISD::BUILD_VECTOR, SL, DestVT, + DAG.getConstant(Lo_32(CVal), SL, MVT::i32), + DAG.getConstant(Hi_32(CVal), SL, MVT::i32)); + } + + if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Src)) { + const APInt &Val = C->getValueAPF().bitcastToAPInt(); + SDLoc SL(N); + uint64_t CVal = Val.getZExtValue(); + SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, + DAG.getConstant(Lo_32(CVal), SL, MVT::i32), + DAG.getConstant(Hi_32(CVal), SL, MVT::i32)); + + return DAG.getNode(ISD::BITCAST, SL, DestVT, Vec); + } + + break; + } case ISD::SHL: { if (DCI.getDAGCombineLevel() < AfterLegalizeDAG) break; diff --git a/llvm/test/CodeGen/AMDGPU/fceil64.ll b/llvm/test/CodeGen/AMDGPU/fceil64.ll index eb90b75b2088..579cbf435e78 100644 --- a/llvm/test/CodeGen/AMDGPU/fceil64.ll +++ b/llvm/test/CodeGen/AMDGPU/fceil64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s @@ -25,7 +25,7 @@ declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone ; SI-DAG: cndmask_b32 ; SI-DAG: v_cmp_lt_f64 ; SI-DAG: v_cmp_lg_f64 -; SI: s_and_b64 +; SI-DAG: s_and_b64 ; SI: v_cndmask_b32 ; SI: v_cndmask_b32 ; SI: v_add_f64 diff --git a/llvm/test/CodeGen/AMDGPU/sdivrem64.ll b/llvm/test/CodeGen/AMDGPU/sdivrem64.ll index a9b2b7f9df55..a7ce948acd4f 100644 --- a/llvm/test/CodeGen/AMDGPU/sdivrem64.ll +++ 
b/llvm/test/CodeGen/AMDGPU/sdivrem64.ll @@ -1,8 +1,8 @@ -;RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s +;RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC %s ;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC %s ;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s -;FUNC-LABEL: {{^}}test_sdiv: +;FUNC-LABEL: {{^}}s_test_sdiv: ;EG: RECIP_UINT ;EG: LSHL {{.*}}, 1, ;EG: BFE_UINT @@ -36,47 +36,47 @@ ;EG: BFE_UINT ;EG: BFE_UINT -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN: v_bfe_u32 -;GCN-NOT: v_mad_f32 -;SI-NOT: v_lshr_b64 -;VI-NOT: v_lshrrev_b64 -;GCN: s_endpgm -define void @test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) { +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN: s_bfe_u32 +; GCN-NOT: v_mad_f32 +; SI-NOT: v_lshr_b64 +; VI-NOT: v_lshrrev_b64 +; GCN: s_endpgm +define void 
@s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) { %result = sdiv i64 %x, %y store i64 %result, i64 addrspace(1)* %out ret void } -;FUNC-LABEL: {{^}}test_srem: +;FUNC-LABEL: {{^}}s_test_srem: ;EG: RECIP_UINT ;EG: BFE_UINT ;EG: BFE_UINT @@ -144,7 +144,7 @@ define void @test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) { ;SI-NOT: v_lshr_b64 ;VI-NOT: v_lshrrev_b64 ;GCN: s_endpgm -define void @test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) { +define void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) { %result = urem i64 %x, %y store i64 %result, i64 addrspace(1)* %out ret void diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll index a7e777cd0466..b9c34c40c396 100644 --- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone @@ -10,14 +10,14 @@ define void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) { ret void } -; FIXME: select on 0, 0 ; SI-LABEL: {{^}}sint_to_fp_i1_f64: ; SI: v_cmp_eq_i32_e64 vcc, ; We can't fold the SGPRs into v_cndmask_b32_e64, because it already ; uses an SGPR (implicit vcc). 
-; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} -; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 0, vcc -; SI: buffer_store_dwordx2 +; SI-DAG: v_cndmask_b32_e32 v[[SEL:[0-9]+]], 0, v{{[0-9]+}} +; SI-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} +; SI: buffer_store_dwordx2 v{{\[}}[[ZERO]]:[[SEL]]{{\]}} + ; SI: s_endpgm define void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) { %cmp = icmp eq i32 %in, 0 diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll index c35f5099fbe9..2723c0dc95e9 100644 --- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll @@ -70,14 +70,13 @@ define void @s_uint_to_fp_v4i32_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i ret void } -; FIXME: select on 0, 0 ; SI-LABEL: {{^}}uint_to_fp_i1_to_f64: ; SI: v_cmp_eq_i32_e64 vcc ; We can't fold the SGPRs into v_cndmask_b32_e32, because it already ; uses an SGPR (implicit vcc). -; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} -; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 0, vcc -; SI: buffer_store_dwordx2 +; SI-DAG: v_cndmask_b32_e32 v[[SEL:[0-9]+]], 0, v{{[0-9]+}} +; SI-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} +; SI: buffer_store_dwordx2 v{{\[}}[[ZERO]]:[[SEL]]{{\]}} ; SI: s_endpgm define void @uint_to_fp_i1_to_f64(double addrspace(1)* %out, i32 %in) { %cmp = icmp eq i32 %in, 0