From f9a995d68c5877e5e81c66b656cf8fffdccb62d6 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Thu, 6 Mar 2014 17:34:12 +0000
Subject: [PATCH] R600: Fix extloads from i8 / i16 to i64.

This appears to work only for global loads. Private and local loads
break for other reasons.

llvm-svn: 203135
---
 llvm/include/llvm/CodeGen/ISDOpcodes.h        |  2 +
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 15 ++++
 llvm/lib/Target/R600/AMDGPUISelLowering.cpp   | 16 +++-
 llvm/lib/Target/R600/SIISelLowering.cpp       |  3 +
 llvm/test/CodeGen/R600/extload.ll             | 77 +++++++++++++++++--
 5 files changed, 106 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index da8ac792e693..89b09086251a 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -702,6 +702,8 @@ namespace ISD {
     LAST_LOADEXT_TYPE
   };
 
+  NodeType getExtForLoadExtType(LoadExtType);
+
   //===--------------------------------------------------------------------===//
   /// ISD::CondCode enum - These are ordered carefully to make the bitfields
   /// below work out, when considering SETFALSE (something that never exists
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 57bfa4f13924..eac55124c244 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -231,6 +231,21 @@ bool ISD::allOperandsUndef(const SDNode *N) {
   return true;
 }
 
+ISD::NodeType ISD::getExtForLoadExtType(ISD::LoadExtType ExtType) {
+  switch (ExtType) {
+  case ISD::EXTLOAD:
+    return ISD::ANY_EXTEND;
+  case ISD::SEXTLOAD:
+    return ISD::SIGN_EXTEND;
+  case ISD::ZEXTLOAD:
+    return ISD::ZERO_EXTEND;
+  default:
+    break;
+  }
+
+  llvm_unreachable("Invalid LoadExtType");
+}
+
 /// getSetCCSwappedOperands - Return the operation corresponding to (Y op X)
 /// when given the operation for (X op Y).
 ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) {
diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
index 9978493b4d13..e43f086a11b6 100644
--- a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -692,6 +692,20 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   SDLoc DL(Op);
   LoadSDNode *Load = cast<LoadSDNode>(Op);
   ISD::LoadExtType ExtType = Load->getExtensionType();
+  EVT VT = Op.getValueType();
+  EVT MemVT = Load->getMemoryVT();
+
+  if (ExtType != ISD::NON_EXTLOAD && !VT.isVector() && VT.getSizeInBits() > 32) {
+    // We can do the extload to 32-bits, and then need to separately extend to
+    // 64-bits.
+ + SDValue ExtLoad32 = DAG.getExtLoad(ExtType, DL, MVT::i32, + Load->getChain(), + Load->getBasePtr(), + MemVT, + Load->getMemOperand()); + return DAG.getNode(ISD::getExtForLoadExtType(ExtType), DL, VT, ExtLoad32); + } // Lower loads constant address space global variable loads if (Load->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && @@ -711,8 +725,6 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { return SDValue(); - EVT VT = Op.getValueType(); - EVT MemVT = Load->getMemoryVT(); unsigned Mask = 0; if (Load->getMemoryVT() == MVT::i8) { Mask = 0xff; diff --git a/llvm/lib/Target/R600/SIISelLowering.cpp b/llvm/lib/Target/R600/SIISelLowering.cpp index b64e2deb19d5..465e3bacc759 100644 --- a/llvm/lib/Target/R600/SIISelLowering.cpp +++ b/llvm/lib/Target/R600/SIISelLowering.cpp @@ -128,6 +128,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand); setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom); setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom); setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, Expand); setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, Expand); diff --git a/llvm/test/CodeGen/R600/extload.ll b/llvm/test/CodeGen/R600/extload.ll index f78cdc4fb02f..2e70d4704705 100644 --- a/llvm/test/CodeGen/R600/extload.ll +++ b/llvm/test/CodeGen/R600/extload.ll @@ -1,6 +1,7 @@ -; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s +; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; EG-LABEL: @anyext_load_i8: +; FUNC-LABEL: @anyext_load_i8: ; EG: AND_INT ; EG: 255 define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind { @@ -12,7 +13,7 @@ define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspac ret void } -; EG-LABEL: @anyext_load_i16: +; FUNC-LABEL: @anyext_load_i16: ; EG: AND_INT ; EG: AND_INT ; EG-DAG: 65535 @@ -26,7 +27,7 @@ define void @anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrs ret void } -; EG-LABEL: @anyext_load_lds_i8: +; FUNC-LABEL: @anyext_load_lds_i8: ; EG: AND_INT ; EG: 255 define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind { @@ -38,7 +39,7 @@ define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addr ret void } -; EG-LABEL: @anyext_load_lds_i16: +; FUNC-LABEL: @anyext_load_lds_i16: ; EG: AND_INT ; EG: AND_INT ; EG-DAG: 65535 @@ -51,3 +52,69 @@ define void @anyext_load_lds_i16(i16 addrspace(3)* nocapture noalias %out, i16 a store <2 x i16> %x, <2 x i16> addrspace(3)* %castOut, align 1 ret void } + +; FUNC-LABEL: @sextload_global_i8_to_i64 +; SI: BUFFER_LOAD_SBYTE [[LOAD:v[0-9]+]], +; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]] +; SI: BUFFER_STORE_DWORDX2 +define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind { + %a = load i8 addrspace(1)* %in, align 8 + %ext = sext i8 %a to i64 + store i64 %ext, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @sextload_global_i16_to_i64 +; SI: BUFFER_LOAD_SSHORT [[LOAD:v[0-9]+]], +; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]] +; SI: BUFFER_STORE_DWORDX2 +define void @sextload_global_i16_to_i64(i64 
addrspace(1)* %out, i16 addrspace(1)* %in) nounwind { + %a = load i16 addrspace(1)* %in, align 8 + %ext = sext i16 %a to i64 + store i64 %ext, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @sextload_global_i32_to_i64 +; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]], +; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]] +; SI: BUFFER_STORE_DWORDX2 +define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %a = load i32 addrspace(1)* %in, align 8 + %ext = sext i32 %a to i64 + store i64 %ext, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @zextload_global_i8_to_i64 +; SI: BUFFER_LOAD_UBYTE [[LOAD:v[0-9]+]], +; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0 +; SI: BUFFER_STORE_DWORDX2 +define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind { + %a = load i8 addrspace(1)* %in, align 8 + %ext = zext i8 %a to i64 + store i64 %ext, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @zextload_global_i16_to_i64 +; SI: BUFFER_LOAD_USHORT [[LOAD:v[0-9]+]], +; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0 +; SI: BUFFER_STORE_DWORDX2 +define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind { + %a = load i16 addrspace(1)* %in, align 8 + %ext = zext i16 %a to i64 + store i64 %ext, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: @zextload_global_i32_to_i64 +; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]], +; SI: V_MOV_B32_e32 {{v[0-9]+}}, 0 +; SI: BUFFER_STORE_DWORDX2 +define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { + %a = load i32 addrspace(1)* %in, align 8 + %ext = zext i32 %a to i64 + store i64 %ext, i64 addrspace(1)* %out, align 8 + ret void +}
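
Note on the lowering: the key idea in the LowerLOAD change above is that an extending load whose result is a scalar wider than 32 bits is split into the 32-bit extload the hardware can select directly (BUFFER_LOAD_SBYTE / UBYTE / SSHORT / USHORT in the tests) plus a separate extend node whose opcode is chosen by the new ISD::getExtForLoadExtType helper. The sketch below is a minimal, self-contained model of that two-step extension in plain C++; it uses no LLVM APIs, and names such as extloadI8To32 and extend32To64 are illustrative only. It checks that extending i8 -> i32 -> i64 agrees with a direct i8 -> i64 sign or zero extension, which is what the new tests encode for the high half as V_ASHRREV_I32 by 31 (copies of the sign bit) or V_MOV_B32 0 (all zeros).

// Minimal standalone model (plain C++, no LLVM headers) of the two-step
// lowering: extload the narrow memory value to 32 bits, then extend
// 32 -> 64 bits with the opcode picked by ISD::getExtForLoadExtType.
// All names here are illustrative, not LLVM API.
#include <cassert>
#include <cstdint>

enum class LoadExtType { EXTLOAD, SEXTLOAD, ZEXTLOAD };

// Step 1: what the 32-bit extload produces for an i8 in memory
// (BUFFER_LOAD_SBYTE for sign-extending loads, BUFFER_LOAD_UBYTE otherwise).
static uint32_t extloadI8To32(LoadExtType Ext, uint8_t Mem) {
  if (Ext == LoadExtType::SEXTLOAD)
    return static_cast<uint32_t>(static_cast<int32_t>(static_cast<int8_t>(Mem)));
  // ZEXTLOAD zero-extends; EXTLOAD leaves the high bits unspecified, so this
  // model simply zero-extends for it as well.
  return Mem;
}

// Step 2: the separate 32 -> 64 bit extend added after the extload
// (SIGN_EXTEND for SEXTLOAD, ZERO_EXTEND for ZEXTLOAD, ANY_EXTEND for EXTLOAD).
static uint64_t extend32To64(LoadExtType Ext, uint32_t Lo) {
  if (Ext == LoadExtType::SEXTLOAD)
    return static_cast<uint64_t>(static_cast<int64_t>(static_cast<int32_t>(Lo)));
  return Lo;
}

int main() {
  uint8_t Mem = 0x80; // -128 as a signed i8
  // Two-step sign extension matches a direct i8 -> i64 sign extension:
  // the high 32 bits are all copies of the sign bit.
  assert(extend32To64(LoadExtType::SEXTLOAD,
                      extloadI8To32(LoadExtType::SEXTLOAD, Mem)) ==
         static_cast<uint64_t>(int64_t{-128}));
  // Two-step zero extension matches a direct i8 -> i64 zero extension:
  // the high 32 bits are all zero.
  assert(extend32To64(LoadExtType::ZEXTLOAD,
                      extloadI8To32(LoadExtType::ZEXTLOAD, Mem)) == 128u);
  return 0;
}

For EXTLOAD (any-extend) the bits above the memory width are unspecified, so either choice of high bits is acceptable after the split; the model above simply zero-extends in that case.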