[SystemZ] Support LRVH and STRVH opcodes

Summary: On Linux, /usr/include/bits/byteswap-16.h defines __byteswap_16(x) as an inlined LRVH (Load Reversed Half-word) instruction. The SystemZ back-end did not support this opcode and the inlined assembly would cause a fatal error.

Reviewers: bryanpkc, uweigand

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D18732

llvm-svn: 269688
This commit is contained in:
Bryan Chan 2016-05-16 20:32:22 +00:00
parent 7ffd0b4409
commit 28b759c4c8
8 changed files with 417 additions and 5 deletions

View File

@ -436,6 +436,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::FP_ROUND);
setTargetDAGCombine(ISD::BSWAP);
// Handle intrinsics.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@ -4676,6 +4677,8 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(ATOMIC_LOADW_UMIN);
OPCODE(ATOMIC_LOADW_UMAX);
OPCODE(ATOMIC_CMP_SWAPW);
OPCODE(LRV);
OPCODE(STRV);
OPCODE(PREFETCH);
}
return nullptr;
@ -4971,6 +4974,74 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
}
}
}
// Combine BSWAP (LOAD) into LRVH/LRV/LRVG
// These loads are allowed to access memory multiple times, and so we must check
// that the loads are not volatile before performing the combine.
if (Opcode == ISD::BSWAP &&
ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
N->getOperand(0).hasOneUse() &&
(N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i32 ||
N->getValueType(0) == MVT::i64) &&
!cast<LoadSDNode>(N->getOperand(0))->isVolatile()) {
SDValue Load = N->getOperand(0);
LoadSDNode *LD = cast<LoadSDNode>(Load);
// Create the byte-swapping load.
SDValue Ops[] = {
LD->getChain(), // Chain
LD->getBasePtr(), // Ptr
DAG.getValueType(N->getValueType(0)) // VT
};
SDValue BSLoad =
DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
DAG.getVTList(N->getValueType(0) == MVT::i64 ?
MVT::i64 : MVT::i32, MVT::Other),
Ops, LD->getMemoryVT(), LD->getMemOperand());
// If this is an i16 load, insert the truncate.
SDValue ResVal = BSLoad;
if (N->getValueType(0) == MVT::i16)
ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
// First, combine the bswap away. This makes the value produced by the
// load dead.
DCI.CombineTo(N, ResVal);
// Next, combine the load away, we give it a bogus result value but a real
// chain result. The result value is dead because the bswap is dead.
DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
// Return N so it doesn't get rechecked!
return SDValue(N, 0);
}
// Combine STORE (BSWAP) into STRVH/STRV/STRVG
// See comment above about volatile accesses.
if (Opcode == ISD::STORE &&
!cast<StoreSDNode>(N)->isVolatile() &&
N->getOperand(1).getOpcode() == ISD::BSWAP &&
N->getOperand(1).getNode()->hasOneUse() &&
(N->getOperand(1).getValueType() == MVT::i16 ||
N->getOperand(1).getValueType() == MVT::i32 ||
N->getOperand(1).getValueType() == MVT::i64)) {
SDValue BSwapOp = N->getOperand(1).getOperand(0);
if (BSwapOp.getValueType() == MVT::i16)
BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
SDValue Ops[] = {
N->getOperand(0), BSwapOp, N->getOperand(2),
DAG.getValueType(N->getOperand(1).getValueType())
};
return
DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
Ops, cast<StoreSDNode>(N)->getMemoryVT(),
cast<StoreSDNode>(N)->getMemOperand());
}
return SDValue();
}

View File

@ -311,6 +311,19 @@ enum NodeType : unsigned {
// Operand 5: the width of the field in bits (8 or 16)
ATOMIC_CMP_SWAPW,
// Byte swapping load.
//
// Operand 0: the address to load from
// Operand 1: the type of load (i16, i32, i64)
LRV,
// Byte swapping store.
//
// Operand 0: the value to store
// Operand 1: the address to store to
// Operand 2: the type of store (i16, i32, i64)
STRV,
// Prefetch from the second operand using the 4-bit control code in
// the first operand. The code is 1 for a load prefetch and 2 for
// a store prefetch.

View File

@ -670,13 +670,14 @@ let hasSideEffects = 0 in {
// Byte-swapping loads. Unlike normal loads, these instructions are
// allowed to access storage more than once.
def LRV : UnaryRXY<"lrv", 0xE31E, loadu<bswap, nonvolatile_load>, GR32, 4>;
def LRVG : UnaryRXY<"lrvg", 0xE30F, loadu<bswap, nonvolatile_load>, GR64, 8>;
def LRVH : UnaryRXY<"lrvh", 0xE31F, z_lrvh, GR32, 2>;
def LRV : UnaryRXY<"lrv", 0xE31E, z_lrv, GR32, 4>;
def LRVG : UnaryRXY<"lrvg", 0xE30F, z_lrvg, GR64, 8>;
// Likewise byte-swapping stores.
def STRV : StoreRXY<"strv", 0xE33E, storeu<bswap, nonvolatile_store>, GR32, 4>;
def STRVG : StoreRXY<"strvg", 0xE32F, storeu<bswap, nonvolatile_store>,
GR64, 8>;
def STRVH : StoreRXY<"strvh", 0xE33F, z_strvh, GR32, 2>;
def STRV : StoreRXY<"strv", 0xE33E, z_strv, GR32, 4>;
def STRVG : StoreRXY<"strvg", 0xE32F, z_strvg, GR64, 8>;
//===----------------------------------------------------------------------===//
// Load address instructions

View File

@ -79,6 +79,14 @@ def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
def SDT_ZPrefetch : SDTypeProfile<0, 2,
[SDTCisVT<0, i32>,
SDTCisPtrTy<1>]>;
def SDT_ZLoadBSwap : SDTypeProfile<1, 2,
[SDTCisInt<0>,
SDTCisPtrTy<1>,
SDTCisVT<2, OtherVT>]>;
def SDT_ZStoreBSwap : SDTypeProfile<0, 3,
[SDTCisInt<0>,
SDTCisPtrTy<1>,
SDTCisVT<2, OtherVT>]>;
def SDT_ZTBegin : SDTypeProfile<0, 2,
[SDTCisPtrTy<0>,
SDTCisVT<1, i32>]>;
@ -191,6 +199,11 @@ def z_serialize : SDNode<"SystemZISD::SERIALIZE", SDTNone,
def z_membarrier : SDNode<"SystemZISD::MEMBARRIER", SDTNone,
[SDNPHasChain, SDNPSideEffect]>;
def z_loadbswap : SDNode<"SystemZISD::LRV", SDT_ZLoadBSwap,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def z_storebswap : SDNode<"SystemZISD::STRV", SDT_ZStoreBSwap,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
// Defined because the index is an i32 rather than a pointer.
def z_vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT",
SDT_ZInsertVectorElt>;
@ -331,6 +344,17 @@ def z_vsrl : SDNode<"ISD::SRL", SDT_ZVecBinary>;
// Pattern fragments
//===----------------------------------------------------------------------===//
def z_lrvh : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i16)>;
def z_lrv : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i32)>;
def z_lrvg : PatFrag<(ops node:$addr), (z_loadbswap node:$addr, i64)>;
def z_strvh : PatFrag<(ops node:$src, node:$addr),
(z_storebswap node:$src, node:$addr, i16)>;
def z_strv : PatFrag<(ops node:$src, node:$addr),
(z_storebswap node:$src, node:$addr, i32)>;
def z_strvg : PatFrag<(ops node:$src, node:$addr),
(z_storebswap node:$src, node:$addr, i64)>;
// Signed and unsigned comparisons.
def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{
unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();

View File

@ -0,0 +1,99 @@
; Test 16-bit byteswaps from memory to registers.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare i16 @llvm.bswap.i16(i16 %a)
; Check LRVH with no displacement.
define i16 @f1(i16 *%src) {
; CHECK-LABEL: f1:
; CHECK: lrvh %r2, 0(%r2)
; CHECK: br %r14
%a = load i16 , i16 *%src
%swapped = call i16 @llvm.bswap.i16(i16 %a)
ret i16 %swapped
}
; Check the high end of the aligned LRVH range.
define i16 @f2(i16 *%src) {
; CHECK-LABEL: f2:
; CHECK: lrvh %r2, 524286(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%src, i64 262143
%a = load i16 , i16 *%ptr
%swapped = call i16 @llvm.bswap.i16(i16 %a)
ret i16 %swapped
}
; Check the next word up, which needs separate address logic.
; Other sequences besides this one would be OK.
define i16 @f3(i16 *%src) {
; CHECK-LABEL: f3:
; CHECK: agfi %r2, 524288
; CHECK: lrvh %r2, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%src, i64 262144
%a = load i16 , i16 *%ptr
%swapped = call i16 @llvm.bswap.i16(i16 %a)
ret i16 %swapped
}
; Check the high end of the negative aligned LRVH range.
define i16 @f4(i16 *%src) {
; CHECK-LABEL: f4:
; CHECK: lrvh %r2, -2(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%src, i64 -1
%a = load i16 , i16 *%ptr
%swapped = call i16 @llvm.bswap.i16(i16 %a)
ret i16 %swapped
}
; Check the low end of the LRVH range.
define i16 @f5(i16 *%src) {
; CHECK-LABEL: f5:
; CHECK: lrvh %r2, -524288(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%src, i64 -262144
%a = load i16 , i16 *%ptr
%swapped = call i16 @llvm.bswap.i16(i16 %a)
ret i16 %swapped
}
; Check the next word down, which needs separate address logic.
; Other sequences besides this one would be OK.
define i16 @f6(i16 *%src) {
; CHECK-LABEL: f6:
; CHECK: agfi %r2, -524290
; CHECK: lrvh %r2, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%src, i64 -262145
%a = load i16 , i16 *%ptr
%swapped = call i16 @llvm.bswap.i16(i16 %a)
ret i16 %swapped
}
; Check that LRVH allows an index.
define i16 @f7(i64 %src, i64 %index) {
; CHECK-LABEL: f7:
; CHECK: lrvh %r2, 524287({{%r3,%r2|%r2,%r3}})
; CHECK: br %r14
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524287
%ptr = inttoptr i64 %add2 to i16 *
%a = load i16 , i16 *%ptr
%swapped = call i16 @llvm.bswap.i16(i16 %a)
ret i16 %swapped
}
; Check that volatile accesses do not use LRVH, which might access the
; storage multiple times.
define i16 @f8(i16 *%src) {
; CHECK-LABEL: f8:
; CHECK: lh [[REG:%r[0-5]]], 0(%r2)
; CHECK: lrvr %r2, [[REG]]
; CHECK: br %r14
%a = load volatile i16 , i16 *%src
%swapped = call i16 @llvm.bswap.i16(i16 %a)
ret i16 %swapped
}

View File

@ -0,0 +1,100 @@
; Test 16-bit byteswaps from registers to memory.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare i16 @llvm.bswap.i16(i16 %a)
; Check STRVH with no displacement.
define void @f1(i16 *%dst, i16 %a) {
; CHECK-LABEL: f1:
; CHECK: strvh %r3, 0(%r2)
; CHECK: br %r14
%swapped = call i16 @llvm.bswap.i16(i16 %a)
store i16 %swapped, i16 *%dst
ret void
}
; Check the high end of the aligned STRVH range.
define void @f2(i16 *%dst, i16 %a) {
; CHECK-LABEL: f2:
; CHECK: strvh %r3, 524286(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%dst, i64 262143
%swapped = call i16 @llvm.bswap.i16(i16 %a)
store i16 %swapped, i16 *%ptr
ret void
}
; Check the next word up, which needs separate address logic.
; Other sequences besides this one would be OK.
define void @f3(i16 *%dst, i16 %a) {
; CHECK-LABEL: f3:
; CHECK: agfi %r2, 524288
; CHECK: strvh %r3, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%dst, i64 262144
%swapped = call i16 @llvm.bswap.i16(i16 %a)
store i16 %swapped, i16 *%ptr
ret void
}
; Check the high end of the negative aligned STRVH range.
define void @f4(i16 *%dst, i16 %a) {
; CHECK-LABEL: f4:
; CHECK: strvh %r3, -2(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%dst, i64 -1
%swapped = call i16 @llvm.bswap.i16(i16 %a)
store i16 %swapped, i16 *%ptr
ret void
}
; Check the low end of the STRVH range.
define void @f5(i16 *%dst, i16 %a) {
; CHECK-LABEL: f5:
; CHECK: strvh %r3, -524288(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%dst, i64 -262144
%swapped = call i16 @llvm.bswap.i16(i16 %a)
store i16 %swapped, i16 *%ptr
ret void
}
; Check the next word down, which needs separate address logic.
; Other sequences besides this one would be OK.
define void @f6(i16 *%dst, i16 %a) {
; CHECK-LABEL: f6:
; CHECK: agfi %r2, -524290
; CHECK: strvh %r3, 0(%r2)
; CHECK: br %r14
%ptr = getelementptr i16, i16 *%dst, i64 -262145
%swapped = call i16 @llvm.bswap.i16(i16 %a)
store i16 %swapped, i16 *%ptr
ret void
}
; Check that STRVH allows an index.
define void @f7(i64 %src, i64 %index, i16 %a) {
; CHECK-LABEL: f7:
; CHECK: strvh %r4, 524287({{%r3,%r2|%r2,%r3}})
; CHECK: br %r14
%add1 = add i64 %src, %index
%add2 = add i64 %add1, 524287
%ptr = inttoptr i64 %add2 to i16 *
%swapped = call i16 @llvm.bswap.i16(i16 %a)
store i16 %swapped, i16 *%ptr
ret void
}
; Check that volatile stores do not use STRVH, which might access the
; storage multiple times.
define void @f8(i16 *%dst, i16 %a) {
; CHECK-LABEL: f8:
; CHECK: lrvr [[REG:%r[0-5]]], %r3
; CHECK: srl [[REG]], 16
; CHECK: sth [[REG]], 0(%r2)
; CHECK: br %r14
%swapped = call i16 @llvm.bswap.i16(i16 %a)
store volatile i16 %swapped, i16 *%dst
ret void
}

View File

@ -5209,6 +5209,36 @@
# CHECK: lrvr %r15, %r15
0xb9 0x1f 0x00 0xff
# CHECK: lrvh %r0, -524288
0xe3 0x00 0x00 0x00 0x80 0x1f
# CHECK: lrvh %r0, -1
0xe3 0x00 0x0f 0xff 0xff 0x1f
# CHECK: lrvh %r0, 0
0xe3 0x00 0x00 0x00 0x00 0x1f
# CHECK: lrvh %r0, 1
0xe3 0x00 0x00 0x01 0x00 0x1f
# CHECK: lrvh %r0, 524287
0xe3 0x00 0x0f 0xff 0x7f 0x1f
# CHECK: lrvh %r0, 0(%r1)
0xe3 0x00 0x10 0x00 0x00 0x1f
# CHECK: lrvh %r0, 0(%r15)
0xe3 0x00 0xf0 0x00 0x00 0x1f
# CHECK: lrvh %r0, 524287(%r1,%r15)
0xe3 0x01 0xff 0xff 0x7f 0x1f
# CHECK: lrvh %r0, 524287(%r15,%r1)
0xe3 0x0f 0x1f 0xff 0x7f 0x1f
# CHECK: lrvh %r15, 0
0xe3 0xf0 0x00 0x00 0x00 0x1f
# CHECK: lrv %r0, -524288
0xe3 0x00 0x00 0x00 0x80 0x1e
@ -8500,6 +8530,36 @@
# CHECK: strvg %r15, 0
0xe3 0xf0 0x00 0x00 0x00 0x2f
# CHECK: strvh %r0, -524288
0xe3 0x00 0x00 0x00 0x80 0x3f
# CHECK: strvh %r0, -1
0xe3 0x00 0x0f 0xff 0xff 0x3f
# CHECK: strvh %r0, 0
0xe3 0x00 0x00 0x00 0x00 0x3f
# CHECK: strvh %r0, 1
0xe3 0x00 0x00 0x01 0x00 0x3f
# CHECK: strvh %r0, 524287
0xe3 0x00 0x0f 0xff 0x7f 0x3f
# CHECK: strvh %r0, 0(%r1)
0xe3 0x00 0x10 0x00 0x00 0x3f
# CHECK: strvh %r0, 0(%r15)
0xe3 0x00 0xf0 0x00 0x00 0x3f
# CHECK: strvh %r0, 524287(%r1,%r15)
0xe3 0x01 0xff 0xff 0x7f 0x3f
# CHECK: strvh %r0, 524287(%r15,%r1)
0xe3 0x0f 0x1f 0xff 0x7f 0x3f
# CHECK: strvh %r15, 0
0xe3 0xf0 0x00 0x00 0x00 0x3f
# CHECK: strv %r0, -524288
0xe3 0x00 0x00 0x00 0x80 0x3e

View File

@ -6539,6 +6539,28 @@
lrl %r7,frob@PLT
lrl %r8,frob@PLT
#CHECK: lrvh %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x1f]
#CHECK: lrvh %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x1f]
#CHECK: lrvh %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x1f]
#CHECK: lrvh %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0x1f]
#CHECK: lrvh %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x1f]
#CHECK: lrvh %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0x1f]
#CHECK: lrvh %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x1f]
#CHECK: lrvh %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x1f]
#CHECK: lrvh %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x1f]
#CHECK: lrvh %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x1f]
lrvh %r0,-524288
lrvh %r0,-1
lrvh %r0,0
lrvh %r0,1
lrvh %r0,524287
lrvh %r0,0(%r1)
lrvh %r0,0(%r15)
lrvh %r0,524287(%r1,%r15)
lrvh %r0,524287(%r15,%r1)
lrvh %r15,0
#CHECK: lrv %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x1e]
#CHECK: lrv %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x1e]
#CHECK: lrv %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x1e]
@ -8965,6 +8987,28 @@
strl %r7,frob@PLT
strl %r8,frob@PLT
#CHECK: strvh %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x3f]
#CHECK: strvh %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x3f]
#CHECK: strvh %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x3f]
#CHECK: strvh %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0x3f]
#CHECK: strvh %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x3f]
#CHECK: strvh %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0x3f]
#CHECK: strvh %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x3f]
#CHECK: strvh %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x3f]
#CHECK: strvh %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x3f]
#CHECK: strvh %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x3f]
strvh %r0,-524288
strvh %r0,-1
strvh %r0,0
strvh %r0,1
strvh %r0,524287
strvh %r0,0(%r1)
strvh %r0,0(%r15)
strvh %r0,524287(%r1,%r15)
strvh %r0,524287(%r15,%r1)
strvh %r15,0
#CHECK: strv %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x3e]
#CHECK: strv %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x3e]
#CHECK: strv %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x3e]