Give VZEXT_LOAD a memory operand; it now works with segment registers.

llvm-svn: 114515
Chris Lattner 2010-09-22 00:34:38 +00:00
parent c31ba26e0c
commit 54e5329545
7 changed files with 41 additions and 16 deletions

View File

@@ -1169,7 +1169,6 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
       Parent->getOpcode() != ISD::PREFETCH &&
       Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
       Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores.
-      Parent->getOpcode() != X86ISD::VZEXT_LOAD &&
       Parent->getOpcode() != X86ISD::FLD &&
       Parent->getOpcode() != X86ISD::FILD &&
       Parent->getOpcode() != X86ISD::FILD_FLAG &&
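
An editorial note on why this line can go away (not part of the commit): immediately after this blacklist, SelectAddr assumes any surviving parent is a MemSDNode and derives the segment register from its address space. VZEXT_LOAD had to be excluded while it lacked a memory operand, since that cast would have failed on it. A paraphrased sketch of the follow-on logic, assuming it matches the LLVM sources of this period:

    // Paraphrased sketch, not verbatim source: parents that survive the
    // blacklist are assumed to carry a MachineMemOperand, and the address
    // space of that operand selects the segment register.
    unsigned AddrSpace =
      cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
    if (AddrSpace == 256)      // x86 convention: addrspace(256) is %gs
      AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
    else if (AddrSpace == 257) // addrspace(257) is %fs
      AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);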

View File

@@ -4113,7 +4113,7 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
 /// rather than undef via VZEXT_LOAD, but we do not detect that case today.
 /// There's even a handy isZeroNode for that purpose.
 static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
-                                        DebugLoc &dl, SelectionDAG &DAG) {
+                                        DebugLoc &DL, SelectionDAG &DAG) {
   EVT EltVT = VT.getVectorElementType();
   unsigned NumElems = Elts.size();
@@ -4150,18 +4150,20 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
   // consecutive loads for the low half, generate a vzext_load node.
   if (LastLoadedElt == NumElems - 1) {
     if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
-      return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(),
+      return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
                          LDBase->getPointerInfo(),
                          LDBase->isVolatile(), LDBase->isNonTemporal(), 0);
-    return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(),
+    return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
                        LDBase->getPointerInfo(),
                        LDBase->isVolatile(), LDBase->isNonTemporal(),
                        LDBase->getAlignment());
   } else if (NumElems == 4 && LastLoadedElt == 1) {
     SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
     SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
-    SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
-    return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode);
+    SDValue ResNode = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys,
+                                              Ops, 2, MVT::i32,
+                                              LDBase->getMemOperand());
+    return DAG.getNode(ISD::BIT_CONVERT, DL, VT, ResNode);
   }
   return SDValue();
 }
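
A note on the API swap above (a sketch, not part of the diff): DAG.getNode produces a plain SDNode, while DAG.getMemIntrinsicNode produces a MemIntrinsicSDNode that hangs on to the MachineMemOperand passed in, here taken from LDBase. That operand is what later lets address selection see the original pointer, including its address space. Assuming the era's MemSDNode API, the information can be read back roughly like this:

    // Sketch under assumed 2010-era API: recovering the address space
    // (and hence a possible segment override) from the new memop.
    if (MemIntrinsicSDNode *M = dyn_cast<MemIntrinsicSDNode>(ResNode.getNode())) {
      const MachineMemOperand *MMO = M->getMemOperand();
      if (const Value *Ptr = MMO->getValue()) {
        unsigned AS = cast<PointerType>(Ptr->getType())->getAddressSpace();
        (void)AS; // 256 would mean a %gs-relative access on x86
      }
    }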

View File

@@ -220,9 +220,6 @@ namespace llvm {
       // VZEXT_MOVL - Vector move low and zero extend.
       VZEXT_MOVL,
 
-      // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
-      VZEXT_LOAD,
-
       // VSHL, VSRL - Vector logical left / right shift.
       VSHL, VSRL,
@@ -309,8 +306,11 @@ namespace llvm {
       // LCMPXCHG_DAG, LCMPXCHG8_DAG - Compare and swap.
       LCMPXCHG_DAG,
-      LCMPXCHG8_DAG
+      LCMPXCHG8_DAG,
+
+      // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
+      VZEXT_LOAD,
 
       // WARNING: Do not add anything in the end unless you want the node to
       // have memop! In fact, starting from ATOMADD64_DAG all opcodes will be
       // thought as target memory ops!
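
The WARNING above encodes a numeric convention (an aside, hedged): SelectionDAG treats every target opcode at or beyond ISD::FIRST_TARGET_MEMORY_OPCODE as a memory operation, which is why VZEXT_LOAD has to move past that boundary once it gains a memory operand. A minimal sketch of the rule:

    // Sketch of the classification implied by the WARNING comment:
    // opcodes numerically past FIRST_TARGET_MEMORY_OPCODE are expected
    // to be created as MemIntrinsicSDNodes carrying a MachineMemOperand.
    static bool isTargetMemoryOpcode(unsigned Opcode) {
      return Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE;
    }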

View File

@@ -102,7 +102,7 @@ def X86insrtps : SDNode<"X86ISD::INSERTPS",
 def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
                 SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
 def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
-                [SDNPHasChain, SDNPMayLoad]>;
+                [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 def X86vshl   : SDNode<"X86ISD::VSHL",  SDTIntShiftOp>;
 def X86vshr   : SDNode<"X86ISD::VSRL",  SDTIntShiftOp>;
 def X86cmpps  : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>;

View File

@@ -111,10 +111,10 @@ def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>;
 def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
                         [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
-                         SDNPMayLoad]>;
+                         SDNPMayLoad, SDNPMemOperand]>;
 def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86cas8,
                         [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
-                         SDNPMayLoad]>;
+                         SDNPMayLoad, SDNPMemOperand]>;
 def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary,
                         [SDNPHasChain, SDNPMayStore,
                          SDNPMayLoad, SDNPMemOperand]>;

View File

@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -march=x86 -mattr=sse41 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -march=x86-64 -mattr=sse41 | FileCheck %s --check-prefix=X64
 
 define i32 @test1() nounwind readonly {
 entry:
@@ -31,3 +31,27 @@ entry:
 ; X64: test2:
 ; X64: callq *%gs:(%rdi)
 
+define <2 x i64> @pmovsxwd_1(i64 addrspace(256)* %p) nounwind readonly {
+entry:
+  %0 = load i64 addrspace(256)* %p
+  %tmp2 = insertelement <2 x i64> zeroinitializer, i64 %0, i32 0
+  %1 = bitcast <2 x i64> %tmp2 to <8 x i16>
+  %2 = tail call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1) nounwind readnone
+  %3 = bitcast <4 x i32> %2 to <2 x i64>
+  ret <2 x i64> %3
+
+; X32: pmovsxwd_1:
+; X32: movl 4(%esp), %eax
+; X32: pmovsxwd %gs:(%eax), %xmm0
+; X32: ret
+
+; X64: pmovsxwd_1:
+; X64: pmovsxwd %gs:(%rdi), %xmm0
+; X64: ret
+}
+
+declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone

View File

@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
-; CHECK: jne
+; CHECK: je
 
 ; widening select v6i32 and then a sub