Implement ctlz and cttz with bsr and bsf.

llvm-svn: 45024
This commit is contained in:
Evan Cheng 2007-12-14 02:13:44 +00:00
parent d4d8c3c717
commit e9fbc3f014
5 changed files with 119 additions and 9 deletions

View File

@ -210,18 +210,18 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
setOperationAction(ISD::FLT_ROUNDS , MVT::i32 , Custom);
setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
setOperationAction(ISD::CTTZ , MVT::i8 , Expand);
setOperationAction(ISD::CTLZ , MVT::i8 , Expand);
setOperationAction(ISD::CTTZ , MVT::i8 , Custom);
setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
setOperationAction(ISD::CTTZ , MVT::i16 , Expand);
setOperationAction(ISD::CTLZ , MVT::i16 , Expand);
setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
setOperationAction(ISD::CTLZ , MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
}
setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
@ -5345,6 +5345,42 @@ SDOperand X86TargetLowering::LowerFLT_ROUNDS(SDOperand Op, SelectionDAG &DAG) {
ISD::TRUNCATE : ISD::ZERO_EXTEND), VT, RetVal);
}
SDOperand X86TargetLowering::LowerCTLZ(SDOperand Op, SelectionDAG &DAG) {
MVT::ValueType VT = Op.getValueType();
MVT::ValueType OpVT = VT;
unsigned NumBits = MVT::getSizeInBits(VT);
Op = Op.getOperand(0);
if (VT == MVT::i8) {
OpVT = MVT::i32;
Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op);
}
if (VT == MVT::i32 || VT == MVT::i64)
return DAG.getNode(ISD::XOR, OpVT, DAG.getNode(X86ISD::BSR, OpVT, Op),
DAG.getConstant(NumBits-1, OpVT));
Op = DAG.getNode(ISD::SUB, OpVT, DAG.getConstant(NumBits-1, OpVT),
DAG.getNode(X86ISD::BSR, OpVT, Op));
if (VT == MVT::i8)
Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op);
return Op;
}
SDOperand X86TargetLowering::LowerCTTZ(SDOperand Op, SelectionDAG &DAG) {
MVT::ValueType VT = Op.getValueType();
MVT::ValueType OpVT = VT;
Op = Op.getOperand(0);
if (VT == MVT::i8) {
OpVT = MVT::i32;
Op = DAG.getNode(ISD::ZERO_EXTEND, OpVT, Op);
}
Op = DAG.getNode(X86ISD::BSF, OpVT, Op);
if (VT == MVT::i8)
Op = DAG.getNode(ISD::TRUNCATE, MVT::i8, Op);
return Op;
}
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
@ -5387,7 +5423,8 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG);
case ISD::FLT_ROUNDS: return LowerFLT_ROUNDS(Op, DAG);
case ISD::CTLZ: return LowerCTLZ(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op, DAG);
// FIXME: REMOVE THIS WHEN LegalizeDAGTypes lands.
case ISD::READCYCLECOUNTER:
@ -5407,6 +5444,8 @@ SDNode *X86TargetLowering::ExpandOperationResult(SDNode *N, SelectionDAG &DAG) {
const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return NULL;
case X86ISD::BSF: return "X86ISD::BSF";
case X86ISD::BSR: return "X86ISD::BSR";
case X86ISD::SHLD: return "X86ISD::SHLD";
case X86ISD::SHRD: return "X86ISD::SHRD";
case X86ISD::FAND: return "X86ISD::FAND";

View File

@ -28,6 +28,11 @@ namespace llvm {
// Start the numbering where the builtin ops leave off.
FIRST_NUMBER = ISD::BUILTIN_OP_END+X86::INSTRUCTION_LIST_END,
/// BSF - Bit scan forward.
/// BSR - Bit scan reverse.
BSF,
BSR,
/// SHLD, SHRD - Double shift instructions. These correspond to
/// X86::SHLDxx and X86::SHRDxx instructions.
SHLD,
@ -489,6 +494,8 @@ namespace llvm {
SDOperand LowerEH_RETURN(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerTRAMPOLINE(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerFLT_ROUNDS(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerCTLZ(SDOperand Op, SelectionDAG &DAG);
SDOperand LowerCTTZ(SDOperand Op, SelectionDAG &DAG);
SDNode *ExpandFP_TO_SINT(SDNode *N, SelectionDAG &DAG);
SDNode *ExpandREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG);
};

View File

@ -57,6 +57,8 @@ def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
def X86bsf : SDNode<"X86ISD::BSF", SDTIntUnaryOp>;
def X86bsr : SDNode<"X86ISD::BSR", SDTIntUnaryOp>;
def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>;
def X86shrd : SDNode<"X86ISD::SHRD", SDTIntShiftDOp>;
@ -445,6 +447,35 @@ def XCHG32rm : I<0x87, MRMSrcMem,
(outs), (ins GR32:$src1, i32mem:$src2),
"xchg{l}\t{$src2|$src1}, {$src1|$src2}", []>;
// Bit scan instructions.
let Defs = [EFLAGS] in {
def BSF16rr : I<0xBC, AddRegFrm, (outs GR16:$dst), (ins GR16:$src),
"bsf{w}\t{$src, $dst||$dst, $src}",
[(set GR16:$dst, (X86bsf GR16:$src))]>, TB;
def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bsf{w}\t{$src, $dst||$dst, $src}",
[(set GR16:$dst, (X86bsf (loadi16 addr:$src)))]>, TB;
def BSF32rr : I<0xBC, AddRegFrm, (outs GR32:$dst), (ins GR32:$src),
"bsf{l}\t{$src, $dst||$dst, $src}",
[(set GR32:$dst, (X86bsf GR32:$src))]>, TB;
def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsf{l}\t{$src, $dst||$dst, $src}",
[(set GR32:$dst, (X86bsf (loadi32 addr:$src)))]>, TB;
def BSR16rr : I<0xBD, AddRegFrm, (outs GR16:$dst), (ins GR16:$src),
"bsr{w}\t{$src, $dst||$dst, $src}",
[(set GR16:$dst, (X86bsr GR16:$src))]>, TB;
def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bsr{w}\t{$src, $dst||$dst, $src}",
[(set GR16:$dst, (X86bsr (loadi16 addr:$src)))]>, TB;
def BSR32rr : I<0xBD, AddRegFrm, (outs GR32:$dst), (ins GR32:$src),
"bsr{l}\t{$src, $dst||$dst, $src}",
[(set GR32:$dst, (X86bsr GR32:$src))]>, TB;
def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsr{l}\t{$src, $dst||$dst, $src}",
[(set GR32:$dst, (X86bsr (loadi32 addr:$src)))]>, TB;
} // Defs = [EFLAGS]
def LEA16r : I<0x8D, MRMSrcMem,
(outs GR16:$dst), (ins i32mem:$src),
"lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize;

View File

@ -167,6 +167,23 @@ def XCHG64mr : RI<0x87, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
def XCHG64rm : RI<0x87, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2),
"xchg{q}\t{$src2|$src1}, {$src1|$src2}", []>;
// Bit scan instructions.
let Defs = [EFLAGS] in {
def BSF64rr : RI<0xBC, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
"bsf{q}\t{$src, $dst||$dst, $src}",
[(set GR64:$dst, (X86bsf GR64:$src))]>, TB;
def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"bsf{q}\t{$src, $dst||$dst, $src}",
[(set GR64:$dst, (X86bsf (loadi64 addr:$src)))]>, TB;
def BSR64rr : RI<0xBD, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
"bsr{q}\t{$src, $dst||$dst, $src}",
[(set GR64:$dst, (X86bsr GR64:$src))]>, TB;
def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"bsr{q}\t{$src, $dst||$dst, $src}",
[(set GR64:$dst, (X86bsr (loadi64 addr:$src)))]>, TB;
} // Defs = [EFLAGS]
// Repeat string ops
let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI] in
def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",

View File

@ -0,0 +1,16 @@
; RUN: llvm-as < %s | llc -march=x86 | grep bsr
; RUN: llvm-as < %s | llc -march=x86 | grep bsf
define i32 @t1(i32 %x) nounwind {
%tmp = tail call i32 @llvm.ctlz.i32( i32 %x )
ret i32 %tmp
}
declare i32 @llvm.ctlz.i32(i32) nounwind readnone
define i32 @t2(i32 %x) nounwind {
%tmp = tail call i32 @llvm.cttz.i32( i32 %x )
ret i32 %tmp
}
declare i32 @llvm.cttz.i32(i32) nounwind readnone