diff --git a/llvm/lib/Target/IA64/IA64ISelPattern.cpp b/llvm/lib/Target/IA64/IA64ISelPattern.cpp index 7c03a45bca59..20189e1999c0 100644 --- a/llvm/lib/Target/IA64/IA64ISelPattern.cpp +++ b/llvm/lib/Target/IA64/IA64ISelPattern.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/ADT/Statistic.h" #include +#include #include using namespace llvm; @@ -412,6 +413,9 @@ namespace { /// IA64Lowering - This object fully describes how to lower LLVM code to an /// IA64-specific SelectionDAG. IA64TargetLowering IA64Lowering; + SelectionDAG *ISelDAG; // Hack to support us having a dag->dag transform + // for sdiv and udiv until it is put into the future + // dag combiner /// ExprMap - As shared expressions are codegen'd, we keep track of which /// vreg the value is produced in, so we only emit one copy of each compiled @@ -420,8 +424,8 @@ namespace { std::set LoweredTokens; public: - ISel(TargetMachine &TM) : SelectionDAGISel(IA64Lowering), IA64Lowering(TM) { - } + ISel(TargetMachine &TM) : SelectionDAGISel(IA64Lowering), IA64Lowering(TM), + ISelDAG(0) { } /// InstructionSelectBasicBlock - This callback is invoked by /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. @@ -429,6 +433,9 @@ namespace { unsigned SelectExpr(SDOperand N); void Select(SDOperand N); + // a dag->dag to transform mul-by-constant-int to shifts+adds/subs + SDOperand BuildConstmulSequence(SDOperand N); + }; } @@ -437,11 +444,419 @@ namespace { void ISel::InstructionSelectBasicBlock(SelectionDAG &DAG) { // Codegen the basic block. + ISelDAG = &DAG; Select(DAG.getRoot()); // Clear state used for selection. ExprMap.clear(); LoweredTokens.clear(); + ISelDAG = 0; +} + +const char sign[2]={'+','-'}; + + +// strip leading '0' characters from a string +void munchLeadingZeros(std::string& inString) { + while(inString.c_str()[0]=='0') { + inString.erase(0, 1); + } +} + +// strip trailing '0' characters from a string +void munchTrailingZeros(std::string& inString) { + int curPos=inString.length()-1; + + while(inString.c_str()[curPos]=='0') { + inString.erase(curPos, 1); + curPos--; + } +} + +// return how many consecutive '0' characters are at the end of a string +unsigned int countTrailingZeros(std::string& inString) { + int curPos=inString.length()-1; + unsigned int zeroCount=0; + // assert goes here + while(inString.c_str()[curPos--]=='0') { + zeroCount++; + } + return zeroCount; +} + +// booth encode a string of '1' and '0' characters (returns string of 'P' (+1) +// '0' and 'N' (-1) characters) +void boothEncode(std::string inString, std::string& boothEncodedString) { + + int curpos=0; + int replacements=0; + int lim=inString.size(); + + while(curpos1) { + inString.replace(curpos, runlength+1, replaceString); + curpos+=runlength-1; + } else + curpos++; + } else { // a zero, we just keep chugging along + curpos++; + } + } + + // clean up (trim the string, reverse it and turn '1's into 'P's) + munchTrailingZeros(inString); + boothEncodedString=""; + + for(int i=inString.size()-1;i>=0;i--) + if(inString[i]=='1') + boothEncodedString+="P"; + else + boothEncodedString+=inString[i]; + +} + +struct shiftaddblob { // this encodes stuff like (x=) "A << B [+-] C << D" + unsigned firstVal; // A + unsigned firstShift; // B + unsigned secondVal; // C + unsigned secondShift; // D + bool isSub; +}; + +/* this implements Lefevre's "pattern-based" constant multiplication, + * see "Multiplication by an Integer Constant", INRIA report 1999-06 + * + * TODO: implement a method to try rewriting P0N<->0PP / N0P<->0NN + * to get better booth encodings - this does help in practice + * TODO: weight shifts appropriately (most architectures can't + * fuse a shift and an add for arbitrary shift amounts) */ +unsigned lefevre(const std::string inString, + std::vector &ops) { + std::string retstring; + std::string s = inString; + munchTrailingZeros(s); + + int length=s.length()-1; + + if(length==0) { + return(0); + } + + std::vector p,n; + + for(int i=0; i<=length; i++) { + if (s.c_str()[length-i]=='P') { + p.push_back(i); + } else if (s.c_str()[length-i]=='N') { + n.push_back(i); + } + } + + std::string t, u; + int c,f; + std::map w; + + for(int i=0; i::const_iterator ii; + std::vector d; + std::multimap sorted_by_value; + + for(ii = w.begin(); ii!=w.end(); ii++) + sorted_by_value.insert(std::pair((*ii).second,(*ii).first)); + + for (std::multimap::iterator it = sorted_by_value.begin(); + it != sorted_by_value.end(); ++it) { + d.push_back((*it).second); + } + + int int_W=0; + int int_d; + + while(d.size()>0 && (w[int_d=d.back()] > int_W)) { + d.pop_back(); + retstring=s; // hmmm + int x=0; + int z=abs(int_d)-1; + + if(int_d>0) { + + for(int base=0; baseint_W) { + int_W = x; + t = retstring; + c = int_d; // tofix + } + + } d.pop_back(); // hmm + + u = t; + + for(int i=0; i(N.getOperand(1))->getValue(); + + bool flippedSign; + unsigned preliminaryShift=0; + + assert(constant > 0 && "erk, don't multiply by zero or negative nums\n"); + + // first, we make the constant to multiply by positive + if(constant<0) { + constant=-constant; + flippedSign=true; + } else { + flippedSign=false; + } + + // next, we make it odd. + for(; (constant%2==0); preliminaryShift++) + constant>>=1; + + //OK, we have a positive, odd number of 64 bits or less. Convert it + //to a binary string, constantString[0] is the LSB + char constantString[65]; + for(int i=0; i<64; i++) + constantString[i]='0'+((constant>>i)&0x1); + constantString[64]=0; + + // now, Booth encode it + std::string boothEncodedString; + boothEncode(constantString, boothEncodedString); + + std::vector ops; + // do the transformation, filling out 'ops' + lefevre(boothEncodedString, ops); + + SDOperand results[ops.size()]; // temporary results (of adds/subs of shifts) + + // now turn 'ops' into DAG bits + for(int i=0; igetConstant(ops[i].firstShift, MVT::i64); + SDOperand val = (ops[i].firstVal == 0) ? N.getOperand(0) : + results[ops[i].firstVal-1]; + SDOperand left = ISelDAG->getNode(ISD::SHL, MVT::i64, val, amt); + amt = ISelDAG->getConstant(ops[i].secondShift, MVT::i64); + val = (ops[i].secondVal == 0) ? N.getOperand(0) : + results[ops[i].secondVal-1]; + SDOperand right = ISelDAG->getNode(ISD::SHL, MVT::i64, val, amt); + if(ops[i].isSub) + results[i] = ISelDAG->getNode(ISD::SUB, MVT::i64, left, right); + else + results[i] = ISelDAG->getNode(ISD::ADD, MVT::i64, left, right); + } + + // don't forget flippedSign and preliminaryShift! + SDOperand finalresult; + if(preliminaryShift) { + SDOperand finalshift = ISelDAG->getConstant(preliminaryShift, MVT::i64); + finalresult = ISelDAG->getNode(ISD::SHL, MVT::i64, + results[ops.size()-1], finalshift); + } else { // there was no preliminary divide-by-power-of-2 required + finalresult = results[ops.size()-1]; + } + + return finalresult; } /// ExactLog2 - This function solves for (Val == 1 << (N-1)) and returns N. It @@ -909,23 +1324,32 @@ assert(0 && "hmm, ISD::SIGN_EXTEND: shouldn't ever be reached. bad luck!\n"); } case ISD::MUL: { - Tmp1 = SelectExpr(N.getOperand(0)); - Tmp2 = SelectExpr(N.getOperand(1)); if(DestType != MVT::f64) { // TODO: speed! - // boring old integer multiply with xma - unsigned TempFR1=MakeReg(MVT::f64); - unsigned TempFR2=MakeReg(MVT::f64); - unsigned TempFR3=MakeReg(MVT::f64); - BuildMI(BB, IA64::SETFSIG, 1, TempFR1).addReg(Tmp1); - BuildMI(BB, IA64::SETFSIG, 1, TempFR2).addReg(Tmp2); - BuildMI(BB, IA64::XMAL, 1, TempFR3).addReg(TempFR1).addReg(TempFR2) - .addReg(IA64::F0); - BuildMI(BB, IA64::GETFSIG, 1, Result).addReg(TempFR3); + if(N.getOperand(1).getOpcode() != ISD::Constant) { // if not a const mul + // boring old integer multiply with xma + Tmp1 = SelectExpr(N.getOperand(0)); + Tmp2 = SelectExpr(N.getOperand(1)); + + unsigned TempFR1=MakeReg(MVT::f64); + unsigned TempFR2=MakeReg(MVT::f64); + unsigned TempFR3=MakeReg(MVT::f64); + BuildMI(BB, IA64::SETFSIG, 1, TempFR1).addReg(Tmp1); + BuildMI(BB, IA64::SETFSIG, 1, TempFR2).addReg(Tmp2); + BuildMI(BB, IA64::XMAL, 1, TempFR3).addReg(TempFR1).addReg(TempFR2) + .addReg(IA64::F0); + BuildMI(BB, IA64::GETFSIG, 1, Result).addReg(TempFR3); + return Result; // early exit + } else { // we are multiplying by an integer constant! yay + return Reg = SelectExpr(BuildConstmulSequence(N)); // avert your eyes! + } } - else // floating point multiply + else { // floating point multiply + Tmp1 = SelectExpr(N.getOperand(0)); + Tmp2 = SelectExpr(N.getOperand(1)); BuildMI(BB, IA64::FMPY, 2, Result).addReg(Tmp1).addReg(Tmp2); - return Result; + return Result; + } } case ISD::SUB: {