Fix some significant problems with constant pools that resulted in unnecessary paddings between constant pool entries, larger than necessary alignments (e.g. 8 byte alignment for .literal4 sections), and potentially other issues.

1. ConstantPoolSDNode alignment field is log2 value of the alignment requirement. This is not consistent with other SDNode variants. 2. MachineConstantPool alignment field is also a log2 value. 3. However, some places are creating ConstantPoolSDNode with alignment value rather than log2 values. This creates entries with artificially large alignments, e.g. 256 for SSE vector values. 4. Constant pool entry offsets are computed when they are created. However, asm printer group them by sections. That means the offsets are no longer valid. However, asm printer uses them to determine size of padding between entries. 5. Asm printer uses expensive data structure multimap to track constant pool entries by sections. 6. Asm printer iterate over SmallPtrSet when it's emitting constant pool entries. This is non-deterministic. Solutions: 1. ConstantPoolSDNode alignment field is changed to keep non-log2 value. 2. MachineConstantPool alignment field is also changed to keep non-log2 value. 3. Functions that create ConstantPool nodes are passing in non-log2 alignments. 4. MachineConstantPoolEntry no longer keeps an offset field. It's replaced with an alignment field. Offsets are not computed when constant pool entries are created. They are computed on the fly in asm printer and JIT. 5. Asm printer uses cheaper data structure to group constant pool entries. 6. Asm printer compute entry offsets after grouping is done. 7. Change JIT code to compute entry offsets on the fly. llvm-svn: 66875
2009-03-13 07:51:59 +00:00 · 2009-03-13 07:51:59 +00:00 · 1fb8aedd1e
parent bd5616271b
commit 1fb8aedd1e
19 changed files with 182 additions and 135 deletions
--- a/llvm/include/llvm/CodeGen/MachineConstantPool.h
+++ b/llvm/include/llvm/CodeGen/MachineConstantPool.h
@ -70,28 +70,26 @@ public:
    MachineConstantPoolValue *MachineCPVal;
  } Val;

-  /// The offset of the constant from the start of the pool. The top bit is set
-  /// when Val is a MachineConstantPoolValue.
-  unsigned Offset;
+  /// The required alignment for this entry. The top bit is set when Val is
+  /// a MachineConstantPoolValue.
+  unsigned Alignment;

-  MachineConstantPoolEntry(Constant *V, unsigned O)
-    : Offset(O) {
-    assert((int)Offset >= 0 && "Offset is too large");
+  MachineConstantPoolEntry(Constant *V, unsigned A)
+    : Alignment(A) {
    Val.ConstVal = V;
  }
-  MachineConstantPoolEntry(MachineConstantPoolValue *V, unsigned O)
-    : Offset(O){
-    assert((int)Offset >= 0 && "Offset is too large");
+  MachineConstantPoolEntry(MachineConstantPoolValue *V, unsigned A)
+    : Alignment(A) {
    Val.MachineCPVal = V; 
-    Offset |= 1 << (sizeof(unsigned)*8-1);
+    Alignment |= 1 << (sizeof(unsigned)*8-1);
  }

  bool isMachineConstantPoolEntry() const {
-    return (int)Offset < 0;
+    return (int)Alignment < 0;
  }

-  int getOffset() const { 
-    return Offset & ~(1 << (sizeof(unsigned)*8-1));
+  int getAlignment() const { 
+    return Alignment & ~(1 << (sizeof(unsigned)*8-1));
  }

  const Type *getType() const;
@ -117,13 +115,13 @@ public:
    : TD(td), PoolAlignment(1) {}
  ~MachineConstantPool();
    
-  /// getConstantPoolAlignment - Return the log2 of the alignment required by
+  /// getConstantPoolAlignment - Return the the alignment required by
  /// the whole constant pool, of which the first element must be aligned.
  unsigned getConstantPoolAlignment() const { return PoolAlignment; }
  
  /// getConstantPoolIndex - Create a new entry in the constant pool or return
-  /// an existing one.  User must specify the log2 of the minimum required
-  /// alignment for the object.
+  /// an existing one.  User must specify the minimum required alignment for
+  /// the object.
  unsigned getConstantPoolIndex(Constant *C, unsigned Alignment);
  unsigned getConstantPoolIndex(MachineConstantPoolValue *V,unsigned Alignment);
  
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@ -1840,7 +1840,7 @@ class ConstantPoolSDNode : public SDNode {
    MachineConstantPoolValue *MachineCPVal;
  } Val;
  int Offset;  // It's a MachineConstantPoolValue if top bit is set.
-  unsigned Alignment;
+  unsigned Alignment;  // Minimum alignment requirement of CP (not log2 value).
 protected:
  friend class SelectionDAG;
  ConstantPoolSDNode(bool isTarget, Constant *c, MVT VT, int o=0)
@ -1896,7 +1896,7 @@ public:
  }

  // Return the alignment of this constant pool object, which is either 0 (for
-  // default alignment) or log2 of the desired value.
+  // default alignment) or the desired value.
  unsigned getAlignment() const { return Alignment; }

  const Type *getType() const;
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@ -240,6 +240,16 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
  IncrementFunctionNumber();
 }

+namespace {
+  // SectionCPs - Keep track the alignment, constpool entries per Section.
+  struct SectionCPs {
+    const Section *S;
+    unsigned Alignment;
+    SmallVector<unsigned, 4> CPEs;
+    SectionCPs(const Section *s, unsigned a) : S(s), Alignment(a) {};
+  };
+}
+
 /// EmitConstantPool - Print to the current output stream assembly
 /// representations of the constants in the constant pool MCP. This is
 /// used to print out constants which have been "spilled to memory" by
@ -251,48 +261,60 @@ void AsmPrinter::EmitConstantPool(MachineConstantPool *MCP) {

  // Calculate sections for constant pool entries. We collect entries to go into
  // the same section together to reduce amount of section switch statements.
-  typedef
-    std::multimap<const Section*,
-                  std::pair<MachineConstantPoolEntry, unsigned> > CPMap;
-  CPMap  CPs;
-  SmallPtrSet<const Section*, 5> Sections;
-
+  SmallVector<SectionCPs, 4> CPSections;
  for (unsigned i = 0, e = CP.size(); i != e; ++i) {
    MachineConstantPoolEntry CPE = CP[i];
+    unsigned Align = CPE.getAlignment();
    const Section* S = TAI->SelectSectionForMachineConst(CPE.getType());
-    CPs.insert(std::make_pair(S, std::make_pair(CPE, i)));
-    Sections.insert(S);
+    // The number of sections are small, just do a linear search from the
+    // last section to the first.
+    bool Found = false;
+    unsigned SecIdx = CPSections.size();
+    while (SecIdx != 0) {
+      if (CPSections[--SecIdx].S == S) {
+        Found = true;
+        break;
+      }
+    }
+    if (!Found) {
+      SecIdx = CPSections.size();
+      CPSections.push_back(SectionCPs(S, Align));
+    }
+
+    if (Align > CPSections[SecIdx].Alignment)
+      CPSections[SecIdx].Alignment = Align;
+    CPSections[SecIdx].CPEs.push_back(i);
  }

  // Now print stuff into the calculated sections.
-  for (SmallPtrSet<const Section*, 5>::iterator IS = Sections.begin(),
-         ES = Sections.end(); IS != ES; ++IS) {
-    SwitchToSection(*IS);
-    EmitAlignment(MCP->getConstantPoolAlignment());
+  for (unsigned i = 0, e = CPSections.size(); i != e; ++i) {
+    SwitchToSection(CPSections[i].S);
+    EmitAlignment(Log2_32(CPSections[i].Alignment));

-    std::pair<CPMap::iterator, CPMap::iterator> II = CPs.equal_range(*IS);
-    for (CPMap::iterator I = II.first, E = II.second; I != E; ++I) {
-      CPMap::iterator J = next(I);
-      MachineConstantPoolEntry Entry = I->second.first;
-      unsigned index = I->second.second;
-
-      O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
-        << index << ":\t\t\t\t\t";
-    // O << TAI->getCommentString() << ' ' << 
-    //      WriteTypeSymbolic(O, CP[i].first.getType(), 0);
-      O << '\n';
-      if (Entry.isMachineConstantPoolEntry())
-        EmitMachineConstantPoolValue(Entry.Val.MachineCPVal);
-      else
-        EmitGlobalConstant(Entry.Val.ConstVal);
+    unsigned Offset = 0;
+    for (unsigned j = 0, ee = CPSections[i].CPEs.size(); j != ee; ++j) {
+      unsigned CPI = CPSections[i].CPEs[j];
+      MachineConstantPoolEntry CPE = CP[CPI];

      // Emit inter-object padding for alignment.
-      if (J != E) {
-        const Type *Ty = Entry.getType();
-        unsigned EntSize = TM.getTargetData()->getTypePaddedSize(Ty);
-        unsigned ValEnd = Entry.getOffset() + EntSize;
-        EmitZeros(J->second.first.getOffset()-ValEnd);
+      unsigned AlignMask = CPE.getAlignment() - 1;
+      unsigned NewOffset = (Offset + AlignMask) & ~AlignMask;
+      EmitZeros(NewOffset - Offset);
+
+      const Type *Ty = CPE.getType();
+      Offset = NewOffset + TM.getTargetData()->getTypePaddedSize(Ty);
+
+      O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
+        << CPI << ":\t\t\t\t\t";
+      if (VerboseAsm) {
+        O << TAI->getCommentString() << ' ';
+        WriteTypeSymbolic(O, CPE.getType(), 0);
      }
+      O << '\n';
+      if (CPE.isMachineConstantPoolEntry())
+        EmitMachineConstantPoolValue(CPE.Val.MachineCPVal);
+      else
+        EmitGlobalConstant(CPE.Val.ConstVal);
    }
  }
 }
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@ -521,19 +521,12 @@ unsigned MachineConstantPool::getConstantPoolIndex(Constant *C,
  // Check to see if we already have this constant.
  //
  // FIXME, this could be made much more efficient for large constant pools.
-  unsigned AlignMask = (1 << Alignment)-1;
  for (unsigned i = 0, e = Constants.size(); i != e; ++i)
-    if (Constants[i].Val.ConstVal == C && (Constants[i].Offset & AlignMask)== 0)
+    if (Constants[i].Val.ConstVal == C &&
+        (Constants[i].getAlignment() & (Alignment - 1)) == 0)
      return i;
  
-  unsigned Offset = 0;
-  if (!Constants.empty()) {
-    Offset = Constants.back().getOffset();
-    Offset += TD->getTypePaddedSize(Constants.back().getType());
-    Offset = (Offset+AlignMask)&~AlignMask;
-  }
-  
-  Constants.push_back(MachineConstantPoolEntry(C, Offset));
+  Constants.push_back(MachineConstantPoolEntry(C, Alignment));
  return Constants.size()-1;
 }

@ -545,19 +538,11 @@ unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V,
  // Check to see if we already have this constant.
  //
  // FIXME, this could be made much more efficient for large constant pools.
-  unsigned AlignMask = (1 << Alignment)-1;
  int Idx = V->getExistingMachineCPValue(this, Alignment);
  if (Idx != -1)
    return (unsigned)Idx;

-  unsigned Offset = 0;
-  if (!Constants.empty()) {
-    Offset = Constants.back().getOffset();
-    Offset += TD->getTypePaddedSize(Constants.back().getType());
-    Offset = (Offset+AlignMask)&~AlignMask;
-  }
-  
-  Constants.push_back(MachineConstantPoolEntry(V, Offset));
+  Constants.push_back(MachineConstantPoolEntry(V, Alignment));
  return Constants.size()-1;
 }

@ -568,7 +553,7 @@ void MachineConstantPool::print(raw_ostream &OS) const {
      Constants[i].Val.MachineCPVal->print(OS);
    else
      OS << *(Value*)Constants[i].Val.ConstVal;
-    OS << " , offset=" << Constants[i].getOffset();
+    OS << " , alignment=" << Constants[i].getAlignment();
    OS << "\n";
  }
 }
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@ -5672,8 +5672,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
        Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2);
        SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
                                            TD.getPrefTypeAlignment(FPTy));
-        unsigned Alignment =
-          1 << cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+        unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();

        // Get the offsets to the 0 and 1 element of the array so that we can
        // select between them.
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@ -592,7 +592,7 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
  }

  SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
-  unsigned Alignment = 1 << cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+  unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
  if (Extend)
    return DAG.getExtLoad(ISD::EXTLOAD, dl,
                          OrigVT, DAG.getEntryNode(),
@ -5547,7 +5547,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
    }
    Constant *CP = ConstantVector::get(CV);
    SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy());
-    unsigned Alignment = 1 << cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+    unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
    return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
                       PseudoSourceValue::getConstantPool(), 0,
                       false, Alignment);
@ -6019,7 +6019,7 @@ ExpandIntToFP(bool isSigned, MVT DestTy, SDValue Source, DebugLoc dl) {
    Constant *FudgeFactor = ConstantInt::get(Type::Int64Ty, FF);

    SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy());
-    unsigned Alignment = 1 << cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+    unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
    CPIdx = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), CPIdx, CstOffset);
    Alignment = std::min(Alignment, 4u);
    SDValue FudgeInReg;
@ -6173,7 +6173,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
  Constant *FudgeFactor = ConstantInt::get(Type::Int64Ty, FF);

  SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy());
-  unsigned Alignment = 1 << cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+  unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
  CPIdx = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), CPIdx, CstOffset);
  Alignment = std::min(Alignment, 4u);
  SDValue FudgeInReg;
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@ -2289,8 +2289,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
    if (TLI.isBigEndian()) std::swap(Zero, Four);
    SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet,
                                 Zero, Four);
-    unsigned Alignment =
-      1 << cast<ConstantPoolSDNode>(FudgePtr)->getAlignment();
+    unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment();
    FudgePtr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), FudgePtr, Offset);
    Alignment = std::min(Alignment, 4u);

--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
@ -298,11 +298,10 @@ void ScheduleDAGSDNodes::AddOperand(MachineInstr *MI, SDValue Op,
    const Type *Type = CP->getType();
    // MachineConstantPool wants an explicit alignment.
    if (Align == 0) {
-      Align = TM.getTargetData()->getPreferredTypeAlignmentShift(Type);
+      Align = TM.getTargetData()->getPrefTypeAlignment(Type);
      if (Align == 0) {
        // Alignment of vector types.  FIXME!
        Align = TM.getTargetData()->getTypePaddedSize(Type);
-        Align = Log2_64(Align);
      }
    }
    
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@ -1023,8 +1023,7 @@ SDValue SelectionDAG::getConstantPool(Constant *C, MVT VT,
                                      unsigned Alignment, int Offset,
                                      bool isTarget) {
  if (Alignment == 0)
-    Alignment =
-      TLI.getTargetData()->getPreferredTypeAlignmentShift(C->getType());
+    Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType());
  unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
  FoldingSetNodeID ID;
  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
@ -1046,8 +1045,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, MVT VT,
                                      unsigned Alignment, int Offset,
                                      bool isTarget) {
  if (Alignment == 0)
-    Alignment =
-      TLI.getTargetData()->getPreferredTypeAlignmentShift(C->getType());
+    Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType());
  unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
  FoldingSetNodeID ID;
  AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@ -162,7 +162,7 @@ std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
        Op += '>';
      }
    }
-    Op += " A=" + itostr(1 << CP->getAlignment());
+    Op += " A=" + itostr(CP->getAlignment());
  } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(Node)) {
    Op = "BB: ";
    const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
--- a/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp
@ -98,7 +98,7 @@ namespace {
    /// external functions.
    std::map<void*, void*> ExternalFnToStubMap;

-    //map addresses to indexes in the GOT
+    /// revGOTMap - map addresses to indexes in the GOT
    std::map<void*, unsigned> revGOTMap;
    unsigned nextGOTIndex;

@ -562,6 +562,10 @@ namespace {
    ///
    void *ConstantPoolBase;

+    /// ConstPoolAddresses - Addresses of individual constant pool entries.
+    ///
+    SmallVector<uintptr_t, 8> ConstPoolAddresses;
+
    /// JumpTable - The jump tables for the current function.
    ///
    MachineJumpTableInfo *JumpTable;
@ -787,15 +791,19 @@ void JITEmitter::AddStubToCurrentFunction(void *StubAddr) {
  FnRefs.insert(CurFn);
 }

-static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP) {
+static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP,
+                                           const TargetData *TD) {
  const std::vector<MachineConstantPoolEntry> &Constants = MCP->getConstants();
  if (Constants.empty()) return 0;

-  MachineConstantPoolEntry CPE = Constants.back();
-  unsigned Size = CPE.Offset;
-  const Type *Ty = CPE.isMachineConstantPoolEntry()
-    ? CPE.Val.MachineCPVal->getType() : CPE.Val.ConstVal->getType();
-  Size += TheJIT->getTargetData()->getTypePaddedSize(Ty);
+  unsigned Size = 0;
+  for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+    MachineConstantPoolEntry CPE = Constants[i];
+    unsigned AlignMask = CPE.getAlignment() - 1;
+    Size = (Size + AlignMask) & ~AlignMask;
+    const Type *Ty = CPE.getType();
+    Size += TD->getTypePaddedSize(Ty);
+  }
  return Size;
 }

@ -979,11 +987,10 @@ void JITEmitter::startFunction(MachineFunction &F) {
    ActualSize = RoundUpToAlign(ActualSize, 16);
    
    // Add the alignment of the constant pool
-    ActualSize = RoundUpToAlign(ActualSize, 
-                                1 << MCP->getConstantPoolAlignment());
+    ActualSize = RoundUpToAlign(ActualSize, MCP->getConstantPoolAlignment());

    // Add the constant pool size
-    ActualSize += GetConstantPoolSizeInBytes(MCP);
+    ActualSize += GetConstantPoolSizeInBytes(MCP, TheJIT->getTargetData());

    // Add the aligment of the jump table info
    ActualSize = RoundUpToAlign(ActualSize, MJTI->getAlignment());
@ -1139,6 +1146,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
       << ": " << (FnEnd-FnStart) << " bytes of text, "
       << Relocations.size() << " relocations\n";
  Relocations.clear();
+  ConstPoolAddresses.clear();

  // Mark code region readable and executable if it's not so already.
  MemMgr->setMemoryExecutable();
@ -1274,13 +1282,8 @@ void JITEmitter::emitConstantPool(MachineConstantPool *MCP) {
  const std::vector<MachineConstantPoolEntry> &Constants = MCP->getConstants();
  if (Constants.empty()) return;

-  MachineConstantPoolEntry CPE = Constants.back();
-  unsigned Size = CPE.Offset;
-  const Type *Ty = CPE.isMachineConstantPoolEntry()
-    ? CPE.Val.MachineCPVal->getType() : CPE.Val.ConstVal->getType();
-  Size += TheJIT->getTargetData()->getTypePaddedSize(Ty);
-
-  unsigned Align = 1 << MCP->getConstantPoolAlignment();
+  unsigned Size = GetConstantPoolSizeInBytes(MCP, TheJIT->getTargetData());
+  unsigned Align = MCP->getConstantPoolAlignment();
  ConstantPoolBase = allocateSpace(Size, Align);
  ConstantPool = MCP;

@ -1290,16 +1293,26 @@ void JITEmitter::emitConstantPool(MachineConstantPool *MCP) {
       << "] (size: " << Size << ", alignment: " << Align << ")\n";

  // Initialize the memory for all of the constant pool entries.
+  unsigned Offset = 0;
  for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
-    void *CAddr = (char*)ConstantPoolBase+Constants[i].Offset;
-    if (Constants[i].isMachineConstantPoolEntry()) {
+    MachineConstantPoolEntry CPE = Constants[i];
+    unsigned AlignMask = CPE.getAlignment() - 1;
+    Offset = (Offset + AlignMask) & ~AlignMask;
+
+    uintptr_t CAddr = (uintptr_t)ConstantPoolBase + Offset;
+    ConstPoolAddresses.push_back(CAddr);
+    if (CPE.isMachineConstantPoolEntry()) {
      // FIXME: add support to lower machine constant pool values into bytes!
      cerr << "Initialize memory with machine specific constant pool entry"
           << " has not been implemented!\n";
      abort();
    }
-    TheJIT->InitializeMemory(Constants[i].Val.ConstVal, CAddr);
-    DOUT << "JIT:   CP" << i << " at [" << CAddr << "]\n";
+    TheJIT->InitializeMemory(CPE.Val.ConstVal, (void*)CAddr);
+    DOUT << "JIT:   CP" << i << " at [0x"
+         << std::hex << CAddr << std::dec << "]\n";
+
+    const Type *Ty = CPE.Val.ConstVal->getType();
+    Offset += TheJIT->getTargetData()->getTypePaddedSize(Ty);
  }
 }

@ -1398,8 +1411,7 @@ void *JITEmitter::finishGVStub(const GlobalValue* GV) {
 uintptr_t JITEmitter::getConstantPoolEntryAddress(unsigned ConstantNum) const {
  assert(ConstantNum < ConstantPool->getConstants().size() &&
         "Invalid ConstantPoolIndex!");
-  return (uintptr_t)ConstantPoolBase +
-         ConstantPool->getConstants()[ConstantNum].Offset;
+  return ConstPoolAddresses[ConstantNum];
 }

 // getJumpTableEntryAddress - Return the address of the JumpTable with index
--- a/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
@ -47,11 +47,11 @@ ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv,

 int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
                                                    unsigned Alignment) {
-  unsigned AlignMask = (1 << Alignment)-1;
+  unsigned AlignMask = Alignment - 1;
  const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants();
  for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
    if (Constants[i].isMachineConstantPoolEntry() &&
-        (Constants[i].Offset & AlignMask) == 0) {
+        (Constants[i].getAlignment() & AlignMask) == 0) {
      ARMConstantPoolValue *CPV =
        (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
      if (CPV->GV == GV &&
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@ -540,7 +540,7 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
    if (isARMFunc && Subtarget->isThumb() && !Subtarget->hasV5TOps()) {
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex,
                                                           ARMCP::CPStub, 4);
-      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 2);
+      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl, 
                           DAG.getEntryNode(), CPAddr, NULL, 0); 
@ -559,7 +559,7 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
    if (isARMFunc && Subtarget->isThumb() && !Subtarget->hasV5TOps()) {
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(Sym, ARMPCLabelIndex,
                                                           ARMCP::CPStub, 4);
-      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 2);
+      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr, NULL, 0); 
@ -744,7 +744,7 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
  ARMConstantPoolValue *CPV =
    new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue,
                             PCAdj, "tlsgd", true);
-  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 2);
+  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
  Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, NULL, 0);
  SDValue Chain = Argument.getValue(1);
@ -785,7 +785,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue,
                               PCAdj, "gottpoff", true);
-    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 2);
+    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0);
    Chain = Offset.getValue(1);
@ -798,7 +798,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
    // local exec model
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(GV, ARMCP::CPValue, "tpoff");
-    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 2);
+    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0);
  }
@ -832,7 +832,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
    bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(GV, ARMCP::CPValue, UseGOTOFF ? "GOTOFF":"GOT");
-    SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 2);
+    SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
    SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), 
                                 CPAddr, NULL, 0);
@ -843,7 +843,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
      Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0);
    return Result;
  } else {
-    SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 2);
+    SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0);
  }
@ -869,7 +869,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
  bool IsIndirect = GVIsIndirectSymbol(GV, RelocM);
  SDValue CPAddr;
  if (RelocM == Reloc::Static)
-    CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 2);
+    CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
  else {
    unsigned PCAdj = (RelocM != Reloc::PIC_)
      ? 0 : (Subtarget->isThumb() ? 4 : 8);
@ -877,7 +877,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
      : ARMCP::CPValue;
    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex,
                                                         Kind, PCAdj);
-    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 2);
+    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  }
  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);

@ -904,7 +904,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
  ARMConstantPoolValue *CPV = new ARMConstantPoolValue("_GLOBAL_OFFSET_TABLE_",
                                                       ARMPCLabelIndex,
                                                       ARMCP::CPValue, PCAdj);
-  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 2);
+  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0);
  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
--- a/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMRegisterInfo.cpp
@ -184,7 +184,7 @@ void ARMRegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
  MachineFunction &MF = *MBB.getParent();
  MachineConstantPool *ConstantPool = MF.getConstantPool();
  Constant *C = ConstantInt::get(Type::Int32Ty, Val);
-  unsigned Idx = ConstantPool->getConstantPoolIndex(C, 2);
+  unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
  if (isThumb)
    BuildMI(MBB, MBBI, dl, 
            TII->get(ARM::tLDRcp),DestReg).addConstantPoolIndex(Idx);
--- a/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@ -276,7 +276,7 @@ public:

    Constant *CP = ConstantVector::get(CV);
    SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy());
-    unsigned Alignment = 1 << cast<ConstantPoolSDNode > (CPIdx)->getAlignment();
+    unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
    SDValue CGPoolOffset =
            SPU::LowerConstantPool(CPIdx, *CurDAG,
                                   SPUtli.getSPUTargetMachine());
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@ -1480,11 +1480,10 @@ unsigned X86FastISel::TargetMaterializeConstant(Constant *C) {
  }
  
  // MachineConstantPool wants an explicit alignment.
-  unsigned Align = TD.getPreferredTypeAlignmentShift(C->getType());
+  unsigned Align = TD.getPrefTypeAlignment(C->getType());
  if (Align == 0) {
    // Alignment of vector types.  FIXME!
    Align = TD.getTypePaddedSize(C->getType());
-    Align = Log2_64(Align);
  }
  
  // x86-32 PIC requires a PIC base register for constant pools.
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@ -4672,9 +4672,8 @@ X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  // FIXME there isn't really any debug info here, should come from the parent
  DebugLoc dl = CP->getDebugLoc();
-  SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(),
-                                               getPointerTy(),
-                                               CP->getAlignment());
+  SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(),
+                                             CP->getAlignment());
  Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
  // With PIC, the address is actually $g + Offset.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
@ -5063,13 +5062,13 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) {
  CV0.push_back(ConstantInt::get(APInt(32, 0)));
  CV0.push_back(ConstantInt::get(APInt(32, 0)));
  Constant *C0 = ConstantVector::get(CV0);
-  SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 4);
+  SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16);

  std::vector<Constant*> CV1;
  CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4530000000000000ULL))));
  CV1.push_back(ConstantFP::get(APFloat(APInt(64, 0x4330000000000000ULL))));
  Constant *C1 = ConstantVector::get(CV1);
-  SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 4);
+  SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16);

  SmallVector<SDValue, 4> MaskVec;
  MaskVec.push_back(DAG.getConstant(0, MVT::i32));
@ -5266,7 +5265,7 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) {
    CV.push_back(C);
  }
  Constant *C = ConstantVector::get(CV);
-  SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
+  SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
  SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
                               PseudoSourceValue::getConstantPool(), 0,
                               false, 16);
@ -5295,7 +5294,7 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) {
    CV.push_back(C);
  }
  Constant *C = ConstantVector::get(CV);
-  SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
+  SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
  SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
                               PseudoSourceValue::getConstantPool(), 0,
                               false, 16);
@ -5343,7 +5342,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
    CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
  }
  Constant *C = ConstantVector::get(CV);
-  SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
+  SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
  SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,
                                PseudoSourceValue::getConstantPool(), 0,
                                false, 16);
@ -5372,7 +5371,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
    CV.push_back(ConstantFP::get(APFloat(APInt(32, 0))));
  }
  C = ConstantVector::get(CV);
-  CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
+  CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
  SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
                                PseudoSourceValue::getConstantPool(), 0,
                                false, 16);
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@ -2186,7 +2186,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
    Constant *C = LoadMI->getOpcode() == X86::V_SET0 ?
                    ConstantVector::getNullValue(Ty) :
                    ConstantVector::getAllOnesValue(Ty);
-    unsigned CPI = MCP.getConstantPoolIndex(C, /*AlignmentLog2=*/4);
+    unsigned CPI = MCP.getConstantPoolIndex(C, 16);

    // Create operands to load from the constant pool entry.
    MOs.push_back(MachineOperand::CreateReg(PICBase, false));
--- a/llvm/test/CodeGen/X86/2009-03-12-CPAlignBug.ll
+++ b/llvm/test/CodeGen/X86/2009-03-12-CPAlignBug.ll
@ -0,0 +1,37 @@
+; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -mattr=+sse2 | not grep {.space}
+; rdar://6668548
+
+declare double @llvm.sqrt.f64(double) nounwind readonly
+
+declare double @fabs(double)
+
+declare double @llvm.pow.f64(double, double) nounwind readonly
+
+define void @SolveCubic_bb1(i32* %solutions, double* %x, x86_fp80 %.reload, x86_fp80 %.reload5, x86_fp80 %.reload6, double %.reload8) nounwind {
+newFuncRoot:
+	br label %bb1
+
+bb1.ret.exitStub:		; preds = %bb1
+	ret void
+
+bb1:		; preds = %newFuncRoot
+	store i32 1, i32* %solutions, align 4
+	%0 = tail call double @llvm.sqrt.f64(double %.reload8)		; <double> [#uses=1]
+	%1 = fptrunc x86_fp80 %.reload6 to double		; <double> [#uses=1]
+	%2 = tail call double @fabs(double %1) nounwind readnone		; <double> [#uses=1]
+	%3 = add double %0, %2		; <double> [#uses=1]
+	%4 = tail call double @llvm.pow.f64(double %3, double 0x3FD5555555555555)		; <double> [#uses=1]
+	%5 = fpext double %4 to x86_fp80		; <x86_fp80> [#uses=2]
+	%6 = fdiv x86_fp80 %.reload5, %5		; <x86_fp80> [#uses=1]
+	%7 = add x86_fp80 %5, %6		; <x86_fp80> [#uses=1]
+	%8 = fptrunc x86_fp80 %7 to double		; <double> [#uses=1]
+	%9 = fcmp olt x86_fp80 %.reload6, 0xK00000000000000000000		; <i1> [#uses=1]
+	%iftmp.6.0 = select i1 %9, double 1.000000e+00, double -1.000000e+00		; <double> [#uses=1]
+	%10 = mul double %8, %iftmp.6.0		; <double> [#uses=1]
+	%11 = fpext double %10 to x86_fp80		; <x86_fp80> [#uses=1]
+	%12 = fdiv x86_fp80 %.reload, 0xKC000C000000000000000		; <x86_fp80> [#uses=1]
+	%13 = add x86_fp80 %11, %12		; <x86_fp80> [#uses=1]
+	%14 = fptrunc x86_fp80 %13 to double		; <double> [#uses=1]
+	store double %14, double* %x, align 1
+	br label %bb1.ret.exitStub
+}