From 67ca6f6347255afa1d75aa97df2d98623e8a9658 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Sat, 26 Apr 2008 07:40:11 +0000 Subject: [PATCH] When SRoA'ing a global variable, make sure the new globals get the appropriate alignment. This fixes a miscompilation of 252.eon on x86-64 (rdar://5891920). Bill, please pull this into Tak. llvm-svn: 50308 --- llvm/lib/Transforms/IPO/GlobalOpt.cpp | 37 ++++++++++++++++--- .../GlobalOpt/2008-04-26-SROA-Global-Align.ll | 32 ++++++++++++++++ 2 files changed, 64 insertions(+), 5 deletions(-) create mode 100644 llvm/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 50c5eccbd8ff..600be26620a8 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/MathExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -455,7 +456,7 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) { /// behavior of the program in a more fine-grained way. We have determined that /// this transformation is safe already. We return the first global variable we /// insert so that the caller can reprocess it. -static GlobalVariable *SRAGlobal(GlobalVariable *GV) { +static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { // Make sure this global only has simple uses that we can SRA. if (!GlobalUsersSafeToSRA(GV)) return 0; @@ -467,8 +468,14 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV) { std::vector NewGlobals; Module::GlobalListType &Globals = GV->getParent()->getGlobalList(); + // Get the alignment of the global, either explicit or target-specific. + unsigned StartAlignment = GV->getAlignment(); + if (StartAlignment == 0) + StartAlignment = TD.getABITypeAlignment(GV->getType()); + if (const StructType *STy = dyn_cast(Ty)) { NewGlobals.reserve(STy->getNumElements()); + const StructLayout &Layout = *TD.getStructLayout(STy); for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Constant *In = getAggregateConstantElement(Init, ConstantInt::get(Type::Int32Ty, i)); @@ -480,19 +487,28 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV) { GV->isThreadLocal()); Globals.insert(GV, NGV); NewGlobals.push_back(NGV); + + // Calculate the known alignment of the field. If the original aggregate + // had 256 byte alignment for example, something might depend on that: + // propagate info to each field. + uint64_t FieldOffset = Layout.getElementOffset(i); + unsigned NewAlign = (unsigned)MinAlign(StartAlignment, FieldOffset); + if (NewAlign > TD.getABITypeAlignment(STy->getElementType(i))) + NGV->setAlignment(NewAlign); } } else if (const SequentialType *STy = dyn_cast(Ty)) { unsigned NumElements = 0; if (const ArrayType *ATy = dyn_cast(STy)) NumElements = ATy->getNumElements(); - else if (const VectorType *PTy = dyn_cast(STy)) - NumElements = PTy->getNumElements(); else - assert(0 && "Unknown aggregate sequential type!"); + NumElements = cast(STy)->getNumElements(); if (NumElements > 16 && GV->hasNUsesOrMore(16)) return 0; // It's not worth it. NewGlobals.reserve(NumElements); + + uint64_t EltSize = TD.getABITypeSize(STy->getElementType()); + unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType()); for (unsigned i = 0, e = NumElements; i != e; ++i) { Constant *In = getAggregateConstantElement(Init, ConstantInt::get(Type::Int32Ty, i)); @@ -505,6 +521,13 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV) { GV->isThreadLocal()); Globals.insert(GV, NGV); NewGlobals.push_back(NGV); + + // Calculate the known alignment of the field. If the original aggregate + // had 256 byte alignment for example, something might depend on that: + // propagate info to each field. + unsigned NewAlign = (unsigned)MinAlign(StartAlignment, EltSize*i); + if (NewAlign > EltAlign) + NGV->setAlignment(NewAlign); } } @@ -804,6 +827,9 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, GV->getName()+".body", (Module *)NULL, GV->isThreadLocal()); + // FIXME: This new global should have the alignment returned by malloc. Code + // could depend on malloc returning large alignment (on the mac, 16 bytes) but + // this would only guarantee some lower alignment. GV->getParent()->getGlobalList().insert(GV, NewGV); // Anything that used the malloc now uses the global directly. @@ -1520,7 +1546,8 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, ++NumMarked; return true; } else if (!GV->getInitializer()->getType()->isFirstClassType()) { - if (GlobalVariable *FirstNewGV = SRAGlobal(GV)) { + if (GlobalVariable *FirstNewGV = SRAGlobal(GV, + getAnalysis())) { GVI = FirstNewGV; // Don't skip the newly produced globals! return true; } diff --git a/llvm/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll b/llvm/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll new file mode 100644 index 000000000000..3464be901844 --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll @@ -0,0 +1,32 @@ +; Verify that when @G is SROA'd that the new globals have correct +; alignments. Elements 0 and 2 must be 16-byte aligned, and element +; 1 must be at least 8 byte aligned (but could be more). + +; RUN: llvm-as < %s | opt -globalopt | llvm-dis | grep {@G.0 = internal global .*align 16} +; RUN: llvm-as < %s | opt -globalopt | llvm-dis | grep {@G.1 = internal global .*align 8} +; RUN: llvm-as < %s | opt -globalopt | llvm-dis | grep {@G.2 = internal global .*align 16} +; rdar://5891920 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-apple-darwin8" + +%T = type { double, double, double } + +@G = internal global %T zeroinitializer, align 16 + + +define void @test() { + store double 1.0, double* getelementptr (%T* @G, i32 0, i32 0), align 16 + store double 2.0, double* getelementptr (%T* @G, i32 0, i32 1), align 8 + store double 3.0, double* getelementptr (%T* @G, i32 0, i32 2), align 16 + ret void +} + +define double @test2() { + %V1 = load double* getelementptr (%T* @G, i32 0, i32 0), align 16 + %V2 = load double* getelementptr (%T* @G, i32 0, i32 1), align 8 + %V3 = load double* getelementptr (%T* @G, i32 0, i32 2), align 16 + %R = add double %V1, %V2 + %R2 = add double %R, %V3 + ret double %R2 +}