SimplifyCFG: Make the switch-to-lookup table transformation store the

tables in bitmaps when they fit in a target-legal register.

This saves some space, and it also allows for building tables that would
otherwise be deemed too sparse.

One interesting case that this hits is example 7 from
http://blog.regehr.org/archives/320. We currently generate good code
for this when lowering the switch to the selection DAG: we build a
bitmask to decide whether to jump to one block or the other. My patch
will result in the same bitmask, but it removes the need for the jump,
as the return value can just be retrieved from the mask.

llvm-svn: 164684
This commit is contained in:
Hans Wennborg 2012-09-26 09:44:49 +00:00
parent 35795cf9c1
commit 39583b88a0
2 changed files with 162 additions and 26 deletions

View File

@ -60,6 +60,7 @@ SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
STATISTIC(NumSpeculations, "Number of speculative executed instructions"); STATISTIC(NumSpeculations, "Number of speculative executed instructions");
STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block"); STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block");
namespace { namespace {
@ -3252,12 +3253,19 @@ namespace {
uint64_t TableSize, uint64_t TableSize,
ConstantInt *Offset, ConstantInt *Offset,
const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values, const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values,
Constant *DefaultValue); Constant *DefaultValue,
const TargetData *TD);
/// BuildLookup - Build instructions with Builder to retrieve the value at /// BuildLookup - Build instructions with Builder to retrieve the value at
/// the position given by Index in the lookup table. /// the position given by Index in the lookup table.
Value *BuildLookup(Value *Index, IRBuilder<> &Builder); Value *BuildLookup(Value *Index, IRBuilder<> &Builder);
/// WouldFitInRegister - Return true if a table with TableSize elements of
/// type ElementType would fit in a target-legal register.
static bool WouldFitInRegister(const TargetData *TD,
uint64_t TableSize,
const Type *ElementType);
private: private:
// Depending on the contents of the table, it can be represented in // Depending on the contents of the table, it can be represented in
// different ways. // different ways.
@ -3266,6 +3274,11 @@ namespace {
// store that single value and return it for each lookup. // store that single value and return it for each lookup.
SingleValueKind, SingleValueKind,
// For small tables with integer elements, we can pack them into a bitmap
// that fits into a target-legal register. Values are retrieved by
// shift and mask operations.
BitMapKind,
// The table is stored as an array of values. Values are retrieved by load // The table is stored as an array of values. Values are retrieved by load
// instructions from the table. // instructions from the table.
ArrayKind ArrayKind
@ -3274,6 +3287,10 @@ namespace {
// For SingleValueKind, this is the single value. // For SingleValueKind, this is the single value.
Constant *SingleValue; Constant *SingleValue;
// For BitMapKind, this is the bitmap.
ConstantInt *BitMap;
IntegerType *BitMapElementTy;
// For ArrayKind, this is the array. // For ArrayKind, this is the array.
GlobalVariable *Array; GlobalVariable *Array;
}; };
@ -3283,7 +3300,8 @@ SwitchLookupTable::SwitchLookupTable(Module &M,
uint64_t TableSize, uint64_t TableSize,
ConstantInt *Offset, ConstantInt *Offset,
const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values, const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values,
Constant *DefaultValue) { Constant *DefaultValue,
const TargetData *TD) {
assert(Values.size() && "Can't build lookup table without values."); assert(Values.size() && "Can't build lookup table without values.");
assert(TableSize >= Values.size() && "Can't fit values in table."); assert(TableSize >= Values.size() && "Can't fit values in table.");
@ -3323,6 +3341,21 @@ SwitchLookupTable::SwitchLookupTable(Module &M,
return; return;
} }
// If the type is integer and the table fits in a register, build a bitmap.
if (WouldFitInRegister(TD, TableSize, DefaultValue->getType())) {
IntegerType *IT = cast<IntegerType>(DefaultValue->getType());
APInt TableInt(TableSize * IT->getBitWidth(), 0);
for (uint64_t I = TableSize; I > 0; --I) {
TableInt <<= IT->getBitWidth();
TableInt |= cast<ConstantInt>(TableContents[I - 1])->getZExtValue();
}
BitMap = ConstantInt::get(M.getContext(), TableInt);
BitMapElementTy = IT;
Kind = BitMapKind;
++NumBitMaps;
return;
}
// Store the table in an array. // Store the table in an array.
ArrayType *ArrayTy = ArrayType::get(DefaultValue->getType(), TableSize); ArrayType *ArrayTy = ArrayType::get(DefaultValue->getType(), TableSize);
Constant *Initializer = ConstantArray::get(ArrayTy, TableContents); Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
@ -3339,6 +3372,32 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
switch (Kind) { switch (Kind) {
case SingleValueKind: case SingleValueKind:
return SingleValue; return SingleValue;
case BitMapKind: {
// Type of the bitmap (e.g. i59).
IntegerType *MapTy = BitMap->getType();
// Cast Index to the same type as the bitmap.
// Note: The Index is <= the number of elements in the table, so
// truncating it to the width of the bitmask is safe.
Value *ShiftAmt = Index;
IntegerType *IndexTy = cast<IntegerType>(Index->getType());
if (IndexTy->getBitWidth() < MapTy->getBitWidth())
ShiftAmt = Builder.CreateZExt(ShiftAmt, MapTy, "switch.zext");
else if (IndexTy->getBitWidth() > MapTy->getBitWidth())
ShiftAmt = Builder.CreateTrunc(ShiftAmt, MapTy, "switch.trunc");
// Multiply the shift amount by the element width.
ShiftAmt = Builder.CreateMul(ShiftAmt,
ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
"switch.shiftamt");
// Shift down.
Value *DownShifted = Builder.CreateLShr(BitMap, ShiftAmt,
"switch.downshift");
// Mask off.
return Builder.CreateTrunc(DownShifted, BitMapElementTy,
"switch.masked");
}
case ArrayKind: { case ArrayKind: {
Value *GEPIndices[] = { Builder.getInt32(0), Index }; Value *GEPIndices[] = { Builder.getInt32(0), Index };
Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices, Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices,
@ -3349,35 +3408,53 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
llvm_unreachable("Unknown lookup table kind!"); llvm_unreachable("Unknown lookup table kind!");
} }
bool SwitchLookupTable::WouldFitInRegister(const TargetData *TD,
uint64_t TableSize,
const Type *ElementType) {
if (!TD)
return false;
const IntegerType *IT = dyn_cast<IntegerType>(ElementType);
if (!IT)
return false;
// FIXME: If the type is wider than it needs to be, e.g. i8 but all values
// are <= 15, we could try to narrow the type.
return TD->fitsInLegalInteger(TableSize * IT->getBitWidth());
}
/// ShouldBuildLookupTable - Determine whether a lookup table should be built /// ShouldBuildLookupTable - Determine whether a lookup table should be built
/// for this switch, based on the number of caes, size of the table and the /// for this switch, based on the number of caes, size of the table and the
/// types of the results. /// types of the results.
static bool ShouldBuildLookupTable(SwitchInst *SI, static bool ShouldBuildLookupTable(SwitchInst *SI,
uint64_t TableSize) { uint64_t TableSize,
const TargetData *TD,
const SmallDenseMap<PHINode*, Type*>& ResultTypes) {
// The table density should be at least 40%. This is the same criterion as for // The table density should be at least 40%. This is the same criterion as for
// jump tables, see SelectionDAGBuilder::handleJTSwitchCase. // jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
// FIXME: Find the best cut-off. // FIXME: Find the best cut-off.
if (SI->getNumCases() * 10 >= TableSize * 4) if (SI->getNumCases() * 10 >= TableSize * 4)
return true; return true;
return false; // If each table would fit in a register, we should build it anyway.
for (SmallDenseMap<PHINode*, Type*>::const_iterator I = ResultTypes.begin(),
E = ResultTypes.end(); I != E; ++I) {
if (!SwitchLookupTable::WouldFitInRegister(TD, TableSize, I->second))
return false;
}
return true;
} }
/// SwitchToLookupTable - If the switch is only used to initialize one or more /// SwitchToLookupTable - If the switch is only used to initialize one or more
/// phi nodes in a common successor block with different constant values, /// phi nodes in a common successor block with different constant values,
/// replace the switch with lookup tables. /// replace the switch with lookup tables.
static bool SwitchToLookupTable(SwitchInst *SI, static bool SwitchToLookupTable(SwitchInst *SI,
IRBuilder<> &Builder) { IRBuilder<> &Builder,
const TargetData* TD) {
assert(SI->getNumCases() > 1 && "Degenerate switch?"); assert(SI->getNumCases() > 1 && "Degenerate switch?");
// FIXME: Handle unreachable cases. // FIXME: Handle unreachable cases.
// FIXME: If the switch is too sparse for a lookup table, perhaps we could // FIXME: If the switch is too sparse for a lookup table, perhaps we could
// split off a dense part and build a lookup table for that. // split off a dense part and build a lookup table for that.
// FIXME: If the results are all integers and the lookup table would fit in a
// target-legal register, we should store them as a bitmap and use shift/mask
// to look up the result.
// FIXME: This creates arrays of GEPs to constant strings, which means each // FIXME: This creates arrays of GEPs to constant strings, which means each
// GEP needs a runtime relocation in PIC code. We should just build one big // GEP needs a runtime relocation in PIC code. We should just build one big
// string and lookup indices into that. // string and lookup indices into that.
@ -3441,7 +3518,7 @@ static bool SwitchToLookupTable(SwitchInst *SI,
if (RangeSpread.zextOrSelf(64).ugt(UINT64_MAX / 4 - 1)) if (RangeSpread.zextOrSelf(64).ugt(UINT64_MAX / 4 - 1))
return false; return false;
uint64_t TableSize = RangeSpread.getLimitedValue() + 1; uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
if (!ShouldBuildLookupTable(SI, TableSize)) if (!ShouldBuildLookupTable(SI, TableSize, TD, ResultTypes))
return false; return false;
// Create the BB that does the lookups. // Create the BB that does the lookups.
@ -3467,7 +3544,7 @@ static bool SwitchToLookupTable(SwitchInst *SI,
PHINode *PHI = PHIs[I]; PHINode *PHI = PHIs[I];
SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultLists[PHI], SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultLists[PHI],
DefaultResults[PHI]); DefaultResults[PHI], TD);
Value *Result = Table.BuildLookup(TableIndex, Builder); Value *Result = Table.BuildLookup(TableIndex, Builder);
@ -3537,7 +3614,7 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
if (ForwardSwitchConditionToPHI(SI)) if (ForwardSwitchConditionToPHI(SI))
return SimplifyCFG(BB) | true; return SimplifyCFG(BB) | true;
if (SwitchToLookupTable(SI, Builder)) if (SwitchToLookupTable(SI, Builder, TD))
return SimplifyCFG(BB) | true; return SimplifyCFG(BB) | true;
return false; return false;

View File

@ -6,17 +6,14 @@ target triple = "x86_64-unknown-linux-gnu"
; The table for @f ; The table for @f
; CHECK: @switch.table = private unnamed_addr constant [7 x i32] [i32 55, i32 123, i32 0, i32 -1, i32 27, i32 62, i32 1] ; CHECK: @switch.table = private unnamed_addr constant [7 x i32] [i32 55, i32 123, i32 0, i32 -1, i32 27, i32 62, i32 1]
; The int table for @h
; CHECK: @switch.table1 = private unnamed_addr constant [4 x i8] c"*\09X\05"
; The float table for @h ; The float table for @h
; CHECK: @switch.table2 = private unnamed_addr constant [4 x float] [float 0x40091EB860000000, float 0x3FF3BE76C0000000, float 0x4012449BA0000000, float 0x4001AE1480000000] ; CHECK: @switch.table1 = private unnamed_addr constant [4 x float] [float 0x40091EB860000000, float 0x3FF3BE76C0000000, float 0x4012449BA0000000, float 0x4001AE1480000000]
; The table for @foostring ; The table for @foostring
; CHECK: @switch.table3 = private unnamed_addr constant [4 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0)] ; CHECK: @switch.table2 = private unnamed_addr constant [4 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0)]
; The table for @earlyreturncrash ; The table for @earlyreturncrash
; CHECK: @switch.table4 = private unnamed_addr constant [4 x i32] [i32 42, i32 9, i32 88, i32 5] ; CHECK: @switch.table3 = private unnamed_addr constant [4 x i32] [i32 42, i32 9, i32 88, i32 5]
; A simple int-to-int selection switch. ; A simple int-to-int selection switch.
; It is dense enough to be replaced by table lookup. ; It is dense enough to be replaced by table lookup.
@ -88,14 +85,15 @@ sw.epilog:
; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4 ; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4
; CHECK-NEXT: br i1 %0, label %switch.lookup, label %sw.epilog ; CHECK-NEXT: br i1 %0, label %switch.lookup, label %sw.epilog
; CHECK: switch.lookup: ; CHECK: switch.lookup:
; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8]* @switch.table1, i32 0, i32 %switch.tableidx ; CHECK-NEXT: %switch.shiftamt = mul i32 %switch.tableidx, 8
; CHECK-NEXT: %switch.load = load i8* %switch.gep ; CHECK-NEXT: %switch.downshift = lshr i32 89655594, %switch.shiftamt
; CHECK-NEXT: %switch.gep1 = getelementptr inbounds [4 x float]* @switch.table2, i32 0, i32 %switch.tableidx ; CHECK-NEXT: %switch.masked = trunc i32 %switch.downshift to i8
; CHECK-NEXT: %switch.load2 = load float* %switch.gep1 ; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x float]* @switch.table1, i32 0, i32 %switch.tableidx
; CHECK-NEXT: %switch.load = load float* %switch.gep
; CHECK-NEXT: br label %sw.epilog ; CHECK-NEXT: br label %sw.epilog
; CHECK: sw.epilog: ; CHECK: sw.epilog:
; CHECK-NEXT: %a.0 = phi i8 [ %switch.load, %switch.lookup ], [ 7, %entry ] ; CHECK-NEXT: %a.0 = phi i8 [ %switch.masked, %switch.lookup ], [ 7, %entry ]
; CHECK-NEXT: %b.0 = phi float [ %switch.load2, %switch.lookup ], [ 0x4023FAE140000000, %entry ] ; CHECK-NEXT: %b.0 = phi float [ %switch.load, %switch.lookup ], [ 0x4023FAE140000000, %entry ]
; CHECK-NEXT: call void @dummy(i8 signext %a.0, float %b.0) ; CHECK-NEXT: call void @dummy(i8 signext %a.0, float %b.0)
; CHECK-NEXT: ret void ; CHECK-NEXT: ret void
} }
@ -137,7 +135,7 @@ return:
; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4 ; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4
; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return ; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return
; CHECK: switch.lookup: ; CHECK: switch.lookup:
; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8*]* @switch.table3, i32 0, i32 %switch.tableidx ; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8*]* @switch.table2, i32 0, i32 %switch.tableidx
; CHECK-NEXT: %switch.load = load i8** %switch.gep ; CHECK-NEXT: %switch.load = load i8** %switch.gep
; CHECK-NEXT: ret i8* %switch.load ; CHECK-NEXT: ret i8* %switch.load
} }
@ -166,9 +164,70 @@ sw.epilog:
; CHECK: @earlyreturncrash ; CHECK: @earlyreturncrash
; CHECK: switch.lookup: ; CHECK: switch.lookup:
; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i32]* @switch.table4, i32 0, i32 %switch.tableidx ; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i32]* @switch.table3, i32 0, i32 %switch.tableidx
; CHECK-NEXT: %switch.load = load i32* %switch.gep ; CHECK-NEXT: %switch.load = load i32* %switch.gep
; CHECK-NEXT: ret i32 %switch.load ; CHECK-NEXT: ret i32 %switch.load
; CHECK: sw.epilog: ; CHECK: sw.epilog:
; CHECK-NEXT: ret i32 7 ; CHECK-NEXT: ret i32 7
} }
; Example 7 from http://blog.regehr.org/archives/320
; It is not dense enough for a regular table, but the results
; can be packed into a bitmap.
define i32 @crud(i8 zeroext %c) {
entry:
%cmp = icmp ult i8 %c, 33
br i1 %cmp, label %lor.end, label %switch.early.test
switch.early.test:
switch i8 %c, label %lor.rhs [
i8 92, label %lor.end
i8 62, label %lor.end
i8 60, label %lor.end
i8 59, label %lor.end
i8 58, label %lor.end
i8 46, label %lor.end
i8 44, label %lor.end
i8 34, label %lor.end
i8 39, label %switch.edge
]
switch.edge: br label %lor.end
lor.rhs: br label %lor.end
lor.end:
%0 = phi i1 [ true, %switch.early.test ],
[ false, %lor.rhs ],
[ true, %entry ],
[ true, %switch.early.test ],
[ true, %switch.early.test ],
[ true, %switch.early.test ],
[ true, %switch.early.test ],
[ true, %switch.early.test ],
[ true, %switch.early.test ],
[ true, %switch.early.test ],
[ true, %switch.edge ]
%lor.ext = zext i1 %0 to i32
ret i32 %lor.ext
; CHECK: @crud
; CHECK: entry:
; CHECK-NEXT: %cmp = icmp ult i8 %c, 33
; CHECK-NEXT: br i1 %cmp, label %lor.end, label %switch.early.test
; CHECK: switch.early.test:
; CHECK-NEXT: %switch.tableidx = sub i8 %c, 34
; CHECK-NEXT: %0 = icmp ult i8 %switch.tableidx, 59
; CHECK-NEXT: br i1 %0, label %switch.lookup, label %lor.end
; CHECK: switch.lookup:
; CHECK-NEXT: %switch.zext = zext i8 %switch.tableidx to i59
; CHECK-NEXT: %switch.shiftamt = mul i59 %switch.zext, 1
; CHECK-NEXT: %switch.downshift = lshr i59 -288230375765830623, %switch.shiftamt
; CHECK-NEXT: %switch.masked = trunc i59 %switch.downshift to i1
; CHECK-NEXT: br label %lor.end
; CHECK: lor.end:
; CHECK-NEXT: %1 = phi i1 [ true, %entry ], [ %switch.masked, %switch.lookup ], [ false, %switch.early.test ]
; CHECK-NEXT: %lor.ext = zext i1 %1 to i32
; CHECK-NEXT: ret i32 %lor.ext
}