[PowerPC] Materialize i64 constants using rotation

Materializing full 64-bit constants on PPC64 can be expensive, requiring up to
5 instructions depending on the locations of the non-zero bits. Sometimes
materializing a rotated constant, and then applying the inverse rotation, requires
fewer instructions than the direct method. If so, do that instead.

In r225132, I added support for forming constants using bit inversion. In
effect, this reverts that commit and replaces it with rotation support. The bit
inversion is useful for turning constants that are mostly ones into ones that
are mostly zeros (thus enabling a more-efficient shift-based materialization),
but the same effect can be obtained by using negative constants and a rotate,
and that is at least as efficient, if not more so.

llvm-svn: 225135
This commit is contained in:
Hal Finkel 2015-01-04 15:43:55 +00:00
parent 7c91552cd9
commit 241ba79f95
3 changed files with 76 additions and 32 deletions

View File

@ -602,16 +602,19 @@ static unsigned SelectInt64CountDirect(int64_t Imm) {
return Result; return Result;
} }
// Rotate the 64-bit value Imm left by R bit positions.
//
// The rotation amount is reduced modulo 64, so R == 0 (or any multiple of 64)
// returns Imm unchanged. Both shift counts are masked to stay strictly below
// the operand width: an unmasked (Imm >> (64 - R)) would shift by 64 when
// R == 0, which is undefined behavior in C++.
static uint64_t Rot64(uint64_t Imm, unsigned R) {
  R &= 63;
  return (Imm << R) | (Imm >> ((64 - R) & 63));
}
static unsigned SelectInt64Count(int64_t Imm) { static unsigned SelectInt64Count(int64_t Imm) {
unsigned DirectCount = SelectInt64CountDirect(Imm); unsigned Count = SelectInt64CountDirect(Imm);
// If might be cheaper to materialize the bit-inverted constant, and then for (unsigned r = 1; r < 63; ++r) {
// flip the bits (which takes one nor instruction). unsigned RCount = SelectInt64CountDirect(Rot64(Imm, r)) + 1;
unsigned NotDirectCount = SelectInt64CountDirect(~(uint64_t) Imm) + 1; Count = std::min(Count, RCount);
if (NotDirectCount < DirectCount) }
return NotDirectCount;
return DirectCount; return Count;
} }
// Select a 64-bit constant. For cost-modeling purposes, SelectInt64Count // Select a 64-bit constant. For cost-modeling purposes, SelectInt64Count
@ -691,19 +694,27 @@ static SDNode *SelectInt64Direct(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm) {
} }
static SDNode *SelectInt64(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm) { static SDNode *SelectInt64(SelectionDAG *CurDAG, SDLoc dl, int64_t Imm) {
unsigned DirectCount = SelectInt64CountDirect(Imm); unsigned Count = SelectInt64CountDirect(Imm);
unsigned RMin = 0;
// If might be cheaper to materialize the bit-inverted constant, and then for (unsigned r = 1; r < 63; ++r) {
// flip the bits (which takes one nor instruction). unsigned RCount = SelectInt64CountDirect(Rot64(Imm, r)) + 1;
unsigned NotDirectCount = SelectInt64CountDirect(~(uint64_t) Imm) + 1; if (RCount < Count) {
if (NotDirectCount < DirectCount) { Count = RCount;
SDValue NotDirectVal = RMin = r;
SDValue(SelectInt64Direct(CurDAG, dl, ~(uint64_t) Imm), 0); }
return CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, NotDirectVal,
NotDirectVal);
} }
return SelectInt64Direct(CurDAG, dl, Imm); if (!RMin)
return SelectInt64Direct(CurDAG, dl, Imm);
auto getI32Imm = [CurDAG](unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
};
SDValue Val = SDValue(SelectInt64Direct(CurDAG, dl, Rot64(Imm, RMin)), 0);
return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Val,
getI32Imm(64 - RMin), getI32Imm(0));
} }
// Select a 64-bit constant. // Select a 64-bit constant.

View File

@ -8,7 +8,6 @@ TODO:
On PPC64, this: On PPC64, this:
long f2 (long x) { return 0xfffffff000000000UL; } long f2 (long x) { return 0xfffffff000000000UL; }
long f3 (long x) { return 0x1ffffffffUL; }
could compile into: could compile into:
@ -16,10 +15,6 @@ _f2:
li r3,-1 li r3,-1
rldicr r3,r3,0,27 rldicr r3,r3,0,27
blr blr
_f3:
li r3,-1
rldicl r3,r3,0,31
blr
we produce: we produce:
@ -28,12 +23,6 @@ _f2:
ori r2, r2, 65535 ori r2, r2, 65535
sldi r3, r2, 36 sldi r3, r2, 36
blr blr
_f3:
li r2, 1
sldi r2, r2, 32
oris r2, r2, 65535
ori r3, r2, 65535
blr
===-------------------------------------------------------------------------=== ===-------------------------------------------------------------------------===

View File

@ -8,13 +8,57 @@ entry:
ret i64 281474976710655 ret i64 281474976710655
; CHECK-LABEL: @cn1 ; CHECK-LABEL: @cn1
; CHECK: li [[REG1:[0-9]+]], 0 ; CHECK: lis [[REG1:[0-9]+]], -1
; CHECK: ori [[REG2:[0-9]+]], [[REG1]], 65535 ; CHECK: rldicl 3, [[REG1]], 48, 0
; CHECK: sldi [[REG3:[0-9]+]], [[REG2]], 48
; CHECK: nor 3, [[REG3]], [[REG3]]
; CHECK: blr ; CHECK: blr
} }
; Function Attrs: nounwind readnone
define i64 @cnb() #0 {
entry:
ret i64 281474976710575
; CHECK-LABEL: @cnb
; CHECK: lis [[REG1:[0-9]+]], -81
; CHECK: rldicl 3, [[REG1]], 48, 0
; CHECK: blr
}
; Function Attrs: nounwind readnone
define i64 @f2n(i64 %x) #0 {
entry:
ret i64 68719476735
; CHECK-LABEL: @f2n
; CHECK: lis [[REG1:[0-9]+]], -4096
; CHECK: rldicl 3, [[REG1]], 36, 0
; CHECK: blr
}
; Function Attrs: nounwind readnone
define i64 @f3(i64 %x) #0 {
entry:
ret i64 8589934591
; CHECK-LABEL: @f3
; CHECK: lis [[REG1:[0-9]+]], -32768
; CHECK: rldicl 3, [[REG1]], 33, 0
; CHECK: blr
}
; Function Attrs: nounwind readnone
define i64 @cn2n() #0 {
entry:
ret i64 -1407374887747585
; CHECK-LABEL: @cn2n
; CHECK: lis [[REG1:[0-9]+]], -5121
; CHECK: ori [[REG2:[0-9]+]], [[REG1]], 65534
; CHECK: rldicl 3, [[REG2]], 22, 0
; CHECK: blr
}
attributes #0 = { nounwind readnone } attributes #0 = { nounwind readnone }