[APInt] Add APInt::insertBits() method to insert an APInt into a larger APInt
We currently have to insert bits via a temporary variable of the same size as the target with various shift/mask stages, resulting in further temporary variables, all of which require the allocation of memory for large APInts (MaskSizeInBits > 64). This is another of the compile time issues identified in PR32037 (see also D30265). This patch adds the APInt::insertBits() helper method which avoids the temporary memory allocation and masks/inserts the raw bits directly into the target. Differential Revision: https://reviews.llvm.org/D30780 llvm-svn: 297458
This commit is contained in:
parent
7090d145e8
commit
b02667c469
|
@ -1243,6 +1243,9 @@ public:
|
|||
/// as "bitPosition".
|
||||
void flipBit(unsigned bitPosition);
|
||||
|
||||
/// Insert the bits of \p SubBits into this APInt starting at bitPosition.
///
/// Bits [bitPosition, bitPosition + SubBits.getBitWidth()) of this value are
/// overwritten with the value of \p SubBits; all other bits are left
/// unchanged. \p SubBits may be as wide as this APInt (in which case the
/// operation is a plain copy) but must not be zero-width, and the inserted
/// range must lie entirely within this APInt (both conditions are asserted).
|
||||
void insertBits(const APInt &SubBits, unsigned bitPosition);
|
||||
|
||||
/// Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
|
||||
APInt extractBits(unsigned numBits, unsigned bitPosition) const;
|
||||
|
||||
|
|
|
@ -7523,11 +7523,11 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
|
|||
if (OpVal.isUndef())
|
||||
SplatUndef.setBits(BitPos, BitPos + EltBitSize);
|
||||
else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
|
||||
SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize).
|
||||
zextOrTrunc(sz) << BitPos;
|
||||
SplatValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltBitSize),
|
||||
BitPos);
|
||||
else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal))
|
||||
SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos;
|
||||
else
|
||||
SplatValue.insertBits(CN->getValueAPF().bitcastToAPInt(), BitPos);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -588,6 +588,65 @@ void APInt::flipBit(unsigned bitPosition) {
|
|||
else setBit(bitPosition);
|
||||
}
|
||||
|
||||
void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
|
||||
unsigned subBitWidth = subBits.getBitWidth();
|
||||
assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
|
||||
"Illegal bit insertion");
|
||||
|
||||
// Insertion is a direct copy.
|
||||
if (subBitWidth == BitWidth) {
|
||||
*this = subBits;
|
||||
return;
|
||||
}
|
||||
|
||||
// Single word result can be done as a direct bitmask.
|
||||
if (isSingleWord()) {
|
||||
uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
|
||||
VAL &= ~(mask << bitPosition);
|
||||
VAL |= (subBits.VAL << bitPosition);
|
||||
return;
|
||||
}
|
||||
|
||||
unsigned loBit = whichBit(bitPosition);
|
||||
unsigned loWord = whichWord(bitPosition);
|
||||
unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
|
||||
|
||||
// Insertion within a single word can be done as a direct bitmask.
|
||||
if (loWord == hi1Word) {
|
||||
uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
|
||||
pVal[loWord] &= ~(mask << loBit);
|
||||
pVal[loWord] |= (subBits.VAL << loBit);
|
||||
return;
|
||||
}
|
||||
|
||||
// Insert on word boundaries.
|
||||
if (loBit == 0) {
|
||||
// Direct copy whole words.
|
||||
unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
|
||||
memcpy(pVal + loWord, subBits.getRawData(),
|
||||
numWholeSubWords * APINT_WORD_SIZE);
|
||||
|
||||
// Mask+insert remaining bits.
|
||||
unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
|
||||
if (remainingBits != 0) {
|
||||
uint64_t mask = UINT64_MAX >> (APINT_BITS_PER_WORD - remainingBits);
|
||||
pVal[hi1Word] &= ~mask;
|
||||
pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// General case - set/clear individual bits in dst based on src.
|
||||
// TODO - there is scope for optimization here, but at the moment this code
|
||||
// path is barely used so prefer readability over performance.
|
||||
for (unsigned i = 0; i != subBitWidth; ++i) {
|
||||
if (subBits[i])
|
||||
setBit(bitPosition + i);
|
||||
else
|
||||
clearBit(bitPosition + i);
|
||||
}
|
||||
}
|
||||
|
||||
APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
|
||||
assert(numBits > 0 && "Can't extract zero bits");
|
||||
assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
|
||||
|
|
|
@ -5318,12 +5318,11 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
|
|||
return true;
|
||||
}
|
||||
if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
|
||||
Mask |= CInt->getValue().zextOrTrunc(SizeInBits).shl(BitOffset);
|
||||
Mask.insertBits(CInt->getValue(), BitOffset);
|
||||
return true;
|
||||
}
|
||||
if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
|
||||
APInt CstBits = CFP->getValueAPF().bitcastToAPInt();
|
||||
Mask |= CstBits.zextOrTrunc(SizeInBits).shl(BitOffset);
|
||||
Mask.insertBits(CFP->getValueAPF().bitcastToAPInt(), BitOffset);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
@ -5340,7 +5339,7 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
|
|||
}
|
||||
auto *Cst = cast<ConstantSDNode>(Src);
|
||||
APInt Bits = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
|
||||
MaskBits |= Bits.zext(SizeInBits).shl(BitOffset);
|
||||
MaskBits.insertBits(Bits, BitOffset);
|
||||
}
|
||||
return SplitBitData();
|
||||
}
|
||||
|
|
|
@ -91,8 +91,7 @@ static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
|
|||
continue;
|
||||
}
|
||||
|
||||
auto *Elt = cast<ConstantInt>(COp);
|
||||
MaskBits |= Elt->getValue().zextOrTrunc(CstSizeInBits).shl(BitOffset);
|
||||
MaskBits.insertBits(cast<ConstantInt>(COp)->getValue(), BitOffset);
|
||||
}
|
||||
|
||||
// Now extract the undef/constant bit data into the raw shuffle masks.
|
||||
|
|
|
@ -1647,6 +1647,59 @@ TEST(APIntTest, reverseBits) {
|
|||
}
|
||||
}
|
||||
|
||||
// Checks APInt::insertBits for equal-width, single-word, multi-word,
// word-aligned and unaligned insertions.
TEST(APIntTest, insertBits) {
  const APInt Src31(31, 0x00123456);

  // Source and destination have the same width: plain copy.
  {
    APInt Dst31(31, 0x76543210ull);
    Dst31.insertBits(Src31, 0);
    EXPECT_EQ(static_cast<int64_t>(0x00123456ull), Dst31.getSExtValue());
  }

  // Both source and destination fit in a single word.
  {
    APInt Dst63(63, 0x01234567FFFFFFFFull);
    Dst63.insertBits(Src31, 4);
    EXPECT_EQ(static_cast<int64_t>(0x012345600123456Full),
              Dst63.getSExtValue());
  }

  // Single-word source landing inside one word of a multi-word destination.
  {
    APInt Dst120(120, UINT64_MAX, true);
    Dst120.insertBits(Src31, 8);
    EXPECT_EQ(static_cast<int64_t>(0xFFFFFF80123456FFull),
              Dst120.getSExtValue());
  }

  // Single-word source straddling two words of the destination.
  {
    APInt Dst127(127, UINT64_MAX, true);
    Dst127.insertBits(Src31, 48);
    EXPECT_EQ(Dst127.extractBits(64, 0).getZExtValue(), 0x3456FFFFFFFFFFFF);
    EXPECT_EQ(Dst127.extractBits(63, 64).getZExtValue(), 0x7FFFFFFFFFFF8012);
  }

  // Insertions that start on a word boundary: whole words only.
  {
    APInt Dst128(128, 0);
    Dst128.insertBits(APInt(64, UINT64_MAX, true), 0);
    Dst128.insertBits(APInt(64, UINT64_MAX, true), 64);
    EXPECT_EQ(-1, Dst128.getSExtValue());
  }

  // Word-aligned insertions whose widths are not a multiple of the word size.
  {
    APInt Dst256(256, UINT64_MAX, true);
    Dst256.insertBits(APInt(65, 0), 0);
    Dst256.insertBits(APInt(69, 0), 64);
    Dst256.insertBits(APInt(128, 0), 128);
    EXPECT_EQ(0u, Dst256.getSExtValue());
  }

  // Word-aligned insertion of one and a half words into a zeroed destination.
  {
    APInt Dst257(257, 0);
    Dst257.insertBits(APInt(96, UINT64_MAX, true), 64);
    EXPECT_EQ(Dst257.extractBits(64, 0).getZExtValue(), 0x0000000000000000);
    EXPECT_EQ(Dst257.extractBits(64, 64).getZExtValue(), 0xFFFFFFFFFFFFFFFF);
    EXPECT_EQ(Dst257.extractBits(64, 128).getZExtValue(), 0x00000000FFFFFFFF);
    EXPECT_EQ(Dst257.extractBits(65, 192).getZExtValue(), 0x0000000000000000);
  }

  // Unaligned multi-word insertion (general case).
  {
    APInt Dst260(260, UINT64_MAX, true);
    Dst260.insertBits(APInt(129, 1ull << 48), 15);
    EXPECT_EQ(Dst260.extractBits(64, 0).getZExtValue(), 0x8000000000007FFF);
    EXPECT_EQ(Dst260.extractBits(64, 64).getZExtValue(), 0x0000000000000000);
    EXPECT_EQ(Dst260.extractBits(64, 128).getZExtValue(), 0xFFFFFFFFFFFF0000);
    EXPECT_EQ(Dst260.extractBits(64, 192).getZExtValue(), 0xFFFFFFFFFFFFFFFF);
    EXPECT_EQ(Dst260.extractBits(4, 256).getZExtValue(), 0x000000000000000F);
  }
}
|
||||
|
||||
TEST(APIntTest, extractBits) {
|
||||
APInt i32(32, 0x1234567);
|
||||
EXPECT_EQ(0x3456, i32.extractBits(16, 4));
|
||||
|
|
Loading…
Reference in New Issue