[CombFolds] Simplify "extract of and" from a contiguous range.

This reduces the size of the extract, which allows recursive
folding into other operations.  This shrinks the .v file generated
for RocketCore.fir by 3.2%, from 2994 to 2897 lines.
This commit is contained in:
Chris Lattner 2021-11-25 21:42:51 -08:00
parent 231f389bcd
commit 606d024a2f
3 changed files with 53 additions and 4 deletions

View File

@ -213,7 +213,7 @@ def SExtOp : CombOp<"sext", [NoSideEffect]> {
def ConcatOp : VariadicOp<"concat", [InferTypeOpInterface]> {
let summary = "Concatenate a variadic list of operands together.";
let description = [{
See the HW-SV rationale document for details on operand ordering.
See the comb rationale document for details on operand ordering.
}];
let hasFolder = true;

View File

@ -602,13 +602,38 @@ LogicalResult ExtractOp::canonicalize(ExtractOp op, PatternRewriter &rewriter) {
isa<AndOp, OrOp, XorOp>(inputOp)) {
if (auto cstRHS = inputOp->getOperand(1).getDefiningOp<hw::ConstantOp>()) {
auto extractedCst =
cstRHS.getValue().lshr(op.lowBit()).trunc(op.getType().getWidth());
if ((isa<AndOp>(inputOp) && extractedCst.isAllOnes()) ||
(isa<OrOp, XorOp>(inputOp) && extractedCst.isZero())) {
cstRHS.getValue().extractBits(op.getType().getWidth(), op.lowBit());
if (isa<OrOp, XorOp>(inputOp) && extractedCst.isZero()) {
rewriter.replaceOpWithNewOp<ExtractOp>(
op, op.getType(), inputOp->getOperand(0), op.lowBit());
return success();
}
// `extract(and(a, cst))` -> `concat(extract(a), 0)` if we only need one
// extract to represent the result. Turning it into a pile of extracts is
// always fine by our cost model, but we don't want to explode things into
// a ton of bits because it will bloat the IR and generated Verilog.
if (isa<AndOp>(inputOp)) {
// For our cost model, we only do this if the bit pattern is a
// contiguous series of ones.
unsigned lz = extractedCst.countLeadingZeros();
unsigned tz = extractedCst.countTrailingZeros();
unsigned pop = extractedCst.countPopulation();
if (extractedCst.getBitWidth() - lz - tz == pop) {
auto resultTy = rewriter.getIntegerType(pop);
SmallVector<Value> resultElts;
if (lz)
resultElts.push_back(rewriter.create<hw::ConstantOp>(
op.getLoc(), APInt::getZero(lz)));
resultElts.push_back(rewriter.createOrFold<ExtractOp>(
op.getLoc(), resultTy, inputOp->getOperand(0), op.lowBit() + tz));
if (tz)
resultElts.push_back(rewriter.create<hw::ConstantOp>(
op.getLoc(), APInt::getZero(tz)));
rewriter.replaceOpWithNewOp<ConcatOp>(op, resultElts);
return success();
}
}
}
}

View File

@ -667,6 +667,30 @@ hw.module @narrowBitwiseOpsInsertionPointRegression(%a: i8) -> (out: i1) {
hw.output %6 : i1
}
// Exercises the extract-of-and canonicalization: when the AND's constant,
// restricted to the extracted bit range, is a contiguous run of ones, the
// extract folds to a narrower extract of the AND's operand, zero-padded via
// concat; when it is not contiguous, an `and` with a narrowed constant is
// kept instead.
// CHECK-LABEL: hw.module @narrow_extract_from_and
hw.module @narrow_extract_from_and(%arg0: i32) -> (o1: i8, o2: i14, o3: i8) {
%c240_i32 = hw.constant 240 : i32 // 0xF0
%0 = comb.and %arg0, %c240_i32 : i32
// o1: bits [3..11) of (%arg0 & 0xF0); the mask within that range is
// 0b0001_1110 — a contiguous run of ones — so this becomes
// concat(c0_i3, extract(%arg0 from 4 : i4), false) per the CHECKs below.
%1 = comb.extract %0 from 3 : (i32) -> i8
// o2: bits [2..16); per the CHECKs below this case is expected to narrow to
// an i14 extract followed by an `and` with the narrowed constant 60.
%2 = comb.extract %0 from 2 : (i32) -> i14
// CHECK: %0 = comb.extract %arg0 from 2 : (i32) -> i14
// CHECK: %1 = comb.and %0, %c60_i14 : i14
// CHECK: %2 = comb.extract %arg0 from 4 : (i32) -> i4
// CHECK: %3 = comb.concat %c0_i3, %2, %false : i3, i4, i1
%c42_i32 = hw.constant 42 : i32 // 0b101010
// o3: bits [1..9) of (%arg0 & 0b101010); the mask within that range is
// 0b0001_0101 — NOT contiguous — so the `and` survives with its constant
// narrowed to 21 rather than exploding into multiple extracts.
%3 = comb.and %arg0, %c42_i32 : i32
%4 = comb.extract %3 from 1 : (i32) -> i8
// CHECK: %4 = comb.extract %arg0 from 1 : (i32) -> i8
// CHECK: %5 = comb.and %4, %c21_i8 : i8
// CHECK: hw.output %3, %1, %5 : i8, i14, i8
hw.output %1, %2, %4 : i8, i14, i8
}
// CHECK-LABEL: hw.module @fold_mux_tree1
hw.module @fold_mux_tree1(%sel: i2, %a: i8, %b: i8, %c: i8, %d: i8) -> (y: i8) {
// CHECK-NEXT: %0 = hw.array_create %d, %c, %b, %a : i8