From 606d024a2f0e3497254fe5c95cf57ffb5ff8fa95 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Thu, 25 Nov 2021 21:42:51 -0800 Subject: [PATCH] [CombFolds] Simplify "extract of and" from a contiguous range. This reduces the size of the extract, which allows recursive folding into other operations. This shrinks the .v file generated for RocketCore.fir by 3.3%, from 2994 to 2897 lines. --- include/circt/Dialect/Comb/Combinational.td | 2 +- lib/Dialect/Comb/CombFolds.cpp | 31 +++++++++++++++++++-- test/Dialect/Comb/canonicalization.mlir | 24 ++++++++++++++++ 3 files changed, 53 insertions(+), 4 deletions(-) diff --git a/include/circt/Dialect/Comb/Combinational.td b/include/circt/Dialect/Comb/Combinational.td index 6cb5a5c6f9..5def28942b 100644 --- a/include/circt/Dialect/Comb/Combinational.td +++ b/include/circt/Dialect/Comb/Combinational.td @@ -213,7 +213,7 @@ def SExtOp : CombOp<"sext", [NoSideEffect]> { def ConcatOp : VariadicOp<"concat", [InferTypeOpInterface]> { let summary = "Concatenate a variadic list of operands together."; let description = [{ - See the HW-SV rationale document for details on operand ordering. + See the comb rationale document for details on operand ordering. }]; let hasFolder = true; diff --git a/lib/Dialect/Comb/CombFolds.cpp b/lib/Dialect/Comb/CombFolds.cpp index a7c35bfc0e..100ad12acf 100644 --- a/lib/Dialect/Comb/CombFolds.cpp +++ b/lib/Dialect/Comb/CombFolds.cpp @@ -602,13 +602,38 @@ LogicalResult ExtractOp::canonicalize(ExtractOp op, PatternRewriter &rewriter) { isa(inputOp)) { if (auto cstRHS = inputOp->getOperand(1).getDefiningOp()) { auto extractedCst = - cstRHS.getValue().lshr(op.lowBit()).trunc(op.getType().getWidth()); - if ((isa(inputOp) && extractedCst.isAllOnes()) || - (isa(inputOp) && extractedCst.isZero())) { + cstRHS.getValue().extractBits(op.getType().getWidth(), op.lowBit()); + if (isa(inputOp) && extractedCst.isZero()) { rewriter.replaceOpWithNewOp( op, op.getType(), inputOp->getOperand(0), op.lowBit()); return success(); } + + // `extract(and(a, cst))` -> `concat(extract(a), 0)` if we only need one + // extract to represent the result. Turning it into a pile of extracts is + // always fine by our cost model, but we don't want to explode things into + // a ton of bits because it will bloat the IR and generated Verilog. + if (isa(inputOp)) { + // For our cost model, we only do this if the bit pattern is a + // contiguous series of ones. + unsigned lz = extractedCst.countLeadingZeros(); + unsigned tz = extractedCst.countTrailingZeros(); + unsigned pop = extractedCst.countPopulation(); + if (extractedCst.getBitWidth() - lz - tz == pop) { + auto resultTy = rewriter.getIntegerType(pop); + SmallVector resultElts; + if (lz) + resultElts.push_back(rewriter.create( + op.getLoc(), APInt::getZero(lz))); + resultElts.push_back(rewriter.createOrFold( + op.getLoc(), resultTy, inputOp->getOperand(0), op.lowBit() + tz)); + if (tz) + resultElts.push_back(rewriter.create( + op.getLoc(), APInt::getZero(tz))); + rewriter.replaceOpWithNewOp(op, resultElts); + return success(); + } + } } } diff --git a/test/Dialect/Comb/canonicalization.mlir b/test/Dialect/Comb/canonicalization.mlir index edab729967..65aa01383e 100644 --- a/test/Dialect/Comb/canonicalization.mlir +++ b/test/Dialect/Comb/canonicalization.mlir @@ -667,6 +667,30 @@ hw.module @narrowBitwiseOpsInsertionPointRegression(%a: i8) -> (out: i1) { hw.output %6 : i1 } +// CHECK-LABEL: hw.module @narrow_extract_from_and +hw.module @narrow_extract_from_and(%arg0: i32) -> (o1: i8, o2: i14, o3: i8) { + %c240_i32 = hw.constant 240 : i32 // 0xF0 + %0 = comb.and %arg0, %c240_i32 : i32 + %1 = comb.extract %0 from 3 : (i32) -> i8 + + %2 = comb.extract %0 from 2 : (i32) -> i14 + + // CHECK: %0 = comb.extract %arg0 from 2 : (i32) -> i14 + // CHECK: %1 = comb.and %0, %c60_i14 : i14 + + // CHECK: %2 = comb.extract %arg0 from 4 : (i32) -> i4 + // CHECK: %3 = comb.concat %c0_i3, %2, %false : i3, i4, i1 + %c42_i32 = hw.constant 42 : i32 // 0b101010 + %3 = comb.and %arg0, %c42_i32 : i32 + %4 = comb.extract %3 from 1 : (i32) -> i8 + // CHECK: %4 = comb.extract %arg0 from 1 : (i32) -> i8 + // CHECK: %5 = comb.and %4, %c21_i8 : i8 + // CHECK: hw.output %3, %1, %5 : i8, i14, i8 + + hw.output %1, %2, %4 : i8, i14, i8 +} + + // CHECK-LABEL: hw.module @fold_mux_tree1 hw.module @fold_mux_tree1(%sel: i2, %a: i8, %b: i8, %c: i8, %d: i8) -> (y: i8) { // CHECK-NEXT: %0 = hw.array_create %d, %c, %b, %a : i8