mirror of https://github.com/llvm/circt.git
[CombFolds] Simplify "extract of and" from a contiguous range.
This reduces the size of the extract, which allows recursive folding into other operations. This shrinks the .v file generated for RocketCore.fir by 3.3%, from 2994 to 2897 lines.
This commit is contained in:
parent
231f389bcd
commit
606d024a2f
|
@ -213,7 +213,7 @@ def SExtOp : CombOp<"sext", [NoSideEffect]> {
|
|||
def ConcatOp : VariadicOp<"concat", [InferTypeOpInterface]> {
|
||||
let summary = "Concatenate a variadic list of operands together.";
|
||||
let description = [{
|
||||
See the HW-SV rationale document for details on operand ordering.
|
||||
See the comb rationale document for details on operand ordering.
|
||||
}];
|
||||
|
||||
let hasFolder = true;
|
||||
|
|
|
@ -602,13 +602,38 @@ LogicalResult ExtractOp::canonicalize(ExtractOp op, PatternRewriter &rewriter) {
|
|||
isa<AndOp, OrOp, XorOp>(inputOp)) {
|
||||
if (auto cstRHS = inputOp->getOperand(1).getDefiningOp<hw::ConstantOp>()) {
|
||||
auto extractedCst =
|
||||
cstRHS.getValue().lshr(op.lowBit()).trunc(op.getType().getWidth());
|
||||
if ((isa<AndOp>(inputOp) && extractedCst.isAllOnes()) ||
|
||||
(isa<OrOp, XorOp>(inputOp) && extractedCst.isZero())) {
|
||||
cstRHS.getValue().extractBits(op.getType().getWidth(), op.lowBit());
|
||||
if (isa<OrOp, XorOp>(inputOp) && extractedCst.isZero()) {
|
||||
rewriter.replaceOpWithNewOp<ExtractOp>(
|
||||
op, op.getType(), inputOp->getOperand(0), op.lowBit());
|
||||
return success();
|
||||
}
|
||||
|
||||
// `extract(and(a, cst))` -> `concat(extract(a), 0)` if we only need one
|
||||
// extract to represent the result. Turning it into a pile of extracts is
|
||||
// always fine by our cost model, but we don't want to explode things into
|
||||
// a ton of bits because it will bloat the IR and generated Verilog.
|
||||
if (isa<AndOp>(inputOp)) {
|
||||
// For our cost model, we only do this if the bit pattern is a
|
||||
// contiguous series of ones.
|
||||
unsigned lz = extractedCst.countLeadingZeros();
|
||||
unsigned tz = extractedCst.countTrailingZeros();
|
||||
unsigned pop = extractedCst.countPopulation();
|
||||
if (extractedCst.getBitWidth() - lz - tz == pop) {
|
||||
auto resultTy = rewriter.getIntegerType(pop);
|
||||
SmallVector<Value> resultElts;
|
||||
if (lz)
|
||||
resultElts.push_back(rewriter.create<hw::ConstantOp>(
|
||||
op.getLoc(), APInt::getZero(lz)));
|
||||
resultElts.push_back(rewriter.createOrFold<ExtractOp>(
|
||||
op.getLoc(), resultTy, inputOp->getOperand(0), op.lowBit() + tz));
|
||||
if (tz)
|
||||
resultElts.push_back(rewriter.create<hw::ConstantOp>(
|
||||
op.getLoc(), APInt::getZero(tz)));
|
||||
rewriter.replaceOpWithNewOp<ConcatOp>(op, resultElts);
|
||||
return success();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -667,6 +667,30 @@ hw.module @narrowBitwiseOpsInsertionPointRegression(%a: i8) -> (out: i1) {
|
|||
hw.output %6 : i1
|
||||
}
|
||||
|
||||
// CHECK-LABEL: hw.module @narrow_extract_from_and
|
||||
// Test: extracts taken from a `comb.and` whose second operand is a constant
// mask. Three extracts are exercised; the expected-output lines below pin the
// form each one takes after canonicalization.
//   o1 (%1): expected to become a concat of zeros around a narrower extract.
//   o2 (%2): expected to stay an `and`, applied to a narrowed i14 extract.
//   o3 (%4): expected to stay an `and`, applied to a narrowed i8 extract.
hw.module @narrow_extract_from_and(%arg0: i32) -> (o1: i8, o2: i14, o3: i8) {
|
||||
%c240_i32 = hw.constant 240 : i32 // 0xF0
|
||||
%0 = comb.and %arg0, %c240_i32 : i32
|
||||
// Bits [10:3] of the 0xF0 mask are 0b00011110: three leading zeros, a run of
// four ones, one trailing zero. The run starts at bit 4 of %arg0.
%1 = comb.extract %0 from 3 : (i32) -> i8
|
||||
|
||||
// Bits [15:2] of the 0xF0 mask have the value 60 (0b111100) as an i14.
// NOTE(review): 60 is also a contiguous run of ones, yet the expected output
// keeps an `and` here rather than a concat; presumably a different narrowing
// rewrite handles this extract first (%0 has two users) -- confirm.
%2 = comb.extract %0 from 2 : (i32) -> i14
|
||||
|
||||
// Expected form of %2 (output o2): narrowed extract, then the narrowed mask.
// CHECK: %0 = comb.extract %arg0 from 2 : (i32) -> i14
|
||||
// CHECK: %1 = comb.and %0, %c60_i14 : i14
|
||||
|
||||
// Expected form of %1 (output o1): 3 zero bits, the 4-bit extract starting at
// bit 4, then 1 zero bit -- matching the 0b00011110 mask layout above.
// CHECK: %2 = comb.extract %arg0 from 4 : (i32) -> i4
|
||||
// CHECK: %3 = comb.concat %c0_i3, %2, %false : i3, i4, i1
|
||||
%c42_i32 = hw.constant 42 : i32 // 0b101010
|
||||
%3 = comb.and %arg0, %c42_i32 : i32
|
||||
// Bits [8:1] of the 42 mask are 0b00010101 (21): the ones are not contiguous,
// so the expected output keeps the `and`, with the mask narrowed to i8.
%4 = comb.extract %3 from 1 : (i32) -> i8
|
||||
// CHECK: %4 = comb.extract %arg0 from 1 : (i32) -> i8
|
||||
// CHECK: %5 = comb.and %4, %c21_i8 : i8
|
||||
// Output order after rewriting: concat (o1), i14 and (o2), i8 and (o3).
// CHECK: hw.output %3, %1, %5 : i8, i14, i8
|
||||
|
||||
hw.output %1, %2, %4 : i8, i14, i8
|
||||
}
|
||||
|
||||
|
||||
// CHECK-LABEL: hw.module @fold_mux_tree1
|
||||
hw.module @fold_mux_tree1(%sel: i2, %a: i8, %b: i8, %c: i8, %d: i8) -> (y: i8) {
|
||||
// CHECK-NEXT: %0 = hw.array_create %d, %c, %b, %a : i8
|
||||
|
|
Loading…
Reference in New Issue