[Sim] Add format string type and format specifier ops (#7208)

Adds just the operations. Appropriate lowerings are to be added in future commits.
This commit is contained in:
fzi-hielscher 2024-07-09 17:31:37 +02:00 committed by GitHub
parent 824477b416
commit 0899943a5b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 556 additions and 2 deletions

View File

@ -20,5 +20,6 @@ include "mlir/IR/SymbolInterfaces.td"
include "circt/Dialect/Sim/SimDialect.td"
include "circt/Dialect/Sim/SimOps.td"
include "circt/Dialect/Sim/SimTypes.td"
#endif // CIRCT_DIALECT_SIM_SIM_TD

View File

@ -22,9 +22,17 @@ def SimDialect : Dialect {
The `sim` dialect is intended to model simulator-specific operations.
}];
let useDefaultTypePrinterParser = 0;
let useDefaultAttributePrinterParser = 0;
let dependentDialects = ["circt::hw::HWDialect"];
let useDefaultAttributePrinterParser = 0;
let useDefaultTypePrinterParser = 1;
let hasConstantMaterializer = 1;
let extraClassDeclaration = [{
/// Register all Sim types.
void registerTypes();
}];
}
#endif // CIRCT_DIALECT_SIM_SIMDIALECT

View File

@ -21,6 +21,7 @@
#include "circt/Dialect/Seq/SeqDialect.h"
#include "circt/Dialect/Seq/SeqTypes.h"
#include "circt/Dialect/Sim/SimDialect.h"
#include "circt/Dialect/Sim/SimTypes.h"
#include "circt/Support/BuilderUtils.h"
#include "mlir/Interfaces/CallInterfaces.h"
#include "mlir/Interfaces/FunctionInterfaces.h"

View File

@ -16,6 +16,7 @@
include "mlir/Interfaces/SideEffectInterfaces.td"
include "mlir/Interfaces/FunctionInterfaces.td"
include "circt/Dialect/Sim/SimDialect.td"
include "circt/Dialect/Sim/SimTypes.td"
include "circt/Dialect/Seq/SeqTypes.td"
include "circt/Dialect/HW/HWOpInterfaces.td"
include "circt/Dialect/HW/HWTypes.td"
@ -146,4 +147,157 @@ def DPICallOp : SimOp<"func.dpi.call",
}
// Constant-like operation producing a format string from a string attribute.
def FormatLitOp : SimOp<"fmt.lit", [Pure, ConstantLike]> {
  // Operands, results and textual syntax.
  let arguments = (ins StrAttr:$literal);
  let results = (outs FormatStringType:$result);
  let assemblyFormat = "$literal attr-dict";

  // C++ hooks implemented by hand.
  let hasFolder = 1;

  // Documentation.
  let summary = "Literal string token";
  let description = [{
Creates a constant, raw ASCII string literal for formatted printing.
The given string attribute will be outputted as is,
including non-printable characters. The literal may be empty or contain
null characters ('\0') which must not be interpreted as string
terminators by backends.
}];
}
// Format specifier turning an integer value into a hexadecimal format string.
def FormatHexOp : SimOp<"fmt.hex", [Pure]> {
  // Operands, results and textual syntax.
  let arguments = (ins AnyInteger:$value);
  let results = (outs FormatStringType:$result);
  let assemblyFormat = "$value attr-dict `:` qualified(type($value))";

  // C++ hooks implemented by hand.
  let hasFolder = 1;

  // Documentation.
  let summary = "Hexadecimal format specifier";
  let description = [{
Format the given integer value as lower-case hexadecimal string.
The printed value will be left-padded with '0' up to the
length required to print the maximum value of the argument's
type. Zero width values will produce the empty string.
No further prefix will be added.
}];
}
// Format specifier turning an integer value into a binary format string.
def FormatBinOp : SimOp<"fmt.bin", [Pure]> {
  // Operands, results and textual syntax.
  let arguments = (ins AnyInteger:$value);
  let results = (outs FormatStringType:$result);
  let assemblyFormat = "$value attr-dict `:` qualified(type($value))";

  // C++ hooks implemented by hand.
  let hasFolder = 1;

  // Documentation.
  let summary = "Binary format specifier";
  let description = [{
Format the given integer value as binary (base two) string.
The printed value will be left-padded with '0' up to the number
of bits of the argument's type. Zero width values will produce
the empty string. No further prefix will be added.
}];
}
// Format specifier turning an integer value into a (signed or unsigned)
// decimal format string.
def FormatDecOp : SimOp<"fmt.dec", [Pure]> {
  // Operands, results and textual syntax.
  let arguments = (ins AnyInteger:$value, UnitAttr:$isSigned);
  let results = (outs FormatStringType:$result);
  let assemblyFormat = "(`signed` $isSigned^)? $value attr-dict `:` qualified(type($value))";

  // C++ hooks implemented by hand.
  let hasFolder = 1;

  // Documentation.
  let summary = "Decimal format specifier";
  let description = [{
Format the given integer value as signed or unsigned decimal string.
Leading zeros are omitted. Non-negative or unsigned values will
_not_ be prefixed with a '+'.
For unsigned formatting, the printed value will
be left-padded with spaces up to _at least_ the length required to print
the maximum unsigned value of the argument's type.
For signed formatting, the printed value will be
left-padded with spaces up to _at least_ the length required
to print the minimum signed value of the argument's type
including the '-' character.
E.g., a zero value of type `i1` requires no padding for unsigned
formatting and one leading space for signed formatting.
Format specifiers of same argument type and signedness must be
padded to the same width. Zero width values will produce
a single '0'.
Backends are recommended to not exceed the required amount of padding.
}];

  let extraClassDeclaration = [{
    /// Number of characters a value of `bits` width is padded to when it is
    /// printed as decimal, including the '-' sign for signed formatting.
    static inline unsigned getDecimalWidth(unsigned bits, bool isSigned) {
      // Widths zero and one are special-cased: a lone digit, plus room for
      // the sign of a signed `i1`.
      if (bits <= 1)
        return (bits == 1 && isSigned) ? 2 : 1;

      // The sign bit of a signed value does not contribute to the magnitude.
      const unsigned magnitudeBits = isSigned ? bits - 1 : bits;

      // Should be precise up until bits = 13301
      const double baseConversionFactor = 0.30103; // log(2) / log(10) + epsilon
      const unsigned digits = std::ceil(magnitudeBits * baseConversionFactor);

      // Reserve one extra character for the '-' of signed values.
      return digits + (isSigned ? 1 : 0);
    }
  }];
}
// Format specifier turning an integer value into a single character.
def FormatCharOp : SimOp<"fmt.char", [Pure]> {
  // Operands, results and textual syntax.
  let arguments = (ins AnyInteger:$value);
  let results = (outs FormatStringType:$result);
  let assemblyFormat = "$value attr-dict `:` qualified(type($value))";

  // C++ hooks implemented by hand.
  let hasFolder = 1;

  // Documentation.
  let summary = "Character format specifier";
  let description = [{
Format the given integer value as a single character.
For integer values up to 127, ASCII compatible encoding is assumed.
For larger values, the encoding is unspecified.
If the argument's type width is less than eight bits, the value is
zero extended.
If the width is greater than eight bits, the resulting formatted string
is undefined.
}];
}
// Variadic concatenation of format strings into one format string.
def FormatStringConcatOp : SimOp<"fmt.concat", [Pure]> {
  // Operands, results and textual syntax.
  let arguments = (ins Variadic<FormatStringType>:$inputs);
  let results = (outs FormatStringType:$result);
  let assemblyFormat = "` ` `(` $inputs `)` attr-dict";

  // C++ hooks implemented by hand.
  let hasVerifier = 1;
  let hasFolder = 1;
  let hasCanonicalizeMethod = 1;

  // Documentation.
  let summary = "Concatenate format strings";
  let description = [{
Concatenates an arbitrary number of format strings from
left to right. If the argument list is empty, the empty string
literal is produced.
Concatenations must not be recursive. I.e., a concatenated string should
not contain itself directly or indirectly.
}];
}
#endif // CIRCT_DIALECT_SIM_SIMOPS_TD

View File

@ -0,0 +1,18 @@
//===- SimTypes.h - Sim dialect types ---------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef CIRCT_DIALECT_SIM_SIMTYPES_H
#define CIRCT_DIALECT_SIM_SIMTYPES_H
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Types.h"
#define GET_TYPEDEF_CLASSES
#include "circt/Dialect/Sim/SimTypes.h.inc"
#endif // CIRCT_DIALECT_SIM_SIMTYPES_H

View File

@ -0,0 +1,29 @@
//===- SimTypes.td - Sim dialect types ---------------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef CIRCT_DIALECT_SIM_SIMTYPES_TD
#define CIRCT_DIALECT_SIM_SIMTYPES_TD
include "circt/Dialect/Sim/SimDialect.td"
include "mlir/IR/AttrTypeBase.td"
// Common base class of all types defined by the Sim dialect.
class SimTypeDef<string name>
    : TypeDef<SimDialect, name>;
// Type of the values produced by the `sim.fmt.*` operations.
def FormatStringType : SimTypeDef<"FormatString"> {
  let summary = "Format string type";
  let mnemonic = "fstring";
  let description = [{
A format string type represents either a single formatting token or the
concatenation of an arbitrary but finite number of tokens.
A formatting token is either a static string literal or the association of
a dynamic hardware value with a format specifier.
}];
}
#endif // CIRCT_DIALECT_SIM_SIMTYPES_TD

View File

@ -12,6 +12,7 @@
add_circt_dialect_library(CIRCTSim
SimDialect.cpp
SimOps.cpp
SimTypes.cpp
ADDITIONAL_HEADER_DIRS
${CIRCT_MAIN_INCLUDE_DIR}/circt/Dialect/Sim

View File

@ -29,6 +29,19 @@ void SimDialect::initialize() {
#define GET_OP_LIST
#include "circt/Dialect/Sim/Sim.cpp.inc"
>();
registerTypes();
}
#include "circt/Dialect/Sim/SimDialect.cpp.inc"
/// Materializes a constant operation for a value of a Sim dialect type.
///
/// Only format strings are handled: a `StringAttr` requested as a
/// `FormatStringType` becomes a `sim.fmt.lit` operation created through
/// `builder` at `loc`. For every other type/attribute combination no
/// operation is created and nullptr is returned to signal the failure.
Operation *SimDialect::materializeConstant(::mlir::OpBuilder &builder,
                                           ::mlir::Attribute value,
                                           ::mlir::Type type,
                                           ::mlir::Location loc) {
  auto fmtStrType = llvm::dyn_cast<FormatStringType>(type);
  // Check the attribute kind too instead of asserting through `cast`: an
  // attribute that is not a string cannot be turned into a literal, and the
  // hook reports that by returning nullptr.
  auto literal = llvm::dyn_cast_or_null<StringAttr>(value);
  if (!fmtStrType || !literal)
    return nullptr;
  return builder.create<FormatLitOp>(loc, fmtStrType, literal);
}

View File

@ -12,6 +12,7 @@
#include "circt/Dialect/Sim/SimOps.h"
#include "circt/Dialect/HW/ModuleImplementation.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Interfaces/FunctionImplementation.h"
using namespace mlir;
@ -102,6 +103,180 @@ void DPIFuncOp::print(OpAsmPrinter &p) {
getPerArgumentAttrsAttrName(), getArgumentLocsAttrName()});
}
/// A literal is already constant; folding yields its string attribute.
OpFoldResult FormatLitOp::fold(FoldAdaptor adaptor) {
  return getLiteralAttr();
}
/// Constant-folds a decimal format specifier into a string attribute.
///
/// A zero-width operand always folds to "0". A constant operand is printed
/// in base ten (signed if the `isSigned` attribute is set) and left-padded
/// with spaces up to `getDecimalWidth` of its type. Returns a null result if
/// the operand is not a constant.
OpFoldResult FormatDecOp::fold(FoldAdaptor adaptor) {
  // Test the bit width instead of comparing against the signless `i0` type
  // so that zero-width integers of any signedness take this path, matching
  // the width check used by FormatCharOp::fold.
  if (getValue().getType().getIntOrFloatBitWidth() == 0)
    return StringAttr::get(getContext(), "0");

  auto intAttr = llvm::dyn_cast_or_null<IntegerAttr>(adaptor.getValue());
  if (!intAttr)
    return {};

  // Print the digits (and a leading '-' for negative signed values).
  SmallVector<char, 16> strBuf;
  intAttr.getValue().toString(strBuf, 10U, getIsSigned());

  // Pad with spaces up to the common width of this type and signedness.
  unsigned width = intAttr.getType().getIntOrFloatBitWidth();
  unsigned padWidth = FormatDecOp::getDecimalWidth(width, getIsSigned());
  padWidth = padWidth > strBuf.size() ? padWidth - strBuf.size() : 0;
  SmallVector<char, 8> padding(padWidth, ' ');
  return StringAttr::get(getContext(), Twine(padding) + Twine(strBuf));
}
/// Constant-folds a hexadecimal format specifier into a string attribute.
///
/// A zero-width operand always folds to the empty string. A constant operand
/// is printed as unsigned lower-case hexadecimal and left-padded with '0' up
/// to one digit per started group of four bits. Returns a null result if the
/// operand is not a constant.
OpFoldResult FormatHexOp::fold(FoldAdaptor adaptor) {
  // Test the bit width instead of comparing against the signless `i0` type
  // so that zero-width integers of any signedness yield the empty string
  // rather than falling through to the constant path below.
  if (getValue().getType().getIntOrFloatBitWidth() == 0)
    return StringAttr::get(getContext(), "");

  auto intAttr = llvm::dyn_cast_or_null<IntegerAttr>(adaptor.getValue());
  if (!intAttr)
    return {};

  SmallVector<char, 8> strBuf;
  intAttr.getValue().toString(strBuf, 16U, /*Signed*/ false,
                              /*formatAsCLiteral*/ false,
                              /*UpperCase*/ false);

  // One hex digit covers four bits; round up for partially used digits.
  unsigned width = intAttr.getType().getIntOrFloatBitWidth();
  unsigned padWidth = (width + 3) / 4;
  padWidth = padWidth > strBuf.size() ? padWidth - strBuf.size() : 0;
  SmallVector<char, 8> padding(padWidth, '0');
  return StringAttr::get(getContext(), Twine(padding) + Twine(strBuf));
}
/// Constant-folds a binary format specifier into a string attribute.
///
/// A zero-width operand always folds to the empty string. A constant operand
/// is printed as unsigned base-two number and left-padded with '0' up to the
/// bit width of its type. Returns a null result if the operand is not a
/// constant.
OpFoldResult FormatBinOp::fold(FoldAdaptor adaptor) {
  // Test the bit width instead of comparing against the signless `i0` type
  // so that zero-width integers of any signedness yield the empty string
  // rather than falling through to the constant path below.
  if (getValue().getType().getIntOrFloatBitWidth() == 0)
    return StringAttr::get(getContext(), "");

  auto intAttr = llvm::dyn_cast_or_null<IntegerAttr>(adaptor.getValue());
  if (!intAttr)
    return {};

  SmallVector<char, 32> strBuf;
  intAttr.getValue().toString(strBuf, 2U, false);

  // Every bit of the type gets its own digit.
  unsigned width = intAttr.getType().getIntOrFloatBitWidth();
  unsigned padWidth = width > strBuf.size() ? width - strBuf.size() : 0;
  SmallVector<char, 32> padding(padWidth, '0');
  return StringAttr::get(getContext(), Twine(padding) + Twine(strBuf));
}
/// Constant-folds a character format specifier into a one-character string.
///
/// Operands wider than eight bits are never folded, zero-width operands fold
/// to a single null character, and constant operands fold to the character
/// with the zero-extended value. Everything else is left untouched.
OpFoldResult FormatCharOp::fold(FoldAdaptor adaptor) {
  const unsigned bitWidth = getValue().getType().getIntOrFloatBitWidth();

  // Nothing to fold for operands that do not fit into a single byte.
  if (bitWidth > 8)
    return {};

  MLIRContext *ctxt = getContext();

  // A zero-width value is the null character, constant or not.
  if (bitWidth == 0)
    return StringAttr::get(ctxt, Twine(static_cast<char>(0)));

  auto constant = llvm::dyn_cast_or_null<IntegerAttr>(adaptor.getValue());
  if (!constant)
    return {};

  const char character =
      static_cast<char>(constant.getValue().getZExtValue());
  return StringAttr::get(ctxt, Twine(character));
}
/// Joins the given, non-empty list of literals into one string attribute,
/// keeping their order.
static StringAttr concatLiterals(MLIRContext *ctxt, ArrayRef<StringRef> lits) {
  assert(!lits.empty() && "No literals to concatenate");

  if (lits.size() > 1) {
    SmallString<64> joined;
    for (StringRef piece : lits)
      joined.append(piece);
    return StringAttr::get(ctxt, joined);
  }

  // A single literal needs no intermediate buffer.
  return StringAttr::get(ctxt, lits.front());
}
/// Folds trivial and all-constant concatenations.
///
/// An empty concatenation is the empty literal, a single operand is
/// forwarded unchanged, and a list made up of constant strings only is
/// joined into one literal. Mixed operand lists are not folded.
OpFoldResult FormatStringConcatOp::fold(FoldAdaptor adaptor) {
  switch (getNumOperands()) {
  case 0:
    return StringAttr::get(getContext(), "");
  case 1:
    return getOperand(0);
  default:
    break;
  }

  // Collect the operands' constant values; give up at the first operand
  // that is not a known string.
  SmallVector<StringRef> lits;
  for (Attribute attr : adaptor.getInputs()) {
    if (auto lit = dyn_cast_or_null<StringAttr>(attr)) {
      lits.push_back(lit.getValue());
      continue;
    }
    return {};
  }
  return concatLiterals(getContext(), lits);
}
/// Rejects concatenations that list their own result among their operands.
LogicalResult FormatStringConcatOp::verify() {
  Value self = getResult();
  for (Value operand : getOperands())
    if (operand == self)
      return emitOpError("is infinitely recursive.");
  return success();
}
/// Simplifies the operand list of a concatenation.
///
/// Empty literals are dropped and runs of adjacent literals are fused into
/// a single literal. Depending on how many operands remain, the operation is
/// replaced by an empty literal, replaced by its only remaining operand, or
/// updated in place. Fails if the operand list cannot be shortened.
LogicalResult FormatStringConcatOp::canonicalize(FormatStringConcatOp op,
                                                 PatternRewriter &rewriter) {
  const unsigned numOperands = op.getNumOperands();
  if (numOperands < 2)
    return failure(); // Should be handled by the folder

  auto fmtStrType = FormatStringType::get(op.getContext());

  // Run of adjacent, non-empty literals that has not been emitted yet, and
  // the operation that contributed its most recent entry.
  SmallVector<StringRef> pendingLits;
  FormatLitOp lastLitOp;

  SmallVector<Value> mergedOperands;
  mergedOperands.reserve(numOperands);

  // Moves the pending run of literals into the new operand list.
  auto flushPendingLits = [&]() {
    if (pendingLits.empty())
      return;
    if (pendingLits.size() == 1) {
      // Reuse the existing literal.
      mergedOperands.push_back(lastLitOp.getResult());
    } else {
      // Create a fused literal.
      Value fused = rewriter.createOrFold<FormatLitOp>(
          op.getLoc(), fmtStrType,
          concatLiterals(op.getContext(), pendingLits));
      mergedOperands.push_back(fused);
    }
    pendingLits.clear();
  };

  for (Value operand : op.getOperands()) {
    auto litOp = operand.getDefiningOp<FormatLitOp>();
    if (!litOp) {
      // A non-literal operand terminates the current run of literals.
      flushPendingLits();
      mergedOperands.push_back(operand);
      continue;
    }
    // Empty literals contribute nothing and are simply dropped.
    if (litOp.getLiteral().empty())
      continue;
    lastLitOp = litOp;
    pendingLits.push_back(litOp.getLiteral());
  }
  // Push trailing literals into the new operand list
  flushPendingLits();

  if (mergedOperands.size() == numOperands)
    return failure(); // Nothing changed

  if (mergedOperands.empty()) {
    rewriter.replaceOpWithNewOp<FormatLitOp>(op, fmtStrType,
                                             rewriter.getStringAttr(""));
    return success();
  }

  if (mergedOperands.size() == 1) {
    rewriter.replaceOp(op, mergedOperands);
    return success();
  }

  rewriter.modifyOpInPlace(op, [&]() { op->setOperands(mergedOperands); });
  return success();
}
//===----------------------------------------------------------------------===//
// TableGen generated logic.
//===----------------------------------------------------------------------===//

View File

@ -0,0 +1,27 @@
//===- SimTypes.cpp -------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "circt/Dialect/Sim/SimTypes.h"
#include "circt/Dialect/Sim/SimDialect.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/DialectImplementation.h"
#include "llvm/ADT/TypeSwitch.h"
using namespace circt;
using namespace sim;
using namespace mlir;
#define GET_TYPEDEF_CLASSES
#include "circt/Dialect/Sim/SimTypes.cpp.inc"
/// Registers the Sim dialect's types with the dialect by handing the type
/// list provided by the included SimTypes.cpp.inc to `addTypes`.
void SimDialect::registerTypes() {
addTypes<
// With GET_TYPEDEF_LIST defined, the include below is expected to expand to
// the template argument list of this call.
#define GET_TYPEDEF_LIST
#include "circt/Dialect/Sim/SimTypes.cpp.inc"
>();
}

View File

@ -0,0 +1,119 @@
// RUN: circt-opt %s --canonicalize | FileCheck --strict-whitespace %s
// Formats a zero-width input and constants of 1, 4, 5, 8 and 22 bits as
// binary, unsigned decimal, signed decimal and hexadecimal. The nested
// concatenations, including empty ones, are expected to collapse into the
// single literal matched below.
// CHECK-LABEL: hw.module @constant_fold0
// CHECK: sim.fmt.lit ",0,0,;0,0, 0,0;1,1,-1,1;0011, 3, 3,3;01010,10, 10,0a;10000000,128,-128,80;0000001100101011111110, 51966, 51966,00cafe"
hw.module @constant_fold0(in %zeroWitdh: i0, out res: !sim.fstring) {
%comma = sim.fmt.lit ","
%semicolon = sim.fmt.lit ";"
%nocat = sim.fmt.concat ()
%w0b = sim.fmt.bin %zeroWitdh : i0
%w0u = sim.fmt.dec %zeroWitdh : i0
%w0s = sim.fmt.dec signed %zeroWitdh : i0
%w0h = sim.fmt.hex %zeroWitdh : i0
%catw0 = sim.fmt.concat (%w0b, %comma, %w0u, %comma, %w0s, %comma, %w0h)
%cst0_1 = hw.constant 0 : i1
%w1b0 = sim.fmt.bin %cst0_1 : i1
%w1u0 = sim.fmt.dec %cst0_1 : i1
%w1s0 = sim.fmt.dec signed %cst0_1 : i1
%w1h0 = sim.fmt.hex %cst0_1 : i1
%catw1_0 = sim.fmt.concat (%w1b0, %comma, %w1u0, %comma, %w1s0, %comma, %w1h0)
%cst1_1 = hw.constant -1 : i1
%w1b1 = sim.fmt.bin %cst1_1 : i1
%w1u1 = sim.fmt.dec %cst1_1 : i1
%w1s1 = sim.fmt.dec signed %cst1_1 : i1
%w1h1 = sim.fmt.hex %cst1_1 : i1
%catw1_1 = sim.fmt.concat (%w1b1, %comma, %w1u1, %comma, %w1s1, %comma, %w1h1)
%cst3_4 = hw.constant 3 : i4
%w4b3 = sim.fmt.bin %cst3_4 : i4
%w4u3 = sim.fmt.dec %cst3_4 : i4
%w4s3 = sim.fmt.dec signed %cst3_4 : i4
%w4h3 = sim.fmt.hex %cst3_4 : i4
%catw4_3 = sim.fmt.concat (%w4b3, %comma, %w4u3, %comma, %w4s3, %comma, %w4h3)
%cst10_5 = hw.constant 10 : i5
%w5b10 = sim.fmt.bin %cst10_5 : i5
%w5u10 = sim.fmt.dec %cst10_5 : i5
%w5s10 = sim.fmt.dec signed %cst10_5 : i5
%w5h10 = sim.fmt.hex %cst10_5 : i5
%catw5_10 = sim.fmt.concat (%w5b10, %comma, %w5u10, %comma, %w5s10, %comma, %w5h10)
%cst128_8 = hw.constant 128 : i8
%w8b128 = sim.fmt.bin %cst128_8 : i8
%w8u128 = sim.fmt.dec %cst128_8 : i8
%w8s128 = sim.fmt.dec signed %cst128_8 : i8
%w8h128 = sim.fmt.hex %cst128_8 : i8
%catw8_128 = sim.fmt.concat (%w8b128, %comma, %w8u128, %comma, %w8s128, %comma, %w8h128, %nocat)
%cstcafe_22 = hw.constant 0xcafe : i22
%w22bcafe = sim.fmt.bin %cstcafe_22 : i22
%w22ucafe = sim.fmt.dec %cstcafe_22 : i22
%w22scafe = sim.fmt.dec signed %cstcafe_22 : i22
%w22hcafe = sim.fmt.hex %cstcafe_22 : i22
%catw22_cafe = sim.fmt.concat (%w22bcafe, %comma, %w22ucafe, %comma, %w22scafe, %comma, %w22hcafe)
%catout = sim.fmt.concat (%catw0, %semicolon, %catw1_0, %semicolon, %catw1_1, %nocat, %semicolon, %catw4_3, %semicolon, %catw5_10, %semicolon, %catw8_128, %semicolon, %catw22_cafe)
%catcatout = sim.fmt.concat (%catout)
hw.output %catcatout : !sim.fstring
}
// Formats one negative 123-bit constant with all four integer specifiers and
// interleaves the results with literals. The whole concatenation is expected
// to fold into the single literal matched below.
// CHECK-LABEL: hw.module @constant_fold1
// CHECK: sim.fmt.lit " %b: '111111111111111111111111111111111111111111111111111000110100000010010001001010111001101011110010101010110010011011001001110' %u: '10633823966279322740806214058000332366' %d: ' -4242424242424242424242' %x: '7ffffffffffff1a04895cd79559364e'"
hw.module @constant_fold1(out res: !sim.fstring) {
%preb = sim.fmt.lit " %b: '"
%preu = sim.fmt.lit " %u: '"
%pres = sim.fmt.lit " %d: '"
%preh = sim.fmt.lit " %x: '"
%q = sim.fmt.lit "'"
%cst42_123 = hw.constant -4242424242424242424242 : i123
%w123b42 = sim.fmt.bin %cst42_123 : i123
%w123u42 = sim.fmt.dec %cst42_123 : i123
%w123s42 = sim.fmt.dec signed %cst42_123 : i123
%w123h42 = sim.fmt.hex %cst42_123 : i123
%res = sim.fmt.concat (%preb, %w123b42, %q, %preu, %w123u42, %q, %pres, %w123s42, %q, %preh, %w123h42, %q)
hw.output %res : !sim.fstring
}
// The hex specifier of a non-constant input cannot be folded. The three
// trailing literals are expected to be fused, so the same " - " literal value
// ends up on both sides of the hex specifier.
// CHECK-LABEL: hw.module @constant_fold2
hw.module @constant_fold2(in %foo: i1027, out res: !sim.fstring) {
// CHECK: [[SDS:%.+]] = sim.fmt.lit " - "
// CHECK: [[HEX:%.+]] = sim.fmt.hex %foo : i1027
// CHECK: [[CAT:%.+]] = sim.fmt.concat ([[SDS]], [[HEX]], [[SDS]])
// CHECK: hw.output [[CAT]] : !sim.fstring
%space = sim.fmt.lit " "
%dash = sim.fmt.lit "-"
%spaceDashSpace = sim.fmt.lit " - "
%hex = sim.fmt.hex %foo : i1027
%res = sim.fmt.concat (%spaceDashSpace, %hex, %space, %dash, %space)
hw.output %res : !sim.fstring
}
// Character specifiers on constants of up to eight bits and on a zero-width
// input. The expected literal contains line feed, carriage return, null
// characters and the non-ASCII value 200 (0xC8).
// CHECK-LABEL: hw.module @constant_fold3
// CHECK: sim.fmt.lit "Foo\0A\0D\00Foo\00\C8"
hw.module @constant_fold3(in %zeroWitdh: i0, out res: !sim.fstring) {
%F = hw.constant 70 : i7
%o = hw.constant 111 : i8
%cr = hw.constant 13 : i4
%lf = hw.constant 10 : i5
%ext = hw.constant 200: i8
%cF = sim.fmt.char %F : i7
%co = sim.fmt.char %o : i8
%ccr = sim.fmt.char %cr : i4
%clf = sim.fmt.char %lf : i5
%cext = sim.fmt.char %ext : i8
%null = sim.fmt.char %zeroWitdh : i0
%foo = sim.fmt.concat (%cF, %co, %co)
%cat = sim.fmt.concat (%foo, %clf, %ccr, %null, %foo, %null, %cext)
hw.output %cat : !sim.fstring
}

View File

@ -0,0 +1,8 @@
// RUN: circt-opt %s --split-input-file --verify-diagnostics
// A concatenation that uses its own result as an operand must be rejected
// with the diagnostic annotated below.
hw.module @fmt_infinite_concat() {
%lp = sim.fmt.lit ", {"
%rp = sim.fmt.lit "}"
// expected-error @below {{op is infinitely recursive.}}
%ordinal = sim.fmt.concat (%ordinal, %lp, %ordinal, %rp)
}