[Utils] Include reduce-initial-interval in the DSE

This commit is contained in:
Hanchen Ye 2022-02-09 14:34:57 -06:00
parent dfc2ab12c2
commit 941d17bdf9
4 changed files with 50 additions and 50 deletions

View File

@ -50,7 +50,6 @@ struct ReduceInitialIntervalPattern : public OpRewritePattern<AffineForOp> {
// Traverse all buffer accesses in the loop body.
for (auto pair : map) {
auto buf = pair.first;
auto accesses = pair.second;
// Only if a load depends on a dominated store (a back dependence), the

View File

@ -116,6 +116,7 @@ static void addPassPipeline(PassManager &pm) {
// Generic common sub expression elimination.
pm.addPass(createCSEPass());
pm.addPass(createReduceInitialIntervalPass());
}
bool scalehls::applyMemoryAccessOpt(FuncOp func) {

View File

@ -17,13 +17,13 @@
using namespace std;
/// This is the top function.
/// Latency=190822514, interval=190822514
/// DSP=68
/// Latency=90810014, interval=90810014
/// DSP=69
void SgdLR_sw(
float v0[4608000],
int32_t v1[4500],
float v2[1024]
) { // L1, [0,190822514)
) { // L1, [0,90810014)
#pragma HLS interface s_axilite port=return bundle=ctrl
#pragma HLS interface bram port=v0
#pragma HLS interface bram port=v1
@ -41,56 +41,56 @@ void SgdLR_sw(
#pragma HLS array_partition variable=v3 cyclic factor=8 dim=1
#pragma HLS resource variable=v3 core=ram_s2p_bram
for (int v4 = 0; v4 < 5; v4 += 1) { // L6, [0,190822512), iterCycle=38164502, II=38164502
for (int v5 = 0; v5 < 4500; v5 += 1) { // L7, [0,38164502), iterCycle=8481, II=8481
for (int v4 = 0; v4 < 5; v4 += 1) { // L6, [0,90810012), iterCycle=18162002, II=18162002
for (int v5 = 0; v5 < 4500; v5 += 1) { // L7, [0,18162002), iterCycle=4036, II=4036
float v6[1]; // L8, [0,0)
v6[0] = 0.000000; // L9, [8134,8135)
v6[0] = 0.000000; // L9, [3724,3725)
float v7[1]; // L10, [0,0)
v7[0] = 0.000000; // L11, [8175,8176)
for (int v8 = 0; v8 < 128; v8 += 1) { // L12, [0,8199), iterCycle=69, II=64
#pragma HLS pipeline II=42
float v9 = v6[0]; // L13, [5,6)
float v10 = v2[(v8 * 8)]; // L14, [0,2)
float v11 = v0[((v5 * 1024) + (v8 * 8))]; // L15, [0,2)
float v12 = v10 * v11; // L16, [2,6)
float v13 = v9 + v12; // L17, [6,11)
float v14 = v2[((v8 * 8) + 1)]; // L18, [5,7)
float v15 = v0[(((v5 * 1024) + (v8 * 8)) + 1)]; // L19, [5,7)
float v16 = v14 * v15; // L20, [7,11)
float v17 = v13 + v16; // L21, [11,16)
float v18 = v2[((v8 * 8) + 2)]; // L22, [10,12)
float v19 = v0[(((v5 * 1024) + (v8 * 8)) + 2)]; // L23, [10,12)
float v20 = v18 * v19; // L24, [12,16)
float v21 = v17 + v20; // L25, [16,21)
float v22 = v2[((v8 * 8) + 3)]; // L26, [15,17)
float v23 = v0[(((v5 * 1024) + (v8 * 8)) + 3)]; // L27, [15,17)
float v24 = v22 * v23; // L28, [17,21)
float v25 = v21 + v24; // L29, [21,26)
float v26 = v2[((v8 * 8) + 4)]; // L30, [20,22)
float v27 = v0[(((v5 * 1024) + (v8 * 8)) + 4)]; // L31, [20,22)
float v28 = v26 * v27; // L32, [22,26)
float v29 = v25 + v28; // L33, [26,31)
float v30 = v2[((v8 * 8) + 5)]; // L34, [25,27)
float v31 = v0[(((v5 * 1024) + (v8 * 8)) + 5)]; // L35, [25,27)
float v32 = v30 * v31; // L36, [27,31)
float v33 = v29 + v32; // L37, [31,36)
float v34 = v2[((v8 * 8) + 6)]; // L38, [30,32)
float v35 = v0[(((v5 * 1024) + (v8 * 8)) + 6)]; // L39, [30,32)
float v36 = v34 * v35; // L40, [32,36)
float v37 = v33 + v36; // L41, [36,41)
float v38 = v2[((v8 * 8) + 7)]; // L42, [35,37)
float v39 = v0[(((v5 * 1024) + (v8 * 8)) + 7)]; // L43, [35,37)
float v40 = v38 * v39; // L44, [37,41)
float v41 = v37 + v40; // L45, [41,46)
v7[0] = 0.000000; // L11, [3730,3731)
for (int v8 = 0; v8 < 128; v8 += 1) { // L12, [0,3754), iterCycle=69, II=29
#pragma HLS pipeline II=7
float v9 = v2[(v8 * 8)]; // L13, [0,2)
float v10 = v0[((v5 * 1024) + (v8 * 8))]; // L14, [0,2)
float v11 = v9 * v10; // L15, [2,6)
float v12 = v2[((v8 * 8) + 1)]; // L16, [0,2)
float v13 = v0[(((v5 * 1024) + (v8 * 8)) + 1)]; // L17, [0,2)
float v14 = v12 * v13; // L18, [2,6)
float v15 = v11 + v14; // L19, [6,11)
float v16 = v2[((v8 * 8) + 2)]; // L20, [5,7)
float v17 = v0[(((v5 * 1024) + (v8 * 8)) + 2)]; // L21, [5,7)
float v18 = v16 * v17; // L22, [7,11)
float v19 = v15 + v18; // L23, [11,16)
float v20 = v2[((v8 * 8) + 3)]; // L24, [10,12)
float v21 = v0[(((v5 * 1024) + (v8 * 8)) + 3)]; // L25, [10,12)
float v22 = v20 * v21; // L26, [12,16)
float v23 = v19 + v22; // L27, [16,21)
float v24 = v2[((v8 * 8) + 4)]; // L28, [15,17)
float v25 = v0[(((v5 * 1024) + (v8 * 8)) + 4)]; // L29, [15,17)
float v26 = v24 * v25; // L30, [17,21)
float v27 = v23 + v26; // L31, [21,26)
float v28 = v2[((v8 * 8) + 5)]; // L32, [20,22)
float v29 = v0[(((v5 * 1024) + (v8 * 8)) + 5)]; // L33, [20,22)
float v30 = v28 * v29; // L34, [22,26)
float v31 = v27 + v30; // L35, [26,31)
float v32 = v2[((v8 * 8) + 6)]; // L36, [25,27)
float v33 = v0[(((v5 * 1024) + (v8 * 8)) + 6)]; // L37, [25,27)
float v34 = v32 * v33; // L38, [27,31)
float v35 = v31 + v34; // L39, [31,36)
float v36 = v2[((v8 * 8) + 7)]; // L40, [30,32)
float v37 = v0[(((v5 * 1024) + (v8 * 8)) + 7)]; // L41, [30,32)
float v38 = v36 * v37; // L42, [32,36)
float v39 = v35 + v38; // L43, [36,41)
float v40 = v6[0]; // L44, [40,41)
float v41 = v40 + v39; // L45, [41,46)
v6[0] = v41; // L46, [68,69)
v7[0] = v41; // L47, [46,47)
}
float v42 = v7[0]; // L49, [8177,8178)
float v43 = -(v42); // L50, [8178,8178)
float v44 = exp(v43); // L51, [8178,8178)
float v45 = 1.000000 + v44; // L52, [8178,8183)
float v46 = 1.000000 / v45; // L53, [8183,8199)
for (int v47 = 0; v47 < 128; v47 += 1) { // L54, [8199,8340), iterCycle=12, II=1
float v42 = v7[0]; // L49, [3732,3733)
float v43 = -(v42); // L50, [3733,3733)
float v44 = exp(v43); // L51, [3733,3733)
float v45 = 1.000000 + v44; // L52, [3733,3738)
float v46 = 1.000000 / v45; // L53, [3738,3754)
for (int v47 = 0; v47 < 128; v47 += 1) { // L54, [3754,3895), iterCycle=12, II=1
#pragma HLS pipeline II=1
int32_t v48 = v1[v5]; // L55, [0,2)
float v49 = v48; // L56, [2,2)
@ -120,7 +120,7 @@ void SgdLR_sw(
float v66 = v50 * v65; // L80, [7,11)
v3[((v47 * 8) + 7)] = v66; // L81, [11,12)
}
for (int v67 = 0; v67 < 128; v67 += 1) { // L83, [8340,8481), iterCycle=12, II=1
for (int v67 = 0; v67 < 128; v67 += 1) { // L83, [3895,4036), iterCycle=12, II=1
#pragma HLS pipeline II=1
float v68 = v3[(v67 * 8)]; // L84, [0,2)
float v69 = -60000.000000 * v68; // L85, [2,6)