diff --git a/lib/Transforms/Simplification/ReduceInitialInterval.cpp b/lib/Transforms/Simplification/ReduceInitialInterval.cpp index c9658a3..2319a02 100644 --- a/lib/Transforms/Simplification/ReduceInitialInterval.cpp +++ b/lib/Transforms/Simplification/ReduceInitialInterval.cpp @@ -50,7 +50,6 @@ struct ReduceInitialIntervalPattern : public OpRewritePattern { // Traverse all buffer accesses in the loop body. for (auto pair : map) { - auto buf = pair.first; auto accesses = pair.second; // Only if a load depends on a dominated store (a back dependence), the diff --git a/lib/Transforms/Utils.cpp b/lib/Transforms/Utils.cpp index 2ced7a8..11b8eac 100644 --- a/lib/Transforms/Utils.cpp +++ b/lib/Transforms/Utils.cpp @@ -116,6 +116,7 @@ static void addPassPipeline(PassManager &pm) { // Generic common sub expression elimination. pm.addPass(createCSEPass()); + pm.addPass(createReduceInitialIntervalPass()); } bool scalehls::applyMemoryAccessOpt(FuncOp func) { diff --git a/samples/rosetta/spam-filter/Sgd_sw_base.cpp b/samples/rosetta/spam-filter/SgdLR_sw_base.cpp similarity index 100% rename from samples/rosetta/spam-filter/Sgd_sw_base.cpp rename to samples/rosetta/spam-filter/SgdLR_sw_base.cpp diff --git a/samples/rosetta/spam-filter/Sgd_sw_dse.cpp b/samples/rosetta/spam-filter/SgdLR_sw_dse.cpp similarity index 65% rename from samples/rosetta/spam-filter/Sgd_sw_dse.cpp rename to samples/rosetta/spam-filter/SgdLR_sw_dse.cpp index 40e70c5..6f33c42 100644 --- a/samples/rosetta/spam-filter/Sgd_sw_dse.cpp +++ b/samples/rosetta/spam-filter/SgdLR_sw_dse.cpp @@ -17,13 +17,13 @@ using namespace std; /// This is top function. -/// Latency=190822514, interval=190822514 -/// DSP=68 +/// Latency=90810014, interval=90810014 +/// DSP=69 void SgdLR_sw( float v0[4608000], int32_t v1[4500], float v2[1024] -) { // L1, [0,190822514) +) { // L1, [0,90810014) #pragma HLS interface s_axilite port=return bundle=ctrl #pragma HLS interface bram port=v0 #pragma HLS interface bram port=v1 @@ -41,56 +41,56 @@ void SgdLR_sw( #pragma HLS array_partition variable=v3 cyclic factor=8 dim=1 #pragma HLS resource variable=v3 core=ram_s2p_bram - for (int v4 = 0; v4 < 5; v4 += 1) { // L6, [0,190822512), iterCycle=38164502, II=38164502 - for (int v5 = 0; v5 < 4500; v5 += 1) { // L7, [0,38164502), iterCycle=8481, II=8481 + for (int v4 = 0; v4 < 5; v4 += 1) { // L6, [0,90810012), iterCycle=18162002, II=18162002 + for (int v5 = 0; v5 < 4500; v5 += 1) { // L7, [0,18162002), iterCycle=4036, II=4036 float v6[1]; // L8, [0,0) - v6[0] = 0.000000; // L9, [8134,8135) + v6[0] = 0.000000; // L9, [3724,3725) float v7[1]; // L10, [0,0) - v7[0] = 0.000000; // L11, [8175,8176) - for (int v8 = 0; v8 < 128; v8 += 1) { // L12, [0,8199), iterCycle=69, II=64 - #pragma HLS pipeline II=42 - float v9 = v6[0]; // L13, [5,6) - float v10 = v2[(v8 * 8)]; // L14, [0,2) - float v11 = v0[((v5 * 1024) + (v8 * 8))]; // L15, [0,2) - float v12 = v10 * v11; // L16, [2,6) - float v13 = v9 + v12; // L17, [6,11) - float v14 = v2[((v8 * 8) + 1)]; // L18, [5,7) - float v15 = v0[(((v5 * 1024) + (v8 * 8)) + 1)]; // L19, [5,7) - float v16 = v14 * v15; // L20, [7,11) - float v17 = v13 + v16; // L21, [11,16) - float v18 = v2[((v8 * 8) + 2)]; // L22, [10,12) - float v19 = v0[(((v5 * 1024) + (v8 * 8)) + 2)]; // L23, [10,12) - float v20 = v18 * v19; // L24, [12,16) - float v21 = v17 + v20; // L25, [16,21) - float v22 = v2[((v8 * 8) + 3)]; // L26, [15,17) - float v23 = v0[(((v5 * 1024) + (v8 * 8)) + 3)]; // L27, [15,17) - float v24 = v22 * v23; // L28, [17,21) - float v25 = v21 + v24; // L29, [21,26) - float v26 = v2[((v8 * 8) + 4)]; // L30, [20,22) - float v27 = v0[(((v5 * 1024) + (v8 * 8)) + 4)]; // L31, [20,22) - float v28 = v26 * v27; // L32, [22,26) - float v29 = v25 + v28; // L33, [26,31) - float v30 = v2[((v8 * 8) + 5)]; // L34, [25,27) - float v31 = v0[(((v5 * 1024) + (v8 * 8)) + 5)]; // L35, [25,27) - float v32 = v30 * v31; // L36, [27,31) - float v33 = v29 + v32; // L37, [31,36) - float v34 = v2[((v8 * 8) + 6)]; // L38, [30,32) - float v35 = v0[(((v5 * 1024) + (v8 * 8)) + 6)]; // L39, [30,32) - float v36 = v34 * v35; // L40, [32,36) - float v37 = v33 + v36; // L41, [36,41) - float v38 = v2[((v8 * 8) + 7)]; // L42, [35,37) - float v39 = v0[(((v5 * 1024) + (v8 * 8)) + 7)]; // L43, [35,37) - float v40 = v38 * v39; // L44, [37,41) - float v41 = v37 + v40; // L45, [41,46) + v7[0] = 0.000000; // L11, [3730,3731) + for (int v8 = 0; v8 < 128; v8 += 1) { // L12, [0,3754), iterCycle=69, II=29 + #pragma HLS pipeline II=7 + float v9 = v2[(v8 * 8)]; // L13, [0,2) + float v10 = v0[((v5 * 1024) + (v8 * 8))]; // L14, [0,2) + float v11 = v9 * v10; // L15, [2,6) + float v12 = v2[((v8 * 8) + 1)]; // L16, [0,2) + float v13 = v0[(((v5 * 1024) + (v8 * 8)) + 1)]; // L17, [0,2) + float v14 = v12 * v13; // L18, [2,6) + float v15 = v11 + v14; // L19, [6,11) + float v16 = v2[((v8 * 8) + 2)]; // L20, [5,7) + float v17 = v0[(((v5 * 1024) + (v8 * 8)) + 2)]; // L21, [5,7) + float v18 = v16 * v17; // L22, [7,11) + float v19 = v15 + v18; // L23, [11,16) + float v20 = v2[((v8 * 8) + 3)]; // L24, [10,12) + float v21 = v0[(((v5 * 1024) + (v8 * 8)) + 3)]; // L25, [10,12) + float v22 = v20 * v21; // L26, [12,16) + float v23 = v19 + v22; // L27, [16,21) + float v24 = v2[((v8 * 8) + 4)]; // L28, [15,17) + float v25 = v0[(((v5 * 1024) + (v8 * 8)) + 4)]; // L29, [15,17) + float v26 = v24 * v25; // L30, [17,21) + float v27 = v23 + v26; // L31, [21,26) + float v28 = v2[((v8 * 8) + 5)]; // L32, [20,22) + float v29 = v0[(((v5 * 1024) + (v8 * 8)) + 5)]; // L33, [20,22) + float v30 = v28 * v29; // L34, [22,26) + float v31 = v27 + v30; // L35, [26,31) + float v32 = v2[((v8 * 8) + 6)]; // L36, [25,27) + float v33 = v0[(((v5 * 1024) + (v8 * 8)) + 6)]; // L37, [25,27) + float v34 = v32 * v33; // L38, [27,31) + float v35 = v31 + v34; // L39, [31,36) + float v36 = v2[((v8 * 8) + 7)]; // L40, [30,32) + float v37 = v0[(((v5 * 1024) + (v8 * 8)) + 7)]; // L41, [30,32) + float v38 = v36 * v37; // L42, [32,36) + float v39 = v35 + v38; // L43, [36,41) + float v40 = v6[0]; // L44, [40,41) + float v41 = v40 + v39; // L45, [41,46) v6[0] = v41; // L46, [68,69) v7[0] = v41; // L47, [46,47) } - float v42 = v7[0]; // L49, [8177,8178) - float v43 = -(v42); // L50, [8178,8178) - float v44 = exp(v43); // L51, [8178,8178) - float v45 = 1.000000 + v44; // L52, [8178,8183) - float v46 = 1.000000 / v45; // L53, [8183,8199) - for (int v47 = 0; v47 < 128; v47 += 1) { // L54, [8199,8340), iterCycle=12, II=1 + float v42 = v7[0]; // L49, [3732,3733) + float v43 = -(v42); // L50, [3733,3733) + float v44 = exp(v43); // L51, [3733,3733) + float v45 = 1.000000 + v44; // L52, [3733,3738) + float v46 = 1.000000 / v45; // L53, [3738,3754) + for (int v47 = 0; v47 < 128; v47 += 1) { // L54, [3754,3895), iterCycle=12, II=1 #pragma HLS pipeline II=1 int32_t v48 = v1[v5]; // L55, [0,2) float v49 = v48; // L56, [2,2) @@ -120,7 +120,7 @@ void SgdLR_sw( float v66 = v50 * v65; // L80, [7,11) v3[((v47 * 8) + 7)] = v66; // L81, [11,12) } - for (int v67 = 0; v67 < 128; v67 += 1) { // L83, [8340,8481), iterCycle=12, II=1 + for (int v67 = 0; v67 < 128; v67 += 1) { // L83, [3895,4036), iterCycle=12, II=1 #pragma HLS pipeline II=1 float v68 = v3[(v67 * 8)]; // L84, [0,2) float v69 = -60000.000000 * v68; // L85, [2,6)