From 72769f9a1c298f58328c55c6c43e632cdfd223d6 Mon Sep 17 00:00:00 2001 From: Jenny Huang Date: Sun, 3 Nov 2019 01:44:31 -0800 Subject: [PATCH] Update examples/scripts to force user define accel in accel.c --- examples/vadd/accel.c | 36 +++++++ examples/vadd/accel.h | 13 +++ examples/vadd/vadd.c | 42 +------- examples/vadd_tl/accel.c | 116 ++++++++++++++++++++ examples/vadd_tl/accel.h | 13 +++ examples/vadd_tl/vadd_tl.c | 131 +---------------------- scripts/generate_wrapper.pl | 36 +++++-- scripts/generate_wrapper_tl.pl | 30 ++++-- scripts/run_hls.pl | 36 ++++--- scripts/sw_aux/makefiles/Makefile.bm.in | 48 +++++++-- scripts/sw_aux/makefiles/Makefile.gcc.in | 12 ++- 11 files changed, 303 insertions(+), 210 deletions(-) create mode 100644 examples/vadd/accel.c create mode 100644 examples/vadd/accel.h create mode 100644 examples/vadd_tl/accel.c create mode 100644 examples/vadd_tl/accel.h diff --git a/examples/vadd/accel.c b/examples/vadd/accel.c new file mode 100644 index 0000000..a13c430 --- /dev/null +++ b/examples/vadd/accel.c @@ -0,0 +1,36 @@ +#include "accel.h" + +int vadd(int* length_a, int* b_c) { +#pragma HLS INTERFACE ap_bus depth=10 port=length_a +#pragma HLS INTERFACE ap_bus depth=10 port=b_c + +int length = length_a[0]; +int * a = & length_a[1]; +//int * b = b_c; +//int * c = & b_c[length]; +// For pointer type + +//#pragma HLS DATAFLOW + int upper = (length >> 3) << 3; + int i = 0; + for (i = 0; i < upper; i += 8) { + // To prevent burst mode + b_c[length+i+1] = a[i+1] +b_c[i+1]; + b_c[length+i+0] = a[i+0] +b_c[i+0]; + b_c[length+i+2] = a[i+2] +b_c[i+2]; + b_c[length+i+3] = a[i+3] +b_c[i+3]; + + b_c[length+i+4] = a[i+4] +b_c[i+4]; + b_c[length+i+5] = a[i+5] +b_c[i+5]; + b_c[length+i+6] = a[i+6] +b_c[i+6]; + b_c[length+i+7] = a[i+7] +b_c[i+7]; + } + + int output = 0; + for (i = upper; i < length; i++) { + b_c[length+i] = a[i] +b_c[i]; + } + return 0; +} + + diff --git a/examples/vadd/accel.h b/examples/vadd/accel.h new file mode 100644 index 0000000..1874c91 --- /dev/null +++ b/examples/vadd/accel.h @@ -0,0 +1,13 @@ +#ifndef ACCEL_H +#define ACCEL_H + +// MUST ADD +// In generated accel wrapper the ACCEL_WRAPPER is defined +// The accel_wrapper.h is also generated +#ifdef ACCEL_WRAPPER +#include "accel_wrapper.h" +#else +int vadd(int* length_a, int* b_c); +#endif + +#endif diff --git a/examples/vadd/vadd.c b/examples/vadd/vadd.c index b176560..2f2c252 100644 --- a/examples/vadd/vadd.c +++ b/examples/vadd/vadd.c @@ -1,11 +1,10 @@ #include #include #include "time.h" -#ifdef CUSTOM_INST -#include "bm_wrapper.h" -#endif #define LENGTH 80 +#include "accel.h" + void print_vec(int* vec, int length){ for(int i = 0; i < length; i++){ if (i != 0 ) printf(", "); @@ -13,39 +12,6 @@ void print_vec(int* vec, int length){ } } -int vadd(int* length_a, int* b_c) { -#pragma HLS INTERFACE ap_bus depth=10 port=length_a -#pragma HLS INTERFACE ap_bus depth=10 port=b_c - -int length = length_a[0]; -int * a = & length_a[1]; -//int * b = b_c; -//int * c = & b_c[length]; -// For pointer type - -//#pragma HLS DATAFLOW - int upper = (length >> 3) << 3; - int i = 0; - for (i = 0; i < upper; i += 8) { - // To prevent burst mode - b_c[length+i+1] = a[i+1] +b_c[i+1]; - b_c[length+i+0] = a[i+0] +b_c[i+0]; - b_c[length+i+2] = a[i+2] +b_c[i+2]; - b_c[length+i+3] = a[i+3] +b_c[i+3]; - - b_c[length+i+4] = a[i+4] +b_c[i+4]; - b_c[length+i+5] = a[i+5] +b_c[i+5]; - b_c[length+i+6] = a[i+6] +b_c[i+6]; - b_c[length+i+7] = a[i+7] +b_c[i+7]; - } - - int output = 0; - for (i = upper; i < length; i++) { - b_c[length+i] = a[i] +b_c[i]; - } - return 0; -} - int main () { int length_a[LENGTH + 1], b_c[LENGTH + LENGTH]; //int a[LENGTH], b[LENGTH], c[LENGTH]; @@ -59,11 +25,7 @@ int main () { uint64_t begin, end, dur; begin = read_cycle(); -#ifdef CUSTOM_INST - vadd_wrapper(length_a, b_c); -#else vadd(length_a, b_c); -#endif end = read_cycle(); duration(begin, end); diff --git a/examples/vadd_tl/accel.c b/examples/vadd_tl/accel.c new file mode 100644 index 0000000..ce93f41 --- /dev/null +++ b/examples/vadd_tl/accel.c @@ -0,0 +1,116 @@ +#include "accel.h" + +int vadd(int * a, int * b, int* c, int length) { + +// For pointer type +#pragma HLS INTERFACE m_axi port=a offset=slave bundle=gmem0 num_write_outstanding=16 num_read_outstanding=16 max_write_burst_length=16 max_read_burst_length= 16 depth=16 latency=125 + +#pragma HLS INTERFACE m_axi port=b offset=slave bundle=gmem0 num_write_outstanding=16 num_read_outstanding=16 max_write_burst_length=16 max_read_burst_length= 16 depth=16 latency=125 + // Slave is for AXI4Lite, with burst mode disabled +#pragma HLS INTERFACE m_axi port=c offset=slave bundle=gmem0 num_write_outstanding=16 num_read_outstanding=16 max_write_burst_length=16 max_read_burst_length= 16 depth=16 latency=125 + + +#pragma HLS INTERFACE s_axilite port=a bundle=control +#pragma HLS INTERFACE s_axilite port=b bundle=control +#pragma HLS INTERFACE s_axilite port=c bundle=control +#pragma HLS INTERFACE s_axilite port=length bundle=control +#pragma HLS INTERFACE s_axilite port=return bundle=control + +//#pragma HLS DATAFLOW + int upper = (length >> 3) << 3; + int i = 0; + for (i = 0; i < upper; i += 8) { + // To prevent burst mode + c[i+0] = a[i+0] +b[i+0]; + c[i+1] = a[i+1] +b[i+1]; + c[i+2] = a[i+2] +b[i+2]; + c[i+3] = a[i+3] +b[i+3]; + + c[i+4] = a[i+4] +b[i+4]; + c[i+5] = a[i+5] +b[i+5]; + c[i+6] = a[i+6] +b[i+6]; + c[i+7] = a[i+7] +b[i+7]; + } + + for (i = upper; i < length; i++) { + c[i] = a[i] +b[i]; + } + return 0; +} + +//Including to use ap_uint<> datatype +//#include +/* +#define BUFFER_SIZE 128 +#define DATAWIDTH 512 +#define VECTOR_SIZE (DATAWIDTH / 32) // vector size is 16 (512/32 = 16) +//typedef ap_uint uint512_dt; + + Vector Addition Kernel Implementation using uint512_dt datatype + Arguments: + in1 (input) --> Input Vector1 + in2 (input) --> Input Vector2 + out (output) --> Output Vector + size (input) --> Size of Vector in Integer + */ +/*extern "C" { +void vadd( + const uint512_dt *in1, // Read-Only Vector 1 + const uint512_dt *in2, // Read-Only Vector 2 + uint512_dt *out, // Output Result + int size // Size in integer + ) +{ +#pragma HLS INTERFACE m_axi port=in1 offset=slave bundle=gmem +#pragma HLS INTERFACE m_axi port=in2 offset=slave bundle=gmem +#pragma HLS INTERFACE m_axi port=out offset=slave bundle=gmem +#pragma HLS INTERFACE s_axilite port=in1 bundle=control +#pragma HLS INTERFACE s_axilite port=in2 bundle=control +#pragma HLS INTERFACE s_axilite port=out bundle=control +#pragma HLS INTERFACE s_axilite port=size bundle=control +#pragma HLS INTERFACE s_axilite port=return bundle=control + + uint512_dt v1_local[BUFFER_SIZE]; // Local memory to store vector1 + uint512_dt result_local[BUFFER_SIZE];// Local Memory to store result + + // Input vector size for interger vectors. However kernel is directly + // accessing 512bit data (total 16 elements). So total number of read + // from global memory is calculated here: + int size_in16 = (size-1) / VECTOR_SIZE + 1; + + //Per iteration of this loop perform BUFFER_SIZE vector addition + for(int i = 0; i < size_in16; i += BUFFER_SIZE) + { + #pragma HLS LOOP_TRIPCOUNT min=8 max=8 + int chunk_size = BUFFER_SIZE; + + //boundary checks + if ((i + BUFFER_SIZE) > size_in16) + chunk_size = size_in16 - i; + + //burst read first vector from global memory to local memory + v1_rd: for (int j = 0 ; j < chunk_size; j++){ + #pragma HLS pipeline + #pragma HLS LOOP_TRIPCOUNT min=128 max=128 + v1_local[j] = in1 [i + j]; + } + + //burst read second vector and perform vector addition + v2_rd_add: for (int j = 0 ; j < chunk_size; j++){ + #pragma HLS pipeline + #pragma HLS LOOP_TRIPCOUNT min=128 max=128 + uint512_dt tmpV1 = v1_local[j]; + uint512_dt tmpV2 = in2[i+j]; + result_local[j] = tmpV1 + tmpV2; // Vector Addition Operation + } + + //burst write the result + out_write: for (int j = 0 ; j < chunk_size; j++){ + #pragma HLS pipeline + #pragma HLS LOOP_TRIPCOUNT min=128 max=128 + out[i+j] = result_local[j]; + } + } +} +}*/ + diff --git a/examples/vadd_tl/accel.h b/examples/vadd_tl/accel.h new file mode 100644 index 0000000..b2fbfe8 --- /dev/null +++ b/examples/vadd_tl/accel.h @@ -0,0 +1,13 @@ +#ifndef ACCEL_H +#define ACCEL_H + +// MUST ADD +// In generated accel wrapper the ACCEL_WRAPPER is defined +// The accel_wrapper.h is also generated +#ifdef ACCEL_WRAPPER +#include "accel_wrapper.h" +#else +int vadd(int * a, int * b, int* c, int length); +#endif + +#endif diff --git a/examples/vadd_tl/vadd_tl.c b/examples/vadd_tl/vadd_tl.c index f4fa703..af8b24c 100644 --- a/examples/vadd_tl/vadd_tl.c +++ b/examples/vadd_tl/vadd_tl.c @@ -1,10 +1,8 @@ #include #include #define LENGTH 80 -#ifdef CUSTOM_DRIVER -#include "bm_wrapper.h" -#endif #include "time.h" +#include "accel.h" void print_vec(int* vec, int length){ for(int i = 0; i < length; i++){ @@ -13,121 +11,6 @@ void print_vec(int* vec, int length){ } } -//Including to use ap_uint<> datatype -//#include -/* -#define BUFFER_SIZE 128 -#define DATAWIDTH 512 -#define VECTOR_SIZE (DATAWIDTH / 32) // vector size is 16 (512/32 = 16) -//typedef ap_uint uint512_dt; - - Vector Addition Kernel Implementation using uint512_dt datatype - Arguments: - in1 (input) --> Input Vector1 - in2 (input) --> Input Vector2 - out (output) --> Output Vector - size (input) --> Size of Vector in Integer - */ -/*extern "C" { -void vadd( - const uint512_dt *in1, // Read-Only Vector 1 - const uint512_dt *in2, // Read-Only Vector 2 - uint512_dt *out, // Output Result - int size // Size in integer - ) -{ -#pragma HLS INTERFACE m_axi port=in1 offset=slave bundle=gmem -#pragma HLS INTERFACE m_axi port=in2 offset=slave bundle=gmem -#pragma HLS INTERFACE m_axi port=out offset=slave bundle=gmem -#pragma HLS INTERFACE s_axilite port=in1 bundle=control -#pragma HLS INTERFACE s_axilite port=in2 bundle=control -#pragma HLS INTERFACE s_axilite port=out bundle=control -#pragma HLS INTERFACE s_axilite port=size bundle=control -#pragma HLS INTERFACE s_axilite port=return bundle=control - - uint512_dt v1_local[BUFFER_SIZE]; // Local memory to store vector1 - uint512_dt result_local[BUFFER_SIZE];// Local Memory to store result - - // Input vector size for interger vectors. However kernel is directly - // accessing 512bit data (total 16 elements). So total number of read - // from global memory is calculated here: - int size_in16 = (size-1) / VECTOR_SIZE + 1; - - //Per iteration of this loop perform BUFFER_SIZE vector addition - for(int i = 0; i < size_in16; i += BUFFER_SIZE) - { - #pragma HLS LOOP_TRIPCOUNT min=8 max=8 - int chunk_size = BUFFER_SIZE; - - //boundary checks - if ((i + BUFFER_SIZE) > size_in16) - chunk_size = size_in16 - i; - - //burst read first vector from global memory to local memory - v1_rd: for (int j = 0 ; j < chunk_size; j++){ - #pragma HLS pipeline - #pragma HLS LOOP_TRIPCOUNT min=128 max=128 - v1_local[j] = in1 [i + j]; - } - - //burst read second vector and perform vector addition - v2_rd_add: for (int j = 0 ; j < chunk_size; j++){ - #pragma HLS pipeline - #pragma HLS LOOP_TRIPCOUNT min=128 max=128 - uint512_dt tmpV1 = v1_local[j]; - uint512_dt tmpV2 = in2[i+j]; - result_local[j] = tmpV1 + tmpV2; // Vector Addition Operation - } - - //burst write the result - out_write: for (int j = 0 ; j < chunk_size; j++){ - #pragma HLS pipeline - #pragma HLS LOOP_TRIPCOUNT min=128 max=128 - out[i+j] = result_local[j]; - } - } -} -}*/ - -int vadd(int * a, int * b, int* c, int length) { - -// For pointer type -#pragma HLS INTERFACE m_axi port=a offset=slave bundle=gmem0 num_write_outstanding=16 num_read_outstanding=16 max_write_burst_length=16 max_read_burst_length= 16 depth=16 latency=125 - -#pragma HLS INTERFACE m_axi port=b offset=slave bundle=gmem0 num_write_outstanding=16 num_read_outstanding=16 max_write_burst_length=16 max_read_burst_length= 16 depth=16 latency=125 - // Slave is for AXI4Lite, with burst mode disabled -#pragma HLS INTERFACE m_axi port=c offset=slave bundle=gmem0 num_write_outstanding=16 num_read_outstanding=16 max_write_burst_length=16 max_read_burst_length= 16 depth=16 latency=125 - - -#pragma HLS INTERFACE s_axilite port=a bundle=control -#pragma HLS INTERFACE s_axilite port=b bundle=control -#pragma HLS INTERFACE s_axilite port=c bundle=control -#pragma HLS INTERFACE s_axilite port=length bundle=control -#pragma HLS INTERFACE s_axilite port=return bundle=control - -//#pragma HLS DATAFLOW - int upper = (length >> 3) << 3; - int i = 0; - for (i = 0; i < upper; i += 8) { - // To prevent burst mode - c[i+0] = a[i+0] +b[i+0]; - c[i+1] = a[i+1] +b[i+1]; - c[i+2] = a[i+2] +b[i+2]; - c[i+3] = a[i+3] +b[i+3]; - - c[i+4] = a[i+4] +b[i+4]; - c[i+5] = a[i+5] +b[i+5]; - c[i+6] = a[i+6] +b[i+6]; - c[i+7] = a[i+7] +b[i+7]; - } - - int output = 0; - for (i = upper; i < length; i++) { - c[i] = a[i] +b[i]; - } - return 0; -} - int main () { int a[LENGTH], b[LENGTH], c[LENGTH]; @@ -137,16 +20,12 @@ int main () { a[i] = i; b[i] = i + 5; } -uint64_t begin, end, dur; + uint64_t begin, end; -begin = read_cycle(); -#ifdef CUSTOM_DRIVER - vadd_wrapper(a, b, c, length); -#else + begin = read_cycle(); vadd(a, b, c, length); -#endif -end = read_cycle(); -duration(begin, end); + end = read_cycle(); + duration(begin, end); printf("A = ["); print_vec(a, length); printf("]\n"); diff --git a/scripts/generate_wrapper.pl b/scripts/generate_wrapper.pl index 0521d2e..d937b9a 100644 --- a/scripts/generate_wrapper.pl +++ b/scripts/generate_wrapper.pl @@ -24,8 +24,9 @@ if ((not defined($rdir)) or $rdir eq '') { print("Please source sourceme-f1.sh!\n"); exit(); } -my $wrapper_func_name = $func_name."_wrapper"; -my $wrapper_header= "bm_wrapper.h"; +my $wrapper_func_name = $func_name; +my $wrapper_file= "accel_wrapper.c"; +my $wrapper_header= "accel_wrapper.h"; if ($prefix) { $func_name = $prefix.$func_name; } @@ -256,6 +257,8 @@ foreach my $arg (@verilog_input_pointer_arg) { } my $wrapper = '#include "'.$bm_inc_path.'rocc.h"'."\n"; +$wrapper .="#define ACCEL_WRAPPER\n"; +$wrapper .='#include "accel.h"'."\n"; my $return_type = "void "; if($ap_return){ @@ -263,7 +266,7 @@ if($ap_return){ } my $total_args = @verilog_input_scalar + $hash_count; -$wrapper .= "$return_type $wrapper_func_name("; +my $func_prototype = "$return_type $wrapper_func_name("; my @args = (); foreach my $arg (@verilog_input_scalar) { @@ -277,20 +280,23 @@ my $arg_str = join ', ', @args; $i = 0; foreach my $arg (@args) { if ($i != 0){ - $wrapper .=", " + $func_prototype.=", " } - $wrapper .="uint64_t $arg"; + $func_prototype .="uint64_t $arg"; $i=1; } -$wrapper .= ") { -"; +$func_prototype .= ")"; +$wrapper .= $func_prototype; if($ap_return){ - $wrapper .= " uint64_t ret_val;\n"; + $wrapper .= " +{ + uint64_t ret_val;\n"; } + +#$wrapper .= " ROCC_BARRIER();\n"; $wrapper .=" - #ifdef CUSTOM_INST #define XCUSTOM_ACC "; $wrapper .= $rocc_index."\n"; @@ -312,12 +318,20 @@ if ($ap_return){ } } $wrapper .= " ROCC_BARRIER();\n"; -$wrapper.=" #endif\n"; if($ap_return){ $wrapper .= " return ret_val;\n"; } $wrapper.="}"; open FILE, "> $wrapper_header"; -print FILE $wrapper; +print FILE "#ifndef ACCEL_WRAPPER_H +#define ACCEL_WRAPPER_H\n +"; +print FILE "$func_prototype;\n"; +print FILE "#endif"; +close FILE; + +open FILE, "> $wrapper_file"; +print FILE $wrapper; +close FILE; diff --git a/scripts/generate_wrapper_tl.pl b/scripts/generate_wrapper_tl.pl index 122d873..40f4a5e 100644 --- a/scripts/generate_wrapper_tl.pl +++ b/scripts/generate_wrapper_tl.pl @@ -26,8 +26,10 @@ if ((not defined($rdir)) or $rdir eq '') { exit(); } -my $wrapper_func_name = $func_name."_wrapper"; -my $wrapper_header= "bm_wrapper.h"; +my $wrapper_func_name = $func_name; +my $wrapper_file= "accel_wrapper.c"; +my $wrapper_header= "accel_wrapper.h"; + if ($prefix) { $func_name = $prefix.$func_name; @@ -93,7 +95,8 @@ if(!open VERILOG, "$verilog_file"){ print("\n"); } my $wrapper = '#include "'.$bm_inc_path.'mmio.h"'."\n"; - + $wrapper .="#define ACCEL_WRAPPER\n"; + $wrapper .='#include "accel.h"'."\n"; $wrapper .= '#define ACCEL_BASE '.$func_base_addr."\n"; $wrapper .= "#define AP_DONE_MASK 0b10\n"; @@ -119,10 +122,12 @@ if(!open VERILOG, "$verilog_file"){ $ap_return = 1; } + + my $func_prototype = ''; if ($ap_return){ - $wrapper .= $ap_return_type." $wrapper_func_name("; + $func_prototype .= $ap_return_type." $wrapper_func_name("; } else { - $wrapper .="void $wrapper_func_name("; + $func_prototype .="void $wrapper_func_name("; } my @arglist=(); @@ -140,10 +145,11 @@ if(!open VERILOG, "$verilog_file"){ } my $args = join ', ', @arglist; - $wrapper.= $args.") {"; + $func_prototype .= $args.")"; + $wrapper .= $func_prototype; $wrapper.= ' - // Disable Interrupt +{ // Disable Interrupt reg_write32(ACCEL_BASE + ACCEL_INT, 0x0); '; @@ -199,8 +205,18 @@ if(!open VERILOG, "$verilog_file"){ } $wrapper .="}\n"; + open FILE, "> $wrapper_header"; + print FILE "#ifndef ACCEL_WRAPPER_H +#define ACCEL_WRAPPER_H\n + "; + print FILE "$func_prototype;\n"; + print FILE "#endif"; + close FILE; + + open FILE, "> $wrapper_file"; print FILE $wrapper; + close FILE; #} #generate_bm_wrapper(\%var_dict, $func_base_addr); diff --git a/scripts/run_hls.pl b/scripts/run_hls.pl index f62460b..9e94655 100644 --- a/scripts/run_hls.pl +++ b/scripts/run_hls.pl @@ -19,30 +19,41 @@ if ($num_args > 2) { # Generate directive file based on LLVM emitted output # If the variable is of pointer type that an ap_bus interface is generated -my $directive_tcl_insn = 'set_directive_interface -mode ap_bus "test_c_func" test_var +my $directive_tcl_insn = 'set_directive_interface -mode ap_bus "FUNC" test_var '; my $prefix_tcl = ""; if ($prefix) { $prefix_tcl = "config_rtl -prefix ".$prefix."\n"; } -my $hls_pgm = undef; +#my $hls_pgm = undef; +my @hls_pgms = (); +my $cpp_flags = ''; if (-f $file_name.".cpp"){ - $hls_pgm = $file_name.'.cpp -cflags "-std=c++0x" '; + $cpp_flags = '-cflags "-std=c++0x"'; + @hls_pgms = glob('*.cpp'); } else { - $hls_pgm = $file_name.".c"; + @hls_pgms = glob('*.c'); } +my @hls_files = (); +foreach my $pgm (@hls_pgms) { + if ($pgm ne 'accel_wrapper.c') { + push(@hls_files, 'add_files '.$pgm.' '.$cpp_flags); + } +} +my $hls_files_str = join "\n", @hls_files; + # should change to add all .c files -my $hls_tcl = 'open_project -reset test_c_prj -set_top test_c_func -add_files hls_pgm +my $hls_tcl = 'open_project -reset PGM_prj +set_top FUNC +HLS_FILES_STR open_solution -reset "solution1" set_part {xcvu9p-flgb2104-2-i} config_compile -ignore_long_run_time create_clock -period 10 -name default -'.$prefix_tcl.' -#source "./test_c_prj/solution1/directives.tcl" +PREFIX_TCL +#source "./PGM_prj/solution1/directives.tcl" #config_interface -clock_enable config_interface -m_axi_addr64 csynth_design @@ -53,9 +64,10 @@ my $dir = getcwd; open HLS, ">$dir/run_hls.tcl"; # replace the function name and file name -$hls_tcl =~ s/test_c_func/$func_name/g; -$hls_tcl =~ s/test_c/$file_name/g; -$hls_tcl =~ s/hls_pgm/$hls_pgm/g; +$hls_tcl =~ s/FUNC/$func_name/g; +$hls_tcl =~ s/PGM/$file_name/g; +$hls_tcl =~ s/PREFIX_TCL/$prefix_tcl/g; +$hls_tcl =~ s/HLS_FILES_STR/$hls_files_str/g; # run vivado hls diff --git a/scripts/sw_aux/makefiles/Makefile.bm.in b/scripts/sw_aux/makefiles/Makefile.bm.in index 17a2eaa..5c60590 100644 --- a/scripts/sw_aux/makefiles/Makefile.bm.in +++ b/scripts/sw_aux/makefiles/Makefile.bm.in @@ -9,28 +9,58 @@ BM_LIB=$(BM_LIB_DIR)/libriscvbm.a SRC = $(wildcard *.c) SRC_S = $(wildcard *.S) -OBJ = $(SRC:.c=.o) -OBJ_S = $(SRC:.S=.s.o) +# Transforms the contents of the src variable, +# changing all file suffixes from .c to .o, +# thus constructing the object file list we need. +OBJ = $(SRC:.c=.o) $(SRC_S:.S=.s.o) + +OBJ_ORIG = $(filter-out accel_wrapper.o, $(OBJ)) +OBJ_ACCEL = $(filter-out accel.o, $(OBJ)) + +# Filter out the orig func in accel.o with $(OBJ: filter-out accel.o, $(wildcard *.o)) +# And decide whether to link the accel wrapper +# To be backward compatible, if define the CUSTOM_INST variable the default .rv +# will call the accelerator wrapper +ifdef CUSTOM_INST +OBJ_BASE = $(OBJ_ACCEL) +else +OBJ_BASE = $(OBJ_ORIG) +endif + +ifdef CUSTOM_DRIVER +OBJ_BASE = $(OBJ_ACCEL) +else +OBJ_BASE = $(OBJ_ORIG) +endif + .PHONY: clean default: all -all: $(addsuffix .riscv,$(TARGET)) +all: $(addsuffix .bm.rv,$(TARGET)) bm_accel dumps: $(addsuffix .dump,$(TARGET)) .s.o: $(SRC_S) $(CC) $(CFLAGS) -D__ASSEMBLY__=1 -c $< -o $@ -.o: $(SRC) mmio.h +.o: $(SRC) $(CC) $(CFLAGS) -c $< -o $@ -#%.riscv: %.o crt.o syscalls.o $(BM_LIB_DIR)/bm_linker_scripts/link.ld -$(TARGET).riscv: $(OBJ) $(OBJ_S) $(BM_LIB) $(BM_LIB_DIR)/link.ld - $(CC) -T $(BM_LIB_DIR)/link.ld $(LDFLAGS) -I$(BM_LIB_DIR) -L$(BM_LIB_DIR) $(BM_LIB_FLAG) $< -o $@ +#%.rv: %.o crt.o syscalls.o $(BM_LIB_DIR)/bm_linker_scripts/link.ld +# Use $info to debug +#$(TARGET).rv: $(OBJ) $(BM_LIB) $(BM_LIB_DIR)/link.ld $(info $(OBJ)) +$(TARGET).bm.rv: Makefile $(OBJ) $(BM_LIB) $(BM_LIB_DIR)/link.ld + $(CC) -T $(BM_LIB_DIR)/link.ld $(LDFLAGS) -I$(BM_LIB_DIR) -L$(BM_LIB_DIR) $(BM_LIB_FLAG) $(OBJ_BASE) -o $@ -%.dump: %.riscv +# Add new target +bm_accel: $(addsuffix .bm_accel.rv,$(TARGET)) + +$(TARGET).bm_accel.rv: $(OBJ) $(BM_LIB) $(BM_LIB_DIR)/link.ld + $(CC) -T $(BM_LIB_DIR)/link.ld $(LDFLAGS) -I$(BM_LIB_DIR) -L$(BM_LIB_DIR) $(BM_LIB_FLAG) $(OBJ_ACCEL) -o $@ + +%.dump: %.rv $(OBJDUMP) -D $< > $@ clean: - rm -f *.riscv *.o *.dump + rm -f *.rv *.o *.dump diff --git a/scripts/sw_aux/makefiles/Makefile.gcc.in b/scripts/sw_aux/makefiles/Makefile.gcc.in index b753a35..cec290b 100644 --- a/scripts/sw_aux/makefiles/Makefile.gcc.in +++ b/scripts/sw_aux/makefiles/Makefile.gcc.in @@ -7,13 +7,15 @@ CC :=$(ARCH)-unknown-elf-g++ CFLAGS += -fpermissive -DWRAP_$(FUNC) LDFLAGS += -fpermissive -src_files := $(TARGET).c -OBJECTS = $(src_files:.c=.o) +SRC = $(wildcard *.c) +OBJ = $(SRC:.c=.o) + +OBJ_ORIG = $(filter-out accel_wrapper.o, $(OBJ)) all: $(TARGET).rv -$(TARGET).rv: Makefile $(OBJECTS) - $(CC) $(LDFLAGS) $(OBJECTS) -o $@ +$(TARGET).rv: Makefile $(OBJ_ORIG) + $(CC) $(LDFLAGS) $(OBJ_ORIG) -o $@ $(ARCH)-unknown-elf-objdump -D $@ > $(TARGET).asm .c.o: @@ -27,4 +29,4 @@ run: spike pk $(TARGET).rv clean: - rm -f $(OBJECTS) $(TARGET).rv $(TARGET).asm $(TARGET).ll $(TARGET).S + rm -f $(OBJ) $(TARGET).rv $(TARGET).asm $(TARGET).ll $(TARGET).S