Update examples/scripts to force the user to define the accelerator in accel.c
This commit is contained in:
parent
1b802a85a2
commit
72769f9a1c
|
@ -0,0 +1,36 @@
|
|||
#include "accel.h"
|
||||
|
||||
/*
 * Element-wise vector add over two packed buffers.
 *
 * length_a: length_a[0] holds the element count ("length"); operand a
 *           follows in length_a[1 .. length].
 * b_c:      operand b is read from b_c[0 .. length-1]; the sums are written
 *           to b_c[length .. 2*length-1].
 *
 * A count <= 0 is treated as an empty vector: nothing is read or written
 * beyond length_a[0].
 *
 * Returns 0 always.
 */
int vadd(int* length_a, int* b_c) {
#pragma HLS INTERFACE ap_bus depth=10 port=length_a
#pragma HLS INTERFACE ap_bus depth=10 port=b_c

    int length = length_a[0];

    /* Guard: with a negative count the shift pair below would left-shift a
     * negative value (undefined behaviour) and the tail loop would walk
     * negative indices. */
    if (length <= 0) {
        return 0;
    }

    int *a = &length_a[1];

    /* Largest multiple of 8 that does not exceed length (length > 0 here). */
    int upper = (length >> 3) << 3;
    int i;

    for (i = 0; i < upper; i += 8) {
        // To prevent burst mode
        // (manually unrolled by 8; statement order kept from the original,
        //  element 1 is deliberately handled before element 0)
        b_c[length+i+1] = a[i+1] + b_c[i+1];
        b_c[length+i+0] = a[i+0] + b_c[i+0];
        b_c[length+i+2] = a[i+2] + b_c[i+2];
        b_c[length+i+3] = a[i+3] + b_c[i+3];

        b_c[length+i+4] = a[i+4] + b_c[i+4];
        b_c[length+i+5] = a[i+5] + b_c[i+5];
        b_c[length+i+6] = a[i+6] + b_c[i+6];
        b_c[length+i+7] = a[i+7] + b_c[i+7];
    }

    /* Remaining (length % 8) elements. */
    for (i = upper; i < length; i++) {
        b_c[length+i] = a[i] + b_c[i];
    }
    return 0;
}
|
||||
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
#ifndef ACCEL_H
|
||||
#define ACCEL_H
|
||||
|
||||
// MUST ADD
|
||||
// The generated accel wrapper defines ACCEL_WRAPPER before including this header.
|
||||
// The accel_wrapper.h is also generated
|
||||
#ifdef ACCEL_WRAPPER
|
||||
#include "accel_wrapper.h"
|
||||
#else
|
||||
int vadd(int* length_a, int* b_c);
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -1,11 +1,10 @@
|
|||
#include<stdio.h>
|
||||
#include<stdint.h>
|
||||
#include "time.h"
|
||||
#ifdef CUSTOM_INST
|
||||
#include "bm_wrapper.h"
|
||||
#endif
|
||||
#define LENGTH 80
|
||||
|
||||
#include "accel.h"
|
||||
|
||||
void print_vec(int* vec, int length){
|
||||
for(int i = 0; i < length; i++){
|
||||
if (i != 0 ) printf(", ");
|
||||
|
@ -13,39 +12,6 @@ void print_vec(int* vec, int length){
|
|||
}
|
||||
}
|
||||
|
||||
/*
 * Element-wise vector add over two packed buffers.
 *
 * length_a: length_a[0] holds the element count ("length"); operand a
 *           follows in length_a[1 .. length].
 * b_c:      operand b is read from b_c[0 .. length-1]; the sums are written
 *           to b_c[length .. 2*length-1].
 *
 * A count <= 0 is treated as an empty vector: nothing is read or written
 * beyond length_a[0].
 *
 * Returns 0 always.
 */
int vadd(int* length_a, int* b_c) {
#pragma HLS INTERFACE ap_bus depth=10 port=length_a
#pragma HLS INTERFACE ap_bus depth=10 port=b_c

    int length = length_a[0];

    /* Guard: with a negative count the shift pair below would left-shift a
     * negative value (undefined behaviour) and the tail loop would walk
     * negative indices. */
    if (length <= 0) {
        return 0;
    }

    int *a = &length_a[1];

    /* Largest multiple of 8 that does not exceed length (length > 0 here). */
    int upper = (length >> 3) << 3;
    int i;

    for (i = 0; i < upper; i += 8) {
        // To prevent burst mode
        // (manually unrolled by 8; statement order kept from the original,
        //  element 1 is deliberately handled before element 0)
        b_c[length+i+1] = a[i+1] + b_c[i+1];
        b_c[length+i+0] = a[i+0] + b_c[i+0];
        b_c[length+i+2] = a[i+2] + b_c[i+2];
        b_c[length+i+3] = a[i+3] + b_c[i+3];

        b_c[length+i+4] = a[i+4] + b_c[i+4];
        b_c[length+i+5] = a[i+5] + b_c[i+5];
        b_c[length+i+6] = a[i+6] + b_c[i+6];
        b_c[length+i+7] = a[i+7] + b_c[i+7];
    }

    /* Remaining (length % 8) elements. */
    for (i = upper; i < length; i++) {
        b_c[length+i] = a[i] + b_c[i];
    }
    return 0;
}
|
||||
|
||||
int main () {
|
||||
int length_a[LENGTH + 1], b_c[LENGTH + LENGTH];
|
||||
//int a[LENGTH], b[LENGTH], c[LENGTH];
|
||||
|
@ -59,11 +25,7 @@ int main () {
|
|||
|
||||
uint64_t begin, end, dur;
|
||||
begin = read_cycle();
|
||||
#ifdef CUSTOM_INST
|
||||
vadd_wrapper(length_a, b_c);
|
||||
#else
|
||||
vadd(length_a, b_c);
|
||||
#endif
|
||||
end = read_cycle();
|
||||
duration(begin, end);
|
||||
|
||||
|
|
|
@ -0,0 +1,116 @@
|
|||
#include "accel.h"
|
||||
|
||||
/*
 * Element-wise vector add: c[i] = a[i] + b[i] for 0 <= i < length.
 *
 * a, b:   input vectors, read at indices 0 .. length-1.
 * c:      output vector, written at indices 0 .. length-1.
 * length: number of elements; a value <= 0 is treated as an empty vector
 *         and no memory is touched.
 *
 * Returns 0 always.
 */
int vadd(int * a, int * b, int* c, int length) {

    // For pointer type
#pragma HLS INTERFACE m_axi port=a offset=slave bundle=gmem0 num_write_outstanding=16 num_read_outstanding=16 max_write_burst_length=16 max_read_burst_length= 16 depth=16 latency=125

#pragma HLS INTERFACE m_axi port=b offset=slave bundle=gmem0 num_write_outstanding=16 num_read_outstanding=16 max_write_burst_length=16 max_read_burst_length= 16 depth=16 latency=125
    // Slave is for AXI4Lite, with burst mode disabled
#pragma HLS INTERFACE m_axi port=c offset=slave bundle=gmem0 num_write_outstanding=16 num_read_outstanding=16 max_write_burst_length=16 max_read_burst_length= 16 depth=16 latency=125

#pragma HLS INTERFACE s_axilite port=a bundle=control
#pragma HLS INTERFACE s_axilite port=b bundle=control
#pragma HLS INTERFACE s_axilite port=c bundle=control
#pragma HLS INTERFACE s_axilite port=length bundle=control
#pragma HLS INTERFACE s_axilite port=return bundle=control

    /* Guard: with a negative length the shift pair below would left-shift a
     * negative value (undefined behaviour) and the tail loop would walk
     * negative indices. */
    if (length <= 0) {
        return 0;
    }

    /* Largest multiple of 8 that does not exceed length (length > 0 here). */
    int upper = (length >> 3) << 3;
    int i;

    for (i = 0; i < upper; i += 8) {
        // To prevent burst mode (manually unrolled by 8)
        c[i+0] = a[i+0] + b[i+0];
        c[i+1] = a[i+1] + b[i+1];
        c[i+2] = a[i+2] + b[i+2];
        c[i+3] = a[i+3] + b[i+3];

        c[i+4] = a[i+4] + b[i+4];
        c[i+5] = a[i+5] + b[i+5];
        c[i+6] = a[i+6] + b[i+6];
        c[i+7] = a[i+7] + b[i+7];
    }

    /* Remaining (length % 8) elements. */
    for (i = upper; i < length; i++) {
        c[i] = a[i] + b[i];
    }
    return 0;
}
|
||||
|
||||
//Including to use ap_uint<> datatype
|
||||
//#include <ap_int.h>
|
||||
/*
|
||||
#define BUFFER_SIZE 128
|
||||
#define DATAWIDTH 512
|
||||
#define VECTOR_SIZE (DATAWIDTH / 32) // vector size is 16 (512/32 = 16)
|
||||
//typedef ap_uint<DATAWIDTH> uint512_dt;
|
||||
|
||||
Vector Addition Kernel Implementation using uint512_dt datatype
|
||||
Arguments:
|
||||
in1 (input) --> Input Vector1
|
||||
in2 (input) --> Input Vector2
|
||||
out (output) --> Output Vector
|
||||
size (input) --> Size of Vector in Integer
|
||||
*/
|
||||
/*extern "C" {
|
||||
void vadd(
|
||||
const uint512_dt *in1, // Read-Only Vector 1
|
||||
const uint512_dt *in2, // Read-Only Vector 2
|
||||
uint512_dt *out, // Output Result
|
||||
int size // Size in integer
|
||||
)
|
||||
{
|
||||
#pragma HLS INTERFACE m_axi port=in1 offset=slave bundle=gmem
|
||||
#pragma HLS INTERFACE m_axi port=in2 offset=slave bundle=gmem
|
||||
#pragma HLS INTERFACE m_axi port=out offset=slave bundle=gmem
|
||||
#pragma HLS INTERFACE s_axilite port=in1 bundle=control
|
||||
#pragma HLS INTERFACE s_axilite port=in2 bundle=control
|
||||
#pragma HLS INTERFACE s_axilite port=out bundle=control
|
||||
#pragma HLS INTERFACE s_axilite port=size bundle=control
|
||||
#pragma HLS INTERFACE s_axilite port=return bundle=control
|
||||
|
||||
uint512_dt v1_local[BUFFER_SIZE]; // Local memory to store vector1
|
||||
uint512_dt result_local[BUFFER_SIZE];// Local Memory to store result
|
||||
|
||||
// Input vector size for integer vectors. However kernel is directly
|
||||
// accessing 512bit data (total 16 elements). So total number of read
|
||||
// from global memory is calculated here:
|
||||
int size_in16 = (size-1) / VECTOR_SIZE + 1;
|
||||
|
||||
//Per iteration of this loop perform BUFFER_SIZE vector addition
|
||||
for(int i = 0; i < size_in16; i += BUFFER_SIZE)
|
||||
{
|
||||
#pragma HLS LOOP_TRIPCOUNT min=8 max=8
|
||||
int chunk_size = BUFFER_SIZE;
|
||||
|
||||
//boundary checks
|
||||
if ((i + BUFFER_SIZE) > size_in16)
|
||||
chunk_size = size_in16 - i;
|
||||
|
||||
//burst read first vector from global memory to local memory
|
||||
v1_rd: for (int j = 0 ; j < chunk_size; j++){
|
||||
#pragma HLS pipeline
|
||||
#pragma HLS LOOP_TRIPCOUNT min=128 max=128
|
||||
v1_local[j] = in1 [i + j];
|
||||
}
|
||||
|
||||
//burst read second vector and perform vector addition
|
||||
v2_rd_add: for (int j = 0 ; j < chunk_size; j++){
|
||||
#pragma HLS pipeline
|
||||
#pragma HLS LOOP_TRIPCOUNT min=128 max=128
|
||||
uint512_dt tmpV1 = v1_local[j];
|
||||
uint512_dt tmpV2 = in2[i+j];
|
||||
result_local[j] = tmpV1 + tmpV2; // Vector Addition Operation
|
||||
}
|
||||
|
||||
//burst write the result
|
||||
out_write: for (int j = 0 ; j < chunk_size; j++){
|
||||
#pragma HLS pipeline
|
||||
#pragma HLS LOOP_TRIPCOUNT min=128 max=128
|
||||
out[i+j] = result_local[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}*/
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
#ifndef ACCEL_H
|
||||
#define ACCEL_H
|
||||
|
||||
// MUST ADD
|
||||
// The generated accel wrapper defines ACCEL_WRAPPER before including this header.
|
||||
// The accel_wrapper.h is also generated
|
||||
#ifdef ACCEL_WRAPPER
|
||||
#include "accel_wrapper.h"
|
||||
#else
|
||||
int vadd(int * a, int * b, int* c, int length);
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -1,10 +1,8 @@
|
|||
#include<stdio.h>
|
||||
#include<stdint.h>
|
||||
#define LENGTH 80
|
||||
#ifdef CUSTOM_DRIVER
|
||||
#include "bm_wrapper.h"
|
||||
#endif
|
||||
#include "time.h"
|
||||
#include "accel.h"
|
||||
|
||||
void print_vec(int* vec, int length){
|
||||
for(int i = 0; i < length; i++){
|
||||
|
@ -13,121 +11,6 @@ void print_vec(int* vec, int length){
|
|||
}
|
||||
}
|
||||
|
||||
//Including to use ap_uint<> datatype
|
||||
//#include <ap_int.h>
|
||||
/*
|
||||
#define BUFFER_SIZE 128
|
||||
#define DATAWIDTH 512
|
||||
#define VECTOR_SIZE (DATAWIDTH / 32) // vector size is 16 (512/32 = 16)
|
||||
//typedef ap_uint<DATAWIDTH> uint512_dt;
|
||||
|
||||
Vector Addition Kernel Implementation using uint512_dt datatype
|
||||
Arguments:
|
||||
in1 (input) --> Input Vector1
|
||||
in2 (input) --> Input Vector2
|
||||
out (output) --> Output Vector
|
||||
size (input) --> Size of Vector in Integer
|
||||
*/
|
||||
/*extern "C" {
|
||||
void vadd(
|
||||
const uint512_dt *in1, // Read-Only Vector 1
|
||||
const uint512_dt *in2, // Read-Only Vector 2
|
||||
uint512_dt *out, // Output Result
|
||||
int size // Size in integer
|
||||
)
|
||||
{
|
||||
#pragma HLS INTERFACE m_axi port=in1 offset=slave bundle=gmem
|
||||
#pragma HLS INTERFACE m_axi port=in2 offset=slave bundle=gmem
|
||||
#pragma HLS INTERFACE m_axi port=out offset=slave bundle=gmem
|
||||
#pragma HLS INTERFACE s_axilite port=in1 bundle=control
|
||||
#pragma HLS INTERFACE s_axilite port=in2 bundle=control
|
||||
#pragma HLS INTERFACE s_axilite port=out bundle=control
|
||||
#pragma HLS INTERFACE s_axilite port=size bundle=control
|
||||
#pragma HLS INTERFACE s_axilite port=return bundle=control
|
||||
|
||||
uint512_dt v1_local[BUFFER_SIZE]; // Local memory to store vector1
|
||||
uint512_dt result_local[BUFFER_SIZE];// Local Memory to store result
|
||||
|
||||
// Input vector size for integer vectors. However kernel is directly
|
||||
// accessing 512bit data (total 16 elements). So total number of read
|
||||
// from global memory is calculated here:
|
||||
int size_in16 = (size-1) / VECTOR_SIZE + 1;
|
||||
|
||||
//Per iteration of this loop perform BUFFER_SIZE vector addition
|
||||
for(int i = 0; i < size_in16; i += BUFFER_SIZE)
|
||||
{
|
||||
#pragma HLS LOOP_TRIPCOUNT min=8 max=8
|
||||
int chunk_size = BUFFER_SIZE;
|
||||
|
||||
//boundary checks
|
||||
if ((i + BUFFER_SIZE) > size_in16)
|
||||
chunk_size = size_in16 - i;
|
||||
|
||||
//burst read first vector from global memory to local memory
|
||||
v1_rd: for (int j = 0 ; j < chunk_size; j++){
|
||||
#pragma HLS pipeline
|
||||
#pragma HLS LOOP_TRIPCOUNT min=128 max=128
|
||||
v1_local[j] = in1 [i + j];
|
||||
}
|
||||
|
||||
//burst read second vector and perform vector addition
|
||||
v2_rd_add: for (int j = 0 ; j < chunk_size; j++){
|
||||
#pragma HLS pipeline
|
||||
#pragma HLS LOOP_TRIPCOUNT min=128 max=128
|
||||
uint512_dt tmpV1 = v1_local[j];
|
||||
uint512_dt tmpV2 = in2[i+j];
|
||||
result_local[j] = tmpV1 + tmpV2; // Vector Addition Operation
|
||||
}
|
||||
|
||||
//burst write the result
|
||||
out_write: for (int j = 0 ; j < chunk_size; j++){
|
||||
#pragma HLS pipeline
|
||||
#pragma HLS LOOP_TRIPCOUNT min=128 max=128
|
||||
out[i+j] = result_local[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}*/
|
||||
|
||||
/*
 * Element-wise vector add: c[i] = a[i] + b[i] for 0 <= i < length.
 *
 * a, b:   input vectors, read at indices 0 .. length-1.
 * c:      output vector, written at indices 0 .. length-1.
 * length: number of elements; a value <= 0 is treated as an empty vector
 *         and no memory is touched.
 *
 * Returns 0 always.
 */
int vadd(int * a, int * b, int* c, int length) {

    // For pointer type
#pragma HLS INTERFACE m_axi port=a offset=slave bundle=gmem0 num_write_outstanding=16 num_read_outstanding=16 max_write_burst_length=16 max_read_burst_length= 16 depth=16 latency=125

#pragma HLS INTERFACE m_axi port=b offset=slave bundle=gmem0 num_write_outstanding=16 num_read_outstanding=16 max_write_burst_length=16 max_read_burst_length= 16 depth=16 latency=125
    // Slave is for AXI4Lite, with burst mode disabled
#pragma HLS INTERFACE m_axi port=c offset=slave bundle=gmem0 num_write_outstanding=16 num_read_outstanding=16 max_write_burst_length=16 max_read_burst_length= 16 depth=16 latency=125

#pragma HLS INTERFACE s_axilite port=a bundle=control
#pragma HLS INTERFACE s_axilite port=b bundle=control
#pragma HLS INTERFACE s_axilite port=c bundle=control
#pragma HLS INTERFACE s_axilite port=length bundle=control
#pragma HLS INTERFACE s_axilite port=return bundle=control

    /* Guard: with a negative length the shift pair below would left-shift a
     * negative value (undefined behaviour) and the tail loop would walk
     * negative indices. */
    if (length <= 0) {
        return 0;
    }

    /* Largest multiple of 8 that does not exceed length (length > 0 here). */
    int upper = (length >> 3) << 3;
    int i;

    for (i = 0; i < upper; i += 8) {
        // To prevent burst mode (manually unrolled by 8)
        c[i+0] = a[i+0] + b[i+0];
        c[i+1] = a[i+1] + b[i+1];
        c[i+2] = a[i+2] + b[i+2];
        c[i+3] = a[i+3] + b[i+3];

        c[i+4] = a[i+4] + b[i+4];
        c[i+5] = a[i+5] + b[i+5];
        c[i+6] = a[i+6] + b[i+6];
        c[i+7] = a[i+7] + b[i+7];
    }

    /* Remaining (length % 8) elements. */
    for (i = upper; i < length; i++) {
        c[i] = a[i] + b[i];
    }
    return 0;
}
|
||||
|
||||
int main () {
|
||||
|
||||
int a[LENGTH], b[LENGTH], c[LENGTH];
|
||||
|
@ -137,14 +20,10 @@ int main () {
|
|||
a[i] = i;
|
||||
b[i] = i + 5;
|
||||
}
|
||||
uint64_t begin, end, dur;
|
||||
uint64_t begin, end;
|
||||
|
||||
begin = read_cycle();
|
||||
#ifdef CUSTOM_DRIVER
|
||||
vadd_wrapper(a, b, c, length);
|
||||
#else
|
||||
vadd(a, b, c, length);
|
||||
#endif
|
||||
end = read_cycle();
|
||||
duration(begin, end);
|
||||
printf("A = [");
|
||||
|
|
|
@ -24,8 +24,9 @@ if ((not defined($rdir)) or $rdir eq '') {
|
|||
print("Please source sourceme-f1.sh!\n");
|
||||
exit();
|
||||
}
|
||||
my $wrapper_func_name = $func_name."_wrapper";
|
||||
my $wrapper_header= "bm_wrapper.h";
|
||||
my $wrapper_func_name = $func_name;
|
||||
my $wrapper_file= "accel_wrapper.c";
|
||||
my $wrapper_header= "accel_wrapper.h";
|
||||
if ($prefix) {
|
||||
$func_name = $prefix.$func_name;
|
||||
}
|
||||
|
@ -256,6 +257,8 @@ foreach my $arg (@verilog_input_pointer_arg) {
|
|||
}
|
||||
|
||||
my $wrapper = '#include "'.$bm_inc_path.'rocc.h"'."\n";
|
||||
$wrapper .="#define ACCEL_WRAPPER\n";
|
||||
$wrapper .='#include "accel.h"'."\n";
|
||||
|
||||
my $return_type = "void ";
|
||||
if($ap_return){
|
||||
|
@ -263,7 +266,7 @@ if($ap_return){
|
|||
}
|
||||
|
||||
my $total_args = @verilog_input_scalar + $hash_count;
|
||||
$wrapper .= "$return_type $wrapper_func_name(";
|
||||
my $func_prototype = "$return_type $wrapper_func_name(";
|
||||
|
||||
my @args = ();
|
||||
foreach my $arg (@verilog_input_scalar) {
|
||||
|
@ -277,20 +280,23 @@ my $arg_str = join ', ', @args;
|
|||
$i = 0;
|
||||
foreach my $arg (@args) {
|
||||
if ($i != 0){
|
||||
$wrapper .=", "
|
||||
$func_prototype.=", "
|
||||
}
|
||||
$wrapper .="uint64_t $arg";
|
||||
$func_prototype .="uint64_t $arg";
|
||||
|
||||
$i=1;
|
||||
}
|
||||
$wrapper .= ") {
|
||||
";
|
||||
$func_prototype .= ")";
|
||||
|
||||
$wrapper .= $func_prototype;
|
||||
if($ap_return){
|
||||
$wrapper .= " uint64_t ret_val;\n";
|
||||
}
|
||||
$wrapper .= "
|
||||
#ifdef CUSTOM_INST
|
||||
{
|
||||
uint64_t ret_val;\n";
|
||||
}
|
||||
|
||||
#$wrapper .= " ROCC_BARRIER();\n";
|
||||
$wrapper .="
|
||||
#define XCUSTOM_ACC ";
|
||||
$wrapper .= $rocc_index."\n";
|
||||
|
||||
|
@ -312,12 +318,20 @@ if ($ap_return){
|
|||
}
|
||||
}
|
||||
$wrapper .= " ROCC_BARRIER();\n";
|
||||
$wrapper.=" #endif\n";
|
||||
if($ap_return){
|
||||
$wrapper .= " return ret_val;\n";
|
||||
}
|
||||
$wrapper.="}";
|
||||
|
||||
open FILE, "> $wrapper_header";
|
||||
print FILE $wrapper;
|
||||
print FILE "#ifndef ACCEL_WRAPPER_H
|
||||
#define ACCEL_WRAPPER_H\n
|
||||
";
|
||||
print FILE "$func_prototype;\n";
|
||||
print FILE "#endif";
|
||||
close FILE;
|
||||
|
||||
open FILE, "> $wrapper_file";
|
||||
print FILE $wrapper;
|
||||
close FILE;
|
||||
|
||||
|
|
|
@ -26,8 +26,10 @@ if ((not defined($rdir)) or $rdir eq '') {
|
|||
exit();
|
||||
}
|
||||
|
||||
my $wrapper_func_name = $func_name."_wrapper";
|
||||
my $wrapper_header= "bm_wrapper.h";
|
||||
my $wrapper_func_name = $func_name;
|
||||
my $wrapper_file= "accel_wrapper.c";
|
||||
my $wrapper_header= "accel_wrapper.h";
|
||||
|
||||
|
||||
if ($prefix) {
|
||||
$func_name = $prefix.$func_name;
|
||||
|
@ -93,7 +95,8 @@ if(!open VERILOG, "$verilog_file"){
|
|||
print("\n");
|
||||
}
|
||||
my $wrapper = '#include "'.$bm_inc_path.'mmio.h"'."\n";
|
||||
|
||||
$wrapper .="#define ACCEL_WRAPPER\n";
|
||||
$wrapper .='#include "accel.h"'."\n";
|
||||
$wrapper .= '#define ACCEL_BASE '.$func_base_addr."\n";
|
||||
|
||||
$wrapper .= "#define AP_DONE_MASK 0b10\n";
|
||||
|
@ -119,10 +122,12 @@ if(!open VERILOG, "$verilog_file"){
|
|||
$ap_return = 1;
|
||||
}
|
||||
|
||||
|
||||
my $func_prototype = '';
|
||||
if ($ap_return){
|
||||
$wrapper .= $ap_return_type." $wrapper_func_name(";
|
||||
$func_prototype .= $ap_return_type." $wrapper_func_name(";
|
||||
} else {
|
||||
$wrapper .="void $wrapper_func_name(";
|
||||
$func_prototype .="void $wrapper_func_name(";
|
||||
}
|
||||
|
||||
my @arglist=();
|
||||
|
@ -140,10 +145,11 @@ if(!open VERILOG, "$verilog_file"){
|
|||
}
|
||||
|
||||
my $args = join ', ', @arglist;
|
||||
$wrapper.= $args.") {";
|
||||
$func_prototype .= $args.")";
|
||||
|
||||
$wrapper .= $func_prototype;
|
||||
$wrapper.= '
|
||||
// Disable Interrupt
|
||||
{ // Disable Interrupt
|
||||
reg_write32(ACCEL_BASE + ACCEL_INT, 0x0);
|
||||
';
|
||||
|
||||
|
@ -199,8 +205,18 @@ if(!open VERILOG, "$verilog_file"){
|
|||
}
|
||||
|
||||
$wrapper .="}\n";
|
||||
|
||||
open FILE, "> $wrapper_header";
|
||||
print FILE "#ifndef ACCEL_WRAPPER_H
|
||||
#define ACCEL_WRAPPER_H\n
|
||||
";
|
||||
print FILE "$func_prototype;\n";
|
||||
print FILE "#endif";
|
||||
close FILE;
|
||||
|
||||
open FILE, "> $wrapper_file";
|
||||
print FILE $wrapper;
|
||||
close FILE;
|
||||
#}
|
||||
|
||||
#generate_bm_wrapper(\%var_dict, $func_base_addr);
|
||||
|
|
|
@ -19,30 +19,41 @@ if ($num_args > 2) {
|
|||
# Generate directive file based on LLVM emitted output
|
||||
# If the variable is of pointer type, then an ap_bus interface is generated
|
||||
|
||||
my $directive_tcl_insn = 'set_directive_interface -mode ap_bus "test_c_func" test_var
|
||||
my $directive_tcl_insn = 'set_directive_interface -mode ap_bus "FUNC" test_var
|
||||
';
|
||||
|
||||
my $prefix_tcl = "";
|
||||
if ($prefix) {
|
||||
$prefix_tcl = "config_rtl -prefix ".$prefix."\n";
|
||||
}
|
||||
my $hls_pgm = undef;
|
||||
#my $hls_pgm = undef;
|
||||
my @hls_pgms = ();
|
||||
my $cpp_flags = '';
|
||||
if (-f $file_name.".cpp"){
|
||||
$hls_pgm = $file_name.'.cpp -cflags "-std=c++0x" ';
|
||||
$cpp_flags = '-cflags "-std=c++0x"';
|
||||
@hls_pgms = glob('*.cpp');
|
||||
} else {
|
||||
$hls_pgm = $file_name.".c";
|
||||
@hls_pgms = glob('*.c');
|
||||
}
|
||||
|
||||
my @hls_files = ();
|
||||
foreach my $pgm (@hls_pgms) {
|
||||
if ($pgm ne 'accel_wrapper.c') {
|
||||
push(@hls_files, 'add_files '.$pgm.' '.$cpp_flags);
|
||||
}
|
||||
}
|
||||
my $hls_files_str = join "\n", @hls_files;
|
||||
|
||||
# should change to add all .c files
|
||||
my $hls_tcl = 'open_project -reset test_c_prj
|
||||
set_top test_c_func
|
||||
add_files hls_pgm
|
||||
my $hls_tcl = 'open_project -reset PGM_prj
|
||||
set_top FUNC
|
||||
HLS_FILES_STR
|
||||
open_solution -reset "solution1"
|
||||
set_part {xcvu9p-flgb2104-2-i}
|
||||
config_compile -ignore_long_run_time
|
||||
create_clock -period 10 -name default
|
||||
'.$prefix_tcl.'
|
||||
#source "./test_c_prj/solution1/directives.tcl"
|
||||
PREFIX_TCL
|
||||
#source "./PGM_prj/solution1/directives.tcl"
|
||||
#config_interface -clock_enable
|
||||
config_interface -m_axi_addr64
|
||||
csynth_design
|
||||
|
@ -53,9 +64,10 @@ my $dir = getcwd;
|
|||
open HLS, ">$dir/run_hls.tcl";
|
||||
|
||||
# replace the function name and file name
|
||||
$hls_tcl =~ s/test_c_func/$func_name/g;
|
||||
$hls_tcl =~ s/test_c/$file_name/g;
|
||||
$hls_tcl =~ s/hls_pgm/$hls_pgm/g;
|
||||
$hls_tcl =~ s/FUNC/$func_name/g;
|
||||
$hls_tcl =~ s/PGM/$file_name/g;
|
||||
$hls_tcl =~ s/PREFIX_TCL/$prefix_tcl/g;
|
||||
$hls_tcl =~ s/HLS_FILES_STR/$hls_files_str/g;
|
||||
|
||||
|
||||
# run vivado hls
|
||||
|
|
|
@ -9,28 +9,58 @@ BM_LIB=$(BM_LIB_DIR)/libriscvbm.a
|
|||
|
||||
SRC = $(wildcard *.c)
|
||||
SRC_S = $(wildcard *.S)
|
||||
OBJ = $(SRC:.c=.o)
|
||||
OBJ_S = $(SRC:.S=.s.o)
|
||||
# Transforms the contents of the src variable,
|
||||
# changing all file suffixes from .c to .o,
|
||||
# thus constructing the object file list we need.
|
||||
OBJ = $(SRC:.c=.o) $(SRC_S:.S=.s.o)
|
||||
|
||||
OBJ_ORIG = $(filter-out accel_wrapper.o, $(OBJ))
|
||||
OBJ_ACCEL = $(filter-out accel.o, $(OBJ))
|
||||
|
||||
# Filter out the orig func in accel.o with $(OBJ: filter-out accel.o, $(wildcard *.o))
|
||||
# And decide whether to link the accel wrapper
|
||||
# To be backward compatible, if the CUSTOM_INST variable is defined, the default .rv
|
||||
# will call the accelerator wrapper
|
||||
# Select the objects linked into the default .bm.rv image.
# OBJ_ORIG excludes accel_wrapper.o; OBJ_ACCEL excludes accel.o.
# Defining either CUSTOM_INST or CUSTOM_DRIVER selects OBJ_ACCEL.
# The default is assigned once up front so that an undefined CUSTOM_DRIVER
# cannot override a defined CUSTOM_INST (or vice versa).
OBJ_BASE = $(OBJ_ORIG)

ifdef CUSTOM_INST
OBJ_BASE = $(OBJ_ACCEL)
endif

ifdef CUSTOM_DRIVER
OBJ_BASE = $(OBJ_ACCEL)
endif
|
||||
|
||||
.PHONY: clean
|
||||
|
||||
default: all
|
||||
|
||||
all: $(addsuffix .riscv,$(TARGET))
|
||||
all: $(addsuffix .bm.rv,$(TARGET)) bm_accel
|
||||
|
||||
dumps: $(addsuffix .dump,$(TARGET))
|
||||
|
||||
.s.o: $(SRC_S)
|
||||
$(CC) $(CFLAGS) -D__ASSEMBLY__=1 -c $< -o $@
|
||||
|
||||
.o: $(SRC) mmio.h
|
||||
.o: $(SRC)
|
||||
$(CC) $(CFLAGS) -c $< -o $@
|
||||
|
||||
#%.riscv: %.o crt.o syscalls.o $(BM_LIB_DIR)/bm_linker_scripts/link.ld
|
||||
$(TARGET).riscv: $(OBJ) $(OBJ_S) $(BM_LIB) $(BM_LIB_DIR)/link.ld
|
||||
$(CC) -T $(BM_LIB_DIR)/link.ld $(LDFLAGS) -I$(BM_LIB_DIR) -L$(BM_LIB_DIR) $(BM_LIB_FLAG) $< -o $@
|
||||
#%.rv: %.o crt.o syscalls.o $(BM_LIB_DIR)/bm_linker_scripts/link.ld
|
||||
# Use $info to debug
|
||||
#$(TARGET).rv: $(OBJ) $(BM_LIB) $(BM_LIB_DIR)/link.ld $(info $(OBJ))
|
||||
$(TARGET).bm.rv: Makefile $(OBJ) $(BM_LIB) $(BM_LIB_DIR)/link.ld
|
||||
$(CC) -T $(BM_LIB_DIR)/link.ld $(LDFLAGS) -I$(BM_LIB_DIR) -L$(BM_LIB_DIR) $(BM_LIB_FLAG) $(OBJ_BASE) -o $@
|
||||
|
||||
%.dump: %.riscv
|
||||
# Add new target
|
||||
bm_accel: $(addsuffix .bm_accel.rv,$(TARGET))
|
||||
|
||||
$(TARGET).bm_accel.rv: $(OBJ) $(BM_LIB) $(BM_LIB_DIR)/link.ld
|
||||
$(CC) -T $(BM_LIB_DIR)/link.ld $(LDFLAGS) -I$(BM_LIB_DIR) -L$(BM_LIB_DIR) $(BM_LIB_FLAG) $(OBJ_ACCEL) -o $@
|
||||
|
||||
%.dump: %.rv
|
||||
$(OBJDUMP) -D $< > $@
|
||||
|
||||
clean:
|
||||
rm -f *.riscv *.o *.dump
|
||||
rm -f *.rv *.o *.dump
|
||||
|
|
|
@ -7,13 +7,15 @@ CC :=$(ARCH)-unknown-elf-g++
|
|||
CFLAGS += -fpermissive -DWRAP_$(FUNC)
|
||||
LDFLAGS += -fpermissive
|
||||
|
||||
src_files := $(TARGET).c
|
||||
OBJECTS = $(src_files:.c=.o)
|
||||
SRC = $(wildcard *.c)
|
||||
OBJ = $(SRC:.c=.o)
|
||||
|
||||
OBJ_ORIG = $(filter-out accel_wrapper.o, $(OBJ))
|
||||
|
||||
all: $(TARGET).rv
|
||||
|
||||
$(TARGET).rv: Makefile $(OBJECTS)
|
||||
$(CC) $(LDFLAGS) $(OBJECTS) -o $@
|
||||
$(TARGET).rv: Makefile $(OBJ_ORIG)
|
||||
$(CC) $(LDFLAGS) $(OBJ_ORIG) -o $@
|
||||
$(ARCH)-unknown-elf-objdump -D $@ > $(TARGET).asm
|
||||
|
||||
.c.o:
|
||||
|
@ -27,4 +29,4 @@ run:
|
|||
spike pk $(TARGET).rv
|
||||
|
||||
clean:
|
||||
rm -f $(OBJECTS) $(TARGET).rv $(TARGET).asm $(TARGET).ll $(TARGET).S
|
||||
rm -f $(OBJ) $(TARGET).rv $(TARGET).asm $(TARGET).ll $(TARGET).S
|
||||
|
|
Loading…
Reference in New Issue