Coyote/sw/examples/perf_fpga/main.cpp

162 lines
4.9 KiB
C++
Raw Normal View History

2022-05-16 04:52:56 +08:00
#include <iostream>
#include <string>
#include <malloc.h>
#include <time.h>
#include <sys/time.h>
#include <chrono>
#include <fstream>
#include <fcntl.h>
#include <unistd.h>
#include <iomanip>
#ifdef EN_AVX
#include <x86intrin.h>
#endif
#include <boost/program_options.hpp>
#include <numeric>
#include <stdlib.h>
#include "cBench.hpp"
2022-11-04 00:32:34 +08:00
#include "cProcess.hpp"
2022-05-16 04:52:56 +08:00
using namespace std;
using namespace fpga;
/* Def params */
constexpr auto const targetRegion = 0;
constexpr auto const nReps = 1;
constexpr auto const defSize = 128; // 2^7
constexpr auto const maxSize = 16 * 1024;
constexpr auto const clkNs = 1000.0 / 300.0;
constexpr auto const nBenchRuns = 100;
/**
* @brief Benchmark API
*
*/
enum class BenchRegs : uint32_t {
CTRL_REG = 0,
DONE_REG = 1,
TIMER_REG = 2,
VADDR_REG = 3,
LEN_REG = 4,
PID_REG = 5,
N_REPS_REG = 6,
N_BEATS_REG = 7,
DEST_REG = 8
2022-05-16 04:52:56 +08:00
};
enum class BenchOper : uint8_t {
START_RD = 0x1,
START_WR = 0x2
};
/**
* @brief Average it out
*
*/
double vctr_avg(std::vector<double> const& v) {
return 1.0 * std::accumulate(v.begin(), v.end(), 0LL) / v.size();
}
/**
* @brief Throughput and latency tests, read and write
*
*/
int main(int argc, char *argv[])
{
// ---------------------------------------------------------------
// Args
// ---------------------------------------------------------------
uint32_t n_reps = nReps;
uint32_t n_pages = (maxSize + hugePageSize - 1) / hugePageSize;
uint32_t curr_size = defSize;
uint32_t dest = 0 ;
2022-05-16 04:52:56 +08:00
PR_HEADER("PARAMS");
std::cout << "vFPGA ID: " << targetRegion << std::endl;
2022-10-13 21:09:00 +08:00
std::cout << "Number of allocated pages (hugepages need to be enabled, otherwise alloc with HOST_2M!): " << n_pages << std::endl;
2022-05-16 04:52:56 +08:00
// ---------------------------------------------------------------
// Init
// ---------------------------------------------------------------
// Handles and alloc
2022-11-04 00:32:34 +08:00
cProcess cproc(targetRegion, getpid());
2022-12-14 17:21:30 +08:00
void* hMem = cproc.getMem({CoyoteAlloc::HOST_2M, n_pages});
2022-05-16 04:52:56 +08:00
// ---------------------------------------------------------------
// Runs
// ---------------------------------------------------------------
// Run Throughput
2022-11-04 00:32:34 +08:00
auto benchmark_run = [&](cProcess& cproc, const void* hMem, const BenchOper oper) {
2022-05-16 04:52:56 +08:00
// Set params
cproc.setCSR(reinterpret_cast<uint64_t>(hMem), static_cast<uint32_t>(BenchRegs::VADDR_REG));
cproc.setCSR(curr_size, static_cast<uint32_t>(BenchRegs::LEN_REG));
cproc.setCSR(cproc.getCpid(), static_cast<uint32_t>(BenchRegs::PID_REG));
cproc.setCSR(n_reps, static_cast<uint32_t>(BenchRegs::N_REPS_REG));
uint64_t n_beats = n_reps * ((curr_size + 64 - 1) / 64);
cproc.setCSR(n_beats, static_cast<uint32_t>(BenchRegs::N_BEATS_REG));
cproc.setCSR(dest, static_cast<uint32_t>(BenchRegs::DEST_REG));
2022-05-16 04:52:56 +08:00
// Fire
cproc.setCSR(static_cast<uint64_t>(oper), static_cast<uint32_t>(BenchRegs::CTRL_REG));
while(cproc.getCSR(static_cast<uint32_t>(BenchRegs::DONE_REG)) < n_reps) ;
return (double)(cproc.getCSR(static_cast<uint32_t>(BenchRegs::TIMER_REG))) * clkNs;
};
// Elapsed
std::vector<double> time_bench_rd;
std::vector<double> time_bench_wr;
// Throughput run
2022-10-13 21:09:00 +08:00
PR_HEADER("HOST THROUGHPUT");
2022-05-16 04:52:56 +08:00
while(curr_size <= maxSize) {
for(int j = 0; j < nBenchRuns; j++) {
time_bench_rd.emplace_back(benchmark_run(cproc, hMem, BenchOper::START_RD));
time_bench_wr.emplace_back(benchmark_run(cproc, hMem, BenchOper::START_WR));
}
2022-11-04 00:32:34 +08:00
std::cout << std::fixed << std::setprecision(2);
std::cout << std::setw(8) << curr_size << " [bytes], RD: "
<< std::setw(8) << ((n_reps * 1024 * curr_size) / vctr_avg(time_bench_rd)) << " [MB/s], WR: "
<< std::setw(8) << ((n_reps * 1024 * curr_size) / vctr_avg(time_bench_wr)) << " [MB/s]" << std::endl;
2022-05-16 04:52:56 +08:00
time_bench_rd.clear();
time_bench_wr.clear();
curr_size *= 2;
cproc.clearCompleted();
}
n_reps = 1;
curr_size = defSize;
// Latency run
2022-10-13 21:09:00 +08:00
PR_HEADER("HOST LATENCY");
2022-05-16 04:52:56 +08:00
while(curr_size <= maxSize) {
for(int j = 0; j < nBenchRuns; j++) {
time_bench_rd.emplace_back(benchmark_run(cproc, hMem, BenchOper::START_RD));
time_bench_wr.emplace_back(benchmark_run(cproc, hMem, BenchOper::START_WR));
}
2022-11-04 00:32:34 +08:00
std::cout << std::setw(8) << curr_size << " [bytes], RD: "
<< std::setw(8) << vctr_avg(time_bench_rd) << " [ns], WR: "
<< std::setw(8) << vctr_avg(time_bench_wr) << " [ns]" << std::endl;
2022-05-16 04:52:56 +08:00
time_bench_rd.clear();
time_bench_wr.clear();
curr_size *= 2;
cproc.clearCompleted();
}
std::cout << "\n";
// ---------------------------------------------------------------
// Exit
// ---------------------------------------------------------------
return EXIT_SUCCESS;
}