Merge pull request #1 from hqjenny/hardcodedWorkloads

Hardcoded workloads
This commit is contained in:
hqjenny 2019-11-04 19:42:57 -08:00 committed by GitHub
commit 7c7e0659ce
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
415 changed files with 3161616 additions and 15329 deletions

9
workloads/.gitignore vendored Normal file
View File

@ -0,0 +1,9 @@
*.o
*.a
*.driver
*.riscv
*.rv
*.ll
*.S
*.asm
.doit.db

62
workloads/README.md Normal file
View File

@ -0,0 +1,62 @@
# FireMarshal Workloads for Centrifuge
## Hardcoded Linux Examples
The `hardcoded/` directory contains a number of hand-written benchmarks for
Linux that use various Centrifuge-generated accelerators. Most can be built
simply by running `make` in their respective directories. However, to actually
use a benchmark you will need to add it to a Linux-based workload (i.e. kernel +
root filesystem). The FireMarshal tool (included with Chipyard) helps automate
this process.
## Vadd Example Quickstart
If you simply want to try out the example vector add workload, you can use the
following commands:
First, build the workloads:
../../tools/firemarshal/marshal build vadd_tl.json vadd_rocc.json
This command builds two Linux-based workloads that can be booted on a RISC-V
SoC. They include a simple test that uses the vector addition accelerators that
were generated by centrifuge.
Next, you will want to install them:
../../tools/firemarshal/marshal install vadd_tl.json vadd_rocc.json
This generates a FireSim workload description for the provided benchmarks. You
may now use FireSim normally to launch the workloads. They will simply run the
benchmark automatically and shut down (you do not need to interact with the
running workload at all).
## FireMarshal Workload Description Details
vadd\_tl.json and vadd\_rocc.json are FireMarshal workload descriptions. These
include all the information needed to build a working Linux-based benchmark.
Here is the workload description for the tilelink-based accelerator:
{
"name" : "vadd_tl",
"base" : "cf-buildroot.json",
"workdir" : "hardcoded/vadd_tl",
"host-init" : "build.sh",
"files" : [["vadd_tl", "/root/"]],
"command" : "/root/vadd_tl"
}
These options have the following effects:
- name: A unique name identifying this benchmark.
- base: We have provided you with a base workload that includes the necessary
changes to Linux and other common setup tasks. FireMarshal inherits this
configuration before building your workload.
- workdir: All file paths in this configuration file will be relative to this directory.
- host-init: This script is run automatically before constructing the Linux
root filesystem. In this case, `hardcoded/vadd_tl/build.sh` simply
cross-compiles the vector add example benchmark.
- files: This instructs FireMarshal to copy the benchmark into the target root filesystem.
- command: FireMarshal will configure the workload to automatically run this
command whenever it boots up. This allows for hands-free testing.
You can find more information on FireMarshal workloads in its
[documentation](https://firemarshal.readthedocs.io/en/latest/).

@ -1 +1 @@
Subproject commit 560165de57bcee5452602d64f5206fbf0f5d8059
Subproject commit 8f5d9162eb07e192dcb4d1562668c685dfa660c5

View File

@ -1,6 +1,6 @@
{
"name" : "cf-buildroot",
"base" : "br-base",
"base" : "br-base.json",
"workdir" : "cf-base",
"linux-src" : "linux"
}

View File

@ -0,0 +1,37 @@
# Shared build rules for the benchmark directories that `include ../Makefile`.
#
# The including Makefile is expected to set:
#   TARGET   base name of the benchmark; $(TARGET).c is the only translation unit
#   POSTFIX  suffix appended to the linked binary name (may be empty)
#   CFLAGS / LDFLAGS  extra flags; this file appends to both
#
# Every source is compiled with the RISC-V Linux g++ driver and -fpermissive.

ARCH = riscv64
CC := $(ARCH)-unknown-linux-gnu-g++
OBJDUMP := $(ARCH)-unknown-linux-gnu-objdump
CFLAGS += -fpermissive -I../
LDFLAGS += -fpermissive

# Every .c/.h in the directory is treated as a prerequisite of the object file,
# so that a change to any of them triggers a recompile of $(TARGET).c.
SRC_DEPS := $(wildcard *.c) $(wildcard *.h)
src_files := $(TARGET).c
OBJECTS = $(src_files:.c=.o)
# DEPS keeps its original meaning (sources, headers and objects) for the link rule.
DEPS := $(SRC_DEPS) $(OBJECTS)

# None of these names is a file that the rule of the same name produces.
# The binary itself stays phony so that it is relinked on every invocation.
.PHONY: all run clean cleanall $(TARGET)$(POSTFIX)

all: $(TARGET)$(POSTFIX)

# Link the benchmark and dump its disassembly next to it.
# NOTE(review): the object name does not include POSTFIX, so switching between
# CUSTOM_INST / CUSTOM_DRIVER builds reuses an object compiled with the previous
# -D flags unless `make clean` is run first.
$(TARGET)$(POSTFIX): Makefile $(DEPS)
	$(CC) $(LDFLAGS) $(OBJECTS) -o $@
	$(OBJDUMP) -D $@ > $(TARGET).asm

# Pattern rules replace the old `.c.o: $(DEPS)` suffix rules: suffix rules may
# not carry prerequisites, and $(DEPS) contained the objects themselves.
%.o: %.c $(SRC_DEPS)
	$(CC) -c $(CFLAGS) $< -o $@

%.o: %.cpp $(SRC_DEPS)
	$(CC) -c $(CFLAGS) $< -o $@

# NOTE(review): $(TARGET).rv is not produced by any rule in this file.
run:
	spike pk $(TARGET).rv

# `make print-VAR` echoes the value of VAR.
print-%:
	@echo $* = $($*)

clean:
	rm -f $(OBJECTS) $(TARGET).rv $(TARGET).asm $(TARGET).ll $(TARGET).S

cleanall: clean
	rm -f $(TARGET).inst $(TARGET) $(TARGET).driver

View File

@ -1,25 +0,0 @@
VERBOSE= 1
TARGET ?=adpcm
FUNC=encode
LDFLAGS=
CFLAGS=
ifeq ($(CUSTOM_INST), 1)
CFLAGS+=-DCUSTOM_INST
endif
ifeq ($(CUSTOM_DRIVER), 1)
CFLAGS+=-DCUSTOM_DRIVER
endif
ifeq ($(LLVM), 1)
ACCEL ?=0
include ../../../../Makefile.llvm.in
else
ifeq ($(GCC), 1)
include ../../../../Makefile.gcc.in
else
include ../../../../Makefile.bm.in
endif
endif

View File

@ -1,25 +0,0 @@
VERBOSE= 1
TARGET ?=adpcm_tl
FUNC=encode
LDFLAGS=
CFLAGS=
ifeq ($(CUSTOM_INST), 1)
CFLAGS+=-DCUSTOM_INST
endif
ifeq ($(CUSTOM_DRIVER), 1)
CFLAGS+=-DCUSTOM_DRIVER
endif
ifeq ($(LLVM), 1)
ACCEL ?=0
include ../../../../Makefile.llvm.in
else
ifeq ($(GCC), 1)
include ../../../../Makefile.gcc.in
else
include ../../../../Makefile.bm.in
endif
endif

View File

@ -0,0 +1,18 @@
# Per-benchmark build configuration; the actual rules come from ../Makefile,
# which is included at the end once all variables are set.
VERBOSE= 1
# TARGET is only a default: `?=` keeps any value already supplied by the caller.
TARGET ?=adpcm
LDFLAGS=
CFLAGS=-Wno-narrowing
# POSTFIX selects a suffix for the build variant; empty for the plain build.
# NOTE(review): presumably consumed by the included ../Makefile - confirm.
POSTFIX=
# CUSTOM_INST=1 adds -DCUSTOM_INST and selects the ".inst" suffix.
ifeq ($(CUSTOM_INST), 1)
POSTFIX=.inst
CFLAGS+=-DCUSTOM_INST
endif
# CUSTOM_DRIVER=1 adds -DCUSTOM_DRIVER and selects the ".driver" suffix.
# If both switches are 1, both defines are passed and this later assignment
# to POSTFIX wins.
ifeq ($(CUSTOM_DRIVER), 1)
POSTFIX=.driver
CFLAGS+=-DCUSTOM_DRIVER
endif
include ../Makefile

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,13 @@
#ifdef CUSTOM_INST
#include "rocc.h"
#endif
/*
 * decode_wrapper - hand one operand to the accelerator and return its result.
 *
 * input_r: value passed as the single source operand.
 *
 * With CUSTOM_INST defined, the call expands ROCC_INSTRUCTION_DS (from rocc.h)
 * with XCUSTOM_ACC set to 2, ret_val as the destination and input_r as the
 * source.
 * NOTE(review): the exact encoding and the meaning of the trailing 0 argument
 * are defined by rocc.h, which is not visible here - confirm.
 *
 * Without CUSTOM_INST no instruction is issued and the function returns 0.
 * ret_val is initialized so the return never reads an indeterminate value.
 */
uint64_t decode_wrapper(uint64_t input_r) {
    uint64_t ret_val = 0;
#ifdef CUSTOM_INST
#define XCUSTOM_ACC 2
    ROCC_INSTRUCTION_DS(XCUSTOM_ACC, ret_val, input_r, 0);
#else
    (void)input_r; /* unused in the software-only build */
#endif
    return ret_val;
}

View File

@ -0,0 +1,19 @@
# Per-benchmark build configuration; the actual rules come from ../Makefile,
# which is included at the end once all variables are set.
VERBOSE= 1
# TARGET is only a default: `?=` keeps any value already supplied by the caller.
TARGET ?=adpcm
# NOTE(review): FUNC is not referenced in this file; presumably it names the
# accelerated function for other tooling - confirm.
FUNC=encode
LDFLAGS=
CFLAGS=-Wno-narrowing
# POSTFIX selects a suffix for the build variant; empty for the plain build.
# NOTE(review): presumably consumed by the included ../Makefile - confirm.
POSTFIX=
# CUSTOM_INST=1 adds -DCUSTOM_INST and selects the ".inst" suffix.
ifeq ($(CUSTOM_INST), 1)
POSTFIX=.inst
CFLAGS+=-DCUSTOM_INST
endif
# CUSTOM_DRIVER=1 adds -DCUSTOM_DRIVER and selects the ".driver" suffix.
# If both switches are 1, both defines are passed and this later assignment
# to POSTFIX wins.
ifeq ($(CUSTOM_DRIVER), 1)
POSTFIX=.driver
CFLAGS+=-DCUSTOM_DRIVER
endif
include ../Makefile

View File

@ -1156,48 +1156,28 @@ void
adpcm_main ()
{
int i, j;
uint64_t begin;
/* reset, initialize required memory */
/* reset, initialize required memory */
reset ();
j = 10;
#ifdef WRAP_encode
uint64_t begin;
begin = read_cycle();
for (i = 0; i < IN_END; i += 2) {
#ifdef CUSTOM_INST
for (i = 0; i < IN_END; i += 2)
{
compressed[i / 2] = encode_wrapper(test_data[i], test_data[i + 1]);
}
compressed[i / 2] = encode_wrapper(test_data[i], test_data[i + 1]);
#else
for (i = 0; i < IN_END; i += 2)
{
compressed[i / 2] = encode(test_data[i], test_data[i + 1]);
compressed[i / 2] = encode (test_data[i], test_data[i + 1]);
#endif
}
#endif //CUSTOM_INST
duration(begin, read_cycle());
#endif //WRAP_encode
#ifdef WRAP_decode
begin = read_cycle();
#if CUSTOM_INST
for (i = 0; i < IN_END; i += 2)
{
decode_wrapper(compressed[i / 2]);
for (i = 0; i < IN_END; i += 2) {
decode (compressed[i / 2]);
result[i] = xout1;
result[i + 1] = xout2;
}
#else
for (i = 0; i < IN_END; i += 2)
{
decode(compressed[i / 2]);
result[i] = xout1;
result[i + 1] = xout2;
}
#endif //CUSTOM_INST
duration(begin, read_cycle());
#endif //WRAP_decode
}
int

View File

@ -0,0 +1,13 @@
#ifdef CUSTOM_INST
#include "rocc.h"
#endif
/*
 * encode_wrapper - hand two operands to the accelerator and return its result.
 *
 * xin1, xin2: values passed as the two source operands.
 *
 * With CUSTOM_INST defined, the call expands ROCC_INSTRUCTION_DSS (from rocc.h)
 * with XCUSTOM_ACC set to 1, ret_val as the destination and xin1/xin2 as the
 * sources.
 * NOTE(review): the exact encoding and the meaning of the trailing 0 argument
 * are defined by rocc.h, which is not visible here - confirm.
 *
 * Without CUSTOM_INST no instruction is issued and the function returns 0.
 * ret_val is initialized so the return never reads an indeterminate value.
 */
uint64_t encode_wrapper(uint64_t xin1, uint64_t xin2) {
    uint64_t ret_val = 0;
#ifdef CUSTOM_INST
#define XCUSTOM_ACC 1
    ROCC_INSTRUCTION_DSS(XCUSTOM_ACC, ret_val, xin1, xin2, 0);
#else
    (void)xin1; /* unused in the software-only build */
    (void)xin2;
#endif
    return ret_val;
}

View File

@ -0,0 +1,19 @@
# Per-benchmark build configuration; the actual rules come from ../Makefile,
# which is included at the end once all variables are set.
VERBOSE= 1
# TARGET is only a default: `?=` keeps any value already supplied by the caller.
TARGET ?=adpcm_tl
# NOTE(review): FUNC is not referenced in this file; presumably it names the
# accelerated function for other tooling - confirm.
FUNC=encode
LDFLAGS=
CFLAGS=-Wno-narrowing
# POSTFIX selects a suffix for the build variant; empty for the plain build.
# NOTE(review): presumably consumed by the included ../Makefile - confirm.
POSTFIX=
# CUSTOM_INST=1 adds -DCUSTOM_INST and selects the ".inst" suffix.
ifeq ($(CUSTOM_INST), 1)
POSTFIX=.inst
CFLAGS+=-DCUSTOM_INST
endif
# CUSTOM_DRIVER=1 adds -DCUSTOM_DRIVER and selects the ".driver" suffix.
# If both switches are 1, both defines are passed and this later assignment
# to POSTFIX wins.
ifeq ($(CUSTOM_DRIVER), 1)
POSTFIX=.driver
CFLAGS+=-DCUSTOM_DRIVER
endif
include ../Makefile

Binary file not shown.

View File

@ -63,11 +63,47 @@
/* */
/*************************************************************************/
#include <stdio.h>
#include <stdint.h>
#ifdef CUSTOM_DRIVER
#include "bm_wrapper.h"
#endif
#include "time.h"
//#include "mmio.h"
//#include "time.h"
#include "../custom_mmap/mmap_driver.c"
#include "../os_utils.h"
#define ACCEL_CONTROL 0x30000
#define AP_DONE_MASK 0b10
#define ACCEL_INT 0x30004
#define ACCEL_A 0x30018
#define ACCEL_B 0x30020
#define ACCEL_RET 0x30010
/*
 * encode_accel - run one encode step on the memory-mapped accelerator.
 *
 * a, b: the two operands, written to the ACCEL_A and ACCEL_B registers.
 *
 * Sequence: clear the interrupt register, load both operands, write 1 to the
 * control register to start execution, spin until the AP_DONE_MASK bit of the
 * control register reads back set, then return the value of ACCEL_RET.
 *
 * All register traffic goes through access_addr().
 * NOTE(review): the third argument of the IN (read) calls is 0x1; it is
 * presumably ignored for reads - confirm against the mmap driver.
 */
int encode_accel (int a, int b)
{
  /* Interrupts stay disabled; completion is detected by polling below. */
  access_addr(ACCEL_INT, OUT, 0);

  /* Load the operands. */
  access_addr(ACCEL_A, OUT, a);
  access_addr(ACCEL_B, OUT, b);

  /* Setting ap_start kicks off the computation. */
  access_addr(ACCEL_CONTROL, OUT, 0x1);

  /* Busy-wait for the done bit. */
  for (;;)
    {
      if (access_addr(ACCEL_CONTROL, IN, 0x1) & AP_DONE_MASK)
        break;
    }

  return access_addr(ACCEL_RET, IN, 0x1);
}
int encode (int, int);
void decode (int);
@ -1167,18 +1203,21 @@ adpcm_main ()
j = 10;
uint64_t begin, end, dur;
begin = read_cycle();
int64_t begin, end, dur;
begin = read_cycle();
for (i = 0; i < IN_END; i += 2)
{
#ifdef CUSTOM_DRIVER
compressed[i / 2] = encode_wrapper (test_data[i], test_data[i + 1]);
#else
compressed[i / 2] = encode (test_data[i], test_data[i + 1]);
#endif
end = read_cycle();
}
//for (i = 0; i < 1; i += 2)
{
#ifdef CUSTOM_DRIVER
compressed[i / 2] = encode_accel (test_data[i], test_data[i + 1]);
#else
compressed[i / 2] = encode (test_data[i], test_data[i + 1]);
#endif
}
end = read_cycle();
duration(begin, end);
for (i = 0; i < IN_END; i += 2)
{
decode (compressed[i / 2]);

View File

@ -1,25 +0,0 @@
VERBOSE= 1
TARGET ?=aes
FUNC=encrypt
LDFLAGS=
CFLAGS=
ifeq ($(CUSTOM_INST), 1)
CFLAGS+=-DCUSTOM_INST
endif
ifeq ($(CUSTOM_DRIVER), 1)
CFLAGS+=-DCUSTOM_DRIVER
endif
ifeq ($(LLVM), 1)
ACCEL ?=0
include ../../../../Makefile.llvm.in
else
ifeq ($(GCC), 1)
include ../../../../Makefile.gcc.in
else
include ../../../../makefiles/Makefile.bm.in
endif
endif

View File

@ -1,25 +0,0 @@
VERBOSE= 1
TARGET ?=aes_tl
FUNC=encrypt
LDFLAGS=
CFLAGS=
ifeq ($(CUSTOM_INST), 1)
CFLAGS+=-DCUSTOM_INST
endif
ifeq ($(CUSTOM_DRIVER), 1)
CFLAGS+=-DCUSTOM_DRIVER
endif
ifeq ($(LLVM), 1)
ACCEL ?=0
include ../../../../Makefile.llvm.in
else
ifeq ($(GCC), 1)
include ../../../../Makefile.gcc.in
else
include ../../../../Makefile.bm.in
endif
endif

View File

@ -0,0 +1,19 @@
# Per-benchmark build configuration; the actual rules come from ../Makefile,
# which is included at the end once all variables are set.
VERBOSE= 1
# TARGET is only a default: `?=` keeps any value already supplied by the caller.
TARGET ?=aes
# NOTE(review): FUNC is not referenced in this file; presumably it names the
# accelerated function for other tooling - confirm.
FUNC=encrypt
LDFLAGS=
CFLAGS=
# POSTFIX selects a suffix for the build variant; empty for the plain build.
# NOTE(review): presumably consumed by the included ../Makefile - confirm.
POSTFIX=
# CUSTOM_INST=1 adds -DCUSTOM_INST and selects the ".inst" suffix.
ifeq ($(CUSTOM_INST), 1)
POSTFIX=.inst
CFLAGS+=-DCUSTOM_INST
endif
# CUSTOM_DRIVER=1 adds -DCUSTOM_DRIVER and selects the ".driver" suffix.
# If both switches are 1, both defines are passed and this later assignment
# to POSTFIX wins.
ifeq ($(CUSTOM_DRIVER), 1)
POSTFIX=.driver
CFLAGS+=-DCUSTOM_DRIVER
endif
include ../Makefile

View File

@ -60,20 +60,22 @@
*
*/
#include <stdio.h>
#include <stdint.h>
#include "time.h"
#ifdef CUSTOM_INST
#include "rocc.h"
#endif
#include "../custom_mmap/mmap_driver.c"
#include "../os_utils.h"
int main_result;
#ifdef CUSTOM_DRIVER
#include "bm_wrapper.h"
#endif
#include "aes.h"
#include "aes_enc.c"
#include "aes_dec.c"
#include "aes_key.c"
#include "aes_func.c"
/* ***************** main **************************** */
int
aes_main (void)
@ -118,21 +120,33 @@ aes_main (void)
key[14] = 79;
key[15] = 60;
int64_t begin, end, dur;
begin = read_cycle();
#ifdef CUSTOM_DRIVER
encrypt_wrapper(statemt, key, 128128);
uint64_t begin, end, dur;
encryp (statemt, key);
begin = read_cycle();
volatile int block;
#ifdef CUSTOM_INST
asm volatile ("fence.i");
#define XCUSTOM_ACC 1
ROCC_INSTRUCTION_DSS(XCUSTOM_ACC, block , vtop_translate(statemt), vtop_translate(key), 0);
asm volatile ("fence.i");
#else
encrypt (statemt, key, 128128);
decryp (statemt, key);
#endif
end = read_cycle();
decrypt (statemt, key, 128128);
end = read_cycle();
printf ("\ndecrypto message\t");
for (int i = 0; i < 16; ++i)
{
if (statemt[i] < 16)
printf ("0");
printf ("%x", statemt[i]);
}
duration(begin, end);
return 0;
}
int
main ()
{

View File

@ -60,7 +60,7 @@
*
*/
int
decrypt (int statemt[32], int key[32])
decryp (int statemt[32], int key[32])
{
#pragma HLS INTERFACE ap_bus depth=10 port=statemt
#pragma HLS INTERFACE ap_bus depth=10 port=key
@ -132,7 +132,7 @@ decrypt (int statemt[32], int key[32])
// printf ("0");
// printf ("%x", statemt[i]);
// }
//
//for (i = 0; i < 16; i++)
// main_result += (statemt[i] != out_dec_statemt[i]);

View File

@ -0,0 +1,131 @@
/*
+--------------------------------------------------------------------------+
| CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
| ======================================================================== |
| |
| * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
| H. Takada and K. Ishii |
| Nagoya University, Japan |
| |
| * Remark : |
| 1. This source code is modified to unify the formats of the benchmark |
| programs in CHStone. |
| 2. Test vectors are added for CHStone. |
| 3. If "main_result" is 0 at the end of the program, the program is |
| correctly executed. |
| 4. Please follow the copyright of each benchmark program. |
+--------------------------------------------------------------------------+
*/
/* aes_enc.c */
/*
* Copyright (C) 2005
* Akira Iwata & Masayuki Sato
* Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan.
*
* All rights reserved.
*
* This software is written by Masayuki Sato.
* And if you want to contact us, send an email to Kimitake Wakayama
* (wakayama@elcom.nitech.ac.jp)
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. All advertising materials mentioning features or use of this software must
* display the following acknowledgment:
* "This product includes software developed by Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
*
* 4. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
*
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
* AKIRA IWATA LABORATORY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
* IN NO EVENT SHALL AKIRA IWATA LABORATORY BE LIABLE FOR ANY SPECIAL,
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
* NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
*/
/* ******* encrypto ************ */
/*
 * encryp - encrypt the block in statemt in place with key, then print it.
 *
 * statemt: state buffer, transformed in place.
 * key:     key buffer handed to KeySchedule().
 *
 * The mode is fixed by the local `type` (128128), so the switch always takes
 * its first case (round_val = 0, nb = 4); the other cases are kept unchanged.
 * NOTE(review): `type` presumably encodes key bits followed by block bits -
 * confirm.
 *
 * Side effects: assigns the globals nb and round_val, and prints
 * "encrypted message \t" followed by nb * 4 state values as zero-padded,
 * two-digit hex.
 *
 * Always returns 0.
 */
int
encryp (int statemt[32], int key[32])
{
#pragma HLS INTERFACE ap_bus depth=10 port=statemt
#pragma HLS INTERFACE ap_bus depth=10 port=key
  /*
   * Test vector (added for CHStone): expected output data for the encryption.
   * It is not referenced anywhere in this function.
   */
  const int out_enc_statemt[16] = {
    0x39, 0x25, 0x84, 0x1d, 0x2, 0xdc, 0x9, 0xfb,
    0xdc, 0x11, 0x85, 0x97, 0x19, 0x6a, 0xb, 0x32
  };
  int type = 128128;
  int round;
  int pos;

  KeySchedule (type, key);

  /* Derive the block width (nb) and round offset (round_val) from the mode. */
  switch (type)
    {
    case 128128:
      nb = 4;
      round_val = 0;
      break;
    case 192128:
      nb = 4;
      round_val = 2;
      break;
    case 256128:
      nb = 4;
      round_val = 4;
      break;
    case 128192:
    case 192192:
      nb = 6;
      round_val = 2;
      break;
    case 256192:
      nb = 6;
      round_val = 4;
      break;
    case 128256:
    case 192256:
    case 256256:
      nb = 8;
      round_val = 4;
      break;
    }

  AddRoundKey (statemt, type, 0);

  /* Full rounds 1 .. round_val + 9. */
  round = 1;
  while (round <= round_val + 9)
    {
      ByteSub_ShiftRow (statemt, nb);
      MixColumn_AddRoundKey (statemt, nb, round);
      ++round;
    }

  /* Final round: no MixColumn step; `round` is now one past the last full round. */
  ByteSub_ShiftRow (statemt, nb);
  AddRoundKey (statemt, type, round);

  printf ("encrypted message \t");
  pos = 0;
  while (pos < nb * 4)
    {
      if (statemt[pos] < 16)
        printf ("0");
      printf ("%x", statemt[pos]);
      ++pos;
    }

  return 0;
}

View File

@ -0,0 +1,19 @@
# Per-benchmark build configuration; the actual rules come from ../Makefile,
# which is included at the end once all variables are set.
VERBOSE= 1
# TARGET is only a default: `?=` keeps any value already supplied by the caller.
TARGET ?=aes
# NOTE(review): FUNC is not referenced in this file; presumably it names the
# accelerated function for other tooling - confirm.
FUNC=encrypt
LDFLAGS=
CFLAGS=
# POSTFIX selects a suffix for the build variant; empty for the plain build.
# NOTE(review): presumably consumed by the included ../Makefile - confirm.
POSTFIX=
# CUSTOM_INST=1 adds -DCUSTOM_INST and selects the ".inst" suffix.
ifeq ($(CUSTOM_INST), 1)
POSTFIX=.inst
CFLAGS+=-DCUSTOM_INST
endif
# CUSTOM_DRIVER=1 adds -DCUSTOM_DRIVER and selects the ".driver" suffix.
# If both switches are 1, both defines are passed and this later assignment
# to POSTFIX wins.
ifeq ($(CUSTOM_DRIVER), 1)
POSTFIX=.driver
CFLAGS+=-DCUSTOM_DRIVER
endif
include ../Makefile

View File

@ -60,15 +60,15 @@
*
*/
#include <stdio.h>
#include<stdint.h>
#include "../os_utils.h"
#include "../custom_mmap/mmap_driver.c"
#ifdef CUSTOM_INST
#include "bm_wrapper.h"
#include "rocc.h"
#endif
int main_result;
#include "time.h"
#include "aes.h"
#include "aes_enc.c"
#include "aes_dec.c"
@ -120,35 +120,56 @@ aes_main (void)
key[14] = 79;
key[15] = 60;
uint64_t begin;
#ifdef WRAP_encrypt
begin = read_cycle();
int64_t begin, end, dur;
volatile int block;
begin = read_cycle();
#ifdef CUSTOM_INST
encrypt_wrapper(statemt, key);
asm volatile ("fence.i");
#define XCUSTOM_ACC 0
ROCC_INSTRUCTION_DSS(XCUSTOM_ACC, block, vtop_translate(statemt), vtop_translate(key), 0);
asm volatile ("fence.i");
#else
encrypt(statemt, key);
#endif //CUSTOM_INST
duration(begin, read_cycle());
#endif //WRAP_encrypt
encrypt (statemt, key);
#endif
end = read_cycle();
duration(begin, end);
printf ("\nencrypto message\t");
for (int i = 0; i < 16; ++i)
{
if (statemt[i] < 16)
printf ("0");
printf ("%x", statemt[i]);
}
//#ifdef CUSTOM_INST
// #define XCUSTOM_ACC 0
// ROCC_INSTRUCTION_SS(XCUSTOM_ACC, statemt, key, 0);
//#else
// decrypt (statemt, key);
//#endif
//
decrypt (statemt, key);
printf ("\ndecrypto message\t");
for (int i = 0; i < 16; ++i)
{
if (statemt[i] < 16)
printf ("0");
printf ("%x", statemt[i]);
}
#ifdef WRAP_decrypt
begin = read_cycle();
#ifdef CUSTOM_INST
decrypt_wrapper(statemt, key);
#else //CUSTOM_INST
decrypt(statemt, key);
#endif //WRAP_decrypt
duration(begin, read_cycle());
#endif
return 0;
}
int main ()
int
main ()
{
main_result = 0;
aes_main ();
printf ("\n%d\n", main_result);
return main_result;
}
main_result = 0;
aes_main ();
printf ("\n%d\n", main_result);
return main_result;
}

View File

@ -0,0 +1,83 @@
/*
+--------------------------------------------------------------------------+
| CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
| ======================================================================== |
| |
| * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
| H. Takada and K. Ishii |
| Nagoya University, Japan |
| |
| * Remark : |
| 1. This source code is modified to unify the formats of the benchmark |
| programs in CHStone. |
| 2. Test vectors are added for CHStone. |
| 3. If "main_result" is 0 at the end of the program, the program is |
| correctly executed. |
| 4. Please follow the copyright of each benchmark program. |
+--------------------------------------------------------------------------+
*/
/* aes.h */
/*
* Copyright (C) 2005
* Akira Iwata & Masayuki Sato
* Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan.
*
* All rights reserved.
*
* This software is written by Masayuki Sato.
* And if you want to contact us, send an email to Kimitake Wakayama
* (wakayama@elcom.nitech.ac.jp)
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. All advertising materials mentioning features or use of this software must
* display the following acknowledgment:
* "This product includes software developed by Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
*
* 4. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
*
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
* AKIRA IWATA LABORATORY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
* IN NO EVENT SHALL AKIRA IWATA LABORATORY BE LIABLE FOR ANY SPECIAL,
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
* NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
*/
/* ************* data type define ************************* */
/*
 * Include guard: this header defines objects (not just declarations), so a
 * second inclusion in the same translation unit would redefine them. That is
 * a hard error when the sources are built with a C++ compiler.
 */
#ifndef CHSTONE_AES_H
#define CHSTONE_AES_H

/*
 * Shared state of the AES benchmark. The sources are combined into a single
 * translation unit via #include, so these are defined here rather than
 * declared extern.
 */
int type;        /* mode selector; NOTE(review): the visible encrypt/decrypt
                    routines shadow it with a local of the same name */
int nb;          /* block-width parameter, assigned by the encrypt/decrypt routines */
int round_val;   /* round parameter, assigned by the encrypt/decrypt routines */
int key[32];     /* key buffer */
int statemt[32]; /* state buffer, transformed in place */
int word[4][120]; /* NOTE(review): presumably the expanded key schedule - confirm */

/* key generate */
int KeySchedule (int, int *);
int SubByte (int);

/* encrypto decrypto */
void ByteSub_ShiftRow (int *, int);
void InversShiftRow_ByteSub (int *, int);
int MixColumn_AddRoundKey (int *, int, int);
int AddRoundKey_InversMixColumn (int *, int, int);
int AddRoundKey (int *, int, int);
int encrypt (int *, int *);
int decrypt (int *, int *);

#endif /* CHSTONE_AES_H */

View File

@ -0,0 +1,140 @@
/*
+--------------------------------------------------------------------------+
| CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
| ======================================================================== |
| |
| * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
| H. Takada and K. Ishii |
| Nagoya University, Japan |
| |
| * Remark : |
| 1. This source code is modified to unify the formats of the benchmark |
| programs in CHStone. |
| 2. Test vectors are added for CHStone. |
| 3. If "main_result" is 0 at the end of the program, the program is |
| correctly executed. |
| 4. Please follow the copyright of each benchmark program. |
+--------------------------------------------------------------------------+
*/
/* aes_dec.c */
/*
* Copyright (C) 2005
* Akira Iwata & Masayuki Sato
* Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan.
*
* All rights reserved.
*
* This software is written by Masayuki Sato.
* And if you want to contact us, send an email to Kimitake Wakayama
* (wakayama@elcom.nitech.ac.jp)
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. All advertising materials mentioning features or use of this software must
* display the following acknowledgment:
* "This product includes software developed by Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
*
* 4. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
*
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
* AKIRA IWATA LABORATORY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
* IN NO EVENT SHALL AKIRA IWATA LABORATORY BE LIABLE FOR ANY SPECIAL,
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
* NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
*/
/*
 * decrypt - decrypt the block in statemt in place with key and self-check it.
 *
 * statemt: state buffer, transformed in place.
 * key:     key buffer handed to KeySchedule().
 *
 * The mode is fixed by the local `type` (128128), so the switch always takes
 * the 128128 case (round_val = 10, nb = 4); the other cases are kept unchanged.
 *
 * Side effects: assigns the globals nb and round_val, and adds to the global
 * main_result the number of the first 16 state values that differ from the
 * expected plaintext out_dec_statemt.
 *
 * Always returns 0.
 */
int
decrypt (int statemt[32], int key[32])
{
#pragma HLS INTERFACE ap_bus depth=10 port=statemt
#pragma HLS INTERFACE ap_bus depth=10 port=key
  /* Test vector (added for CHStone): expected output data for "decrypt". */
  const int out_dec_statemt[16] = {
    0x32, 0x43, 0xf6, 0xa8, 0x88, 0x5a, 0x30, 0x8d,
    0x31, 0x31, 0x98, 0xa2, 0xe0, 0x37, 0x7, 0x34
  };
  int type = 128128;
  int round;
  int pos;

  KeySchedule (type, key);

  /* Derive the block width (nb) and round count (round_val) from the mode. */
  switch (type)
    {
    case 128128:
      nb = 4;
      round_val = 10;
      break;
    case 192128:
      nb = 4;
      round_val = 12;
      break;
    case 256128:
      nb = 4;
      round_val = 14;
      break;
    case 128192:
    case 192192:
      nb = 6;
      round_val = 12;
      break;
    case 256192:
      nb = 6;
      round_val = 14;
      break;
    case 128256:
    case 192256:
    case 256256:
      nb = 8;
      round_val = 14;
      break;
    }

  /* Undo the final encryption round first. */
  AddRoundKey (statemt, type, round_val);
  InversShiftRow_ByteSub (statemt, nb);

  /* Remaining rounds, walked from round_val - 1 down to 1. */
  round = round_val - 1;
  while (round >= 1)
    {
      AddRoundKey_InversMixColumn (statemt, nb, round);
      InversShiftRow_ByteSub (statemt, nb);
      --round;
    }

  AddRoundKey (statemt, type, 0);

  /* Count mismatches against the expected plaintext. */
  for (pos = 0; pos < 16; pos++)
    {
      if (statemt[pos] != out_dec_statemt[pos])
        main_result += 1;
    }

  return 0;
}

View File

@ -126,6 +126,6 @@ encrypt (int statemt[32], int key[32])
// printf ("0");
// printf ("%x", statemt[i]);
// }
//
return 0;
}

View File

@ -0,0 +1,542 @@
/*
+--------------------------------------------------------------------------+
| CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
| ======================================================================== |
| |
| * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
| H. Takada and K. Ishii |
| Nagoya University, Japan |
| |
| * Remark : |
| 1. This source code is modified to unify the formats of the benchmark |
| programs in CHStone. |
| 2. Test vectors are added for CHStone. |
| 3. If "main_result" is 0 at the end of the program, the program is |
| correctly executed. |
| 4. Please follow the copyright of each benchmark program. |
+--------------------------------------------------------------------------+
*/
/* aes_function.c */
/*
* Copyright (C) 2005
* Akira Iwata & Masayuki Sato
* Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan.
*
* All rights reserved.
*
* This software is written by Masayuki Sato.
* And if you want to contact us, send an email to Kimitake Wakayama
* (wakayama@elcom.nitech.ac.jp)
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. All advertising materials mentioning features or use of this software must
* display the following acknowledgment:
* "This product includes software developed by Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
*
* 4. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
*
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
* AKIRA IWATA LABORATORY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
* IN NO EVENT SHALL AKIRA IWATA LABORATORY BE LIABLE FOR ANY SPECIAL,
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
* NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
*/
/*
 * Forward AES substitution table, stored as 16 rows of 16 entries.
 * ByteSub_ShiftRow looks a state value v up as Sbox[v >> 4][v & 0xf], i.e.
 * the high nibble selects the row and the low nibble the column.
 */
const int Sbox[16][16] = {
{0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b,
0xfe, 0xd7, 0xab, 0x76},
{0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf,
0x9c, 0xa4, 0x72, 0xc0},
{0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1,
0x71, 0xd8, 0x31, 0x15},
{0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2,
0xeb, 0x27, 0xb2, 0x75},
{0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3,
0x29, 0xe3, 0x2f, 0x84},
{0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39,
0x4a, 0x4c, 0x58, 0xcf},
{0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f,
0x50, 0x3c, 0x9f, 0xa8},
{0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21,
0x10, 0xff, 0xf3, 0xd2},
{0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d,
0x64, 0x5d, 0x19, 0x73},
{0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14,
0xde, 0x5e, 0x0b, 0xdb},
{0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62,
0x91, 0x95, 0xe4, 0x79},
{0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea,
0x65, 0x7a, 0xae, 0x08},
{0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f,
0x4b, 0xbd, 0x8b, 0x8a},
{0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9,
0x86, 0xc1, 0x1d, 0x9e},
{0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9,
0xce, 0x55, 0x28, 0xdf},
{0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f,
0xb0, 0x54, 0xbb, 0x16}
};
/*
 * Inverse substitution table, same 16x16 layout as Sbox: for example
 * Sbox[0][0] is 0x63 and invSbox[0x6][0x3] is 0x00.
 * NOTE(review): presumably indexed as invSbox[v >> 4][v & 0xf] by the inverse
 * ByteSub step; its users are not visible in this part of the file - confirm.
 */
const int invSbox[16][16] = {
{0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e,
0x81, 0xf3, 0xd7, 0xfb},
{0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44,
0xc4, 0xde, 0xe9, 0xcb},
{0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b,
0x42, 0xfa, 0xc3, 0x4e},
{0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49,
0x6d, 0x8b, 0xd1, 0x25},
{0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc,
0x5d, 0x65, 0xb6, 0x92},
{0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57,
0xa7, 0x8d, 0x9d, 0x84},
{0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05,
0xb8, 0xb3, 0x45, 0x06},
{0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03,
0x01, 0x13, 0x8a, 0x6b},
{0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce,
0xf0, 0xb4, 0xe6, 0x73},
{0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8,
0x1c, 0x75, 0xdf, 0x6e},
{0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e,
0xaa, 0x18, 0xbe, 0x1b},
{0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe,
0x78, 0xcd, 0x5a, 0xf4},
{0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59,
0x27, 0x80, 0xec, 0x5f},
{0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f,
0x93, 0xc9, 0x9c, 0xef},
{0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c,
0x83, 0x53, 0x99, 0x61},
{0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63,
0x55, 0x21, 0x0c, 0x7d}
};
/* ********* ByteSub & ShiftRow ********* */
/* Forward S-box lookup for one state entry: v >> 4 picks the Sbox row and
   the low four bits pick the column. */
static int
sbox_entry (int v)
{
  return Sbox[v >> 4][v & 0xf];
}

/*
 * Combined ByteSub + ShiftRow step, performed in place on statemt.
 *
 * Entries whose index is congruent to r modulo 4 are handled together as
 * "row r"; nb is the number of 4-entry groups in use and must be 4, 6 or 8
 * (any other value leaves statemt untouched).  Every used entry is replaced
 * by its Sbox value.  Row 0 keeps its positions; rows 1..3 are rotated as
 * noted at each step below.
 */
void
ByteSub_ShiftRow (int statemt[32], int nb)
{
  int saved, col, lo, hi;

  if (nb != 4 && nb != 6 && nb != 8)
    return;

  /* Row 0: substitution only. */
  for (col = 0; col < nb; ++col)
    statemt[4 * col] = sbox_entry (statemt[4 * col]);

  /* Row 1: each slot receives the substituted entry four indices further
     on; the first entry wraps around to the last slot. */
  saved = sbox_entry (statemt[1]);
  for (col = 0; col < nb - 1; ++col)
    statemt[1 + 4 * col] = sbox_entry (statemt[5 + 4 * col]);
  statemt[1 + 4 * (nb - 1)] = saved;

  if (nb == 4)
    {
      /* Row 2: rotation by two groups, which is two plain exchanges. */
      for (col = 0; col < 2; ++col)
        {
          lo = 2 + 4 * col;
          hi = lo + 8;
          saved = sbox_entry (statemt[lo]);
          statemt[lo] = sbox_entry (statemt[hi]);
          statemt[hi] = saved;
        }

      /* Row 3: rotation by three groups, a single cycle of four. */
      saved = sbox_entry (statemt[3]);
      statemt[3] = sbox_entry (statemt[15]);
      statemt[15] = sbox_entry (statemt[11]);
      statemt[11] = sbox_entry (statemt[7]);
      statemt[7] = saved;
      return;
    }

  if (nb == 6)
    {
      /* Row 2: rotation by two groups, two independent cycles of three. */
      for (col = 0; col < 2; ++col)
        {
          lo = 2 + 4 * col;
          saved = sbox_entry (statemt[lo]);
          statemt[lo] = sbox_entry (statemt[lo + 8]);
          statemt[lo + 8] = sbox_entry (statemt[lo + 16]);
          statemt[lo + 16] = saved;
        }
    }
  else
    {
      /* nb == 8, row 2: rotation by three groups, one cycle of eight. */
      saved = sbox_entry (statemt[2]);
      statemt[2] = sbox_entry (statemt[14]);
      statemt[14] = sbox_entry (statemt[26]);
      statemt[26] = sbox_entry (statemt[6]);
      statemt[6] = sbox_entry (statemt[18]);
      statemt[18] = sbox_entry (statemt[30]);
      statemt[30] = sbox_entry (statemt[10]);
      statemt[10] = sbox_entry (statemt[22]);
      statemt[22] = saved;
    }

  /* Row 3 for nb 6 and 8: rotation by nb / 2 groups, i.e. each entry in the
     first half is exchanged with its partner 2 * nb indices further on. */
  for (col = 0; col < nb / 2; ++col)
    {
      lo = 3 + 4 * col;
      hi = lo + 2 * nb;
      saved = sbox_entry (statemt[lo]);
      statemt[lo] = sbox_entry (statemt[hi]);
      statemt[hi] = saved;
    }
}
/* Returns the Sbox entry for `in`: in / 16 selects the row and in % 16
   selects the column. */
int
SubByte (int in)
{
  const int row = in / 16;
  const int col = in % 16;

  return Sbox[row][col];
}
/* ********* InversShiftRow & ByteSub ********* */
/* Inverse S-box lookup for one state entry: v >> 4 picks the invSbox row
   and the low four bits pick the column. */
static int
inv_sbox_entry (int v)
{
  return invSbox[v >> 4][v & 0xf];
}

/*
 * Combined inverse ShiftRow + inverse ByteSub step, performed in place on
 * statemt.
 *
 * Entries whose index is congruent to r modulo 4 are handled together as
 * "row r"; nb is the number of 4-entry groups in use and must be 4, 6 or 8
 * (any other value leaves statemt untouched).  Every used entry is replaced
 * by its invSbox value.  Row 0 keeps its positions; rows 1..3 are rotated in
 * the direction opposite to ByteSub_ShiftRow, as noted at each step below.
 */
void
InversShiftRow_ByteSub (int statemt[32], int nb)
{
  int saved, col, lo, hi;

  if (nb != 4 && nb != 6 && nb != 8)
    return;

  /* Row 0: substitution only. */
  for (col = 0; col < nb; ++col)
    statemt[4 * col] = inv_sbox_entry (statemt[4 * col]);

  /* Row 1: each slot receives the substituted entry four indices earlier;
     the last entry wraps around to the first slot. */
  saved = inv_sbox_entry (statemt[1 + 4 * (nb - 1)]);
  for (col = nb - 1; col > 0; --col)
    statemt[1 + 4 * col] = inv_sbox_entry (statemt[1 + 4 * (col - 1)]);
  statemt[1] = saved;

  if (nb == 4)
    {
      /* Row 2: rotation by two groups, which is two plain exchanges. */
      for (col = 0; col < 2; ++col)
        {
          lo = 2 + 4 * col;
          hi = lo + 8;
          saved = inv_sbox_entry (statemt[lo]);
          statemt[lo] = inv_sbox_entry (statemt[hi]);
          statemt[hi] = saved;
        }

      /* Row 3: a single cycle of four. */
      saved = inv_sbox_entry (statemt[15]);
      statemt[15] = inv_sbox_entry (statemt[3]);
      statemt[3] = inv_sbox_entry (statemt[7]);
      statemt[7] = inv_sbox_entry (statemt[11]);
      statemt[11] = saved;
      return;
    }

  if (nb == 6)
    {
      /* Row 2: two independent cycles of three. */
      for (col = 0; col < 2; ++col)
        {
          lo = 2 + 4 * col;
          saved = inv_sbox_entry (statemt[lo + 16]);
          statemt[lo + 16] = inv_sbox_entry (statemt[lo + 8]);
          statemt[lo + 8] = inv_sbox_entry (statemt[lo]);
          statemt[lo] = saved;
        }
    }
  else
    {
      /* nb == 8, row 2: one cycle of eight. */
      saved = inv_sbox_entry (statemt[30]);
      statemt[30] = inv_sbox_entry (statemt[18]);
      statemt[18] = inv_sbox_entry (statemt[6]);
      statemt[6] = inv_sbox_entry (statemt[26]);
      statemt[26] = inv_sbox_entry (statemt[14]);
      statemt[14] = inv_sbox_entry (statemt[2]);
      statemt[2] = inv_sbox_entry (statemt[22]);
      statemt[22] = inv_sbox_entry (statemt[10]);
      statemt[10] = saved;
    }

  /* Row 3 for nb 6 and 8: each entry in the first half is exchanged with
     its partner 2 * nb indices further on. */
  for (col = 0; col < nb / 2; ++col)
    {
      lo = 3 + 4 * col;
      hi = lo + 2 * nb;
      saved = inv_sbox_entry (statemt[hi]);
      statemt[hi] = inv_sbox_entry (statemt[lo]);
      statemt[lo] = saved;
    }
}
/* ******** MixColumn ********** */
/*
 * Combined MixColumn + AddRoundKey step.
 *
 * For each of the nb 4-entry groups of statemt, every entry at position
 * `row` is replaced by
 *     2*s[row] ^ 3*s[row+1] ^ s[row+2] ^ s[row+3] ^ word[row][col + nb * n]
 * where the positions wrap modulo 4 inside the group and the products by 2
 * and 3 are formed with a shift and a conditional XOR with 283 (0x11b).
 * The results are collected in a scratch array first so that all reads see
 * the old state.  n selects which nb-wide slice of `word` is mixed in.
 * Always returns 0.
 */
int
MixColumn_AddRoundKey (int statemt[32], int nb, int n)
{
  int mixed[8 * 4];
  int col, row, base, idx;
  int acc, triple;

  for (col = 0; col < nb; ++col)
    {
      base = col * 4;
      for (row = 0; row < 4; ++row)
        {
          /* 2 * s[row] */
          acc = statemt[base + row] << 1;
          if ((acc >> 8) == 1)
            acc ^= 283;

          /* 3 * s[row + 1] = s ^ (s << 1), reduced when bit 8 is set */
          triple = statemt[base + (row + 1) % 4];
          triple ^= (triple << 1);
          if ((triple >> 8) == 1)
            triple ^= 283;
          acc ^= triple;

          /* remaining two entries of the group plus the round-key entry */
          acc ^= statemt[base + (row + 2) % 4]
            ^ statemt[base + (row + 3) % 4] ^ word[row][col + nb * n];

          mixed[base + row] = acc;
        }
    }

  for (idx = 0; idx < nb * 4; ++idx)
    statemt[idx] = mixed[idx];

  return 0;
}
/* ******** InversMixColumn ********** */
/* One doubling step as used by the inverse mix: shift left by one and XOR
   with 283 (0x11b) when the result, shifted right by 8, equals 1. */
static int
aes_gf_xtime (int v)
{
  v <<= 1;
  if ((v >> 8) == 1)
    v ^= 283;
  return v;
}

/*
 * Combined AddRoundKey + inverse MixColumn step.
 *
 * First XORs slice n of `word` (entries word[row][col + nb * n]) into the
 * nb 4-entry groups of statemt.  Then every entry at position `row` of a
 * group is replaced by
 *     14*s[row] ^ 11*s[row+1] ^ 13*s[row+2] ^ 9*s[row+3]
 * with positions wrapping modulo 4 inside the group; each product is built
 * from three doubling steps with the operand XORed in between as needed.
 * Results go through a scratch array so all reads see the key-added state.
 * Always returns 0.
 */
int
AddRoundKey_InversMixColumn (int statemt[32], int nb, int n)
{
  int mixed[8 * 4];
  int col, row, idx;
  int a, b, c, d;

  for (col = 0; col < nb; ++col)
    for (row = 0; row < 4; ++row)
      statemt[row + col * 4] ^= word[row][col + nb * n];

  for (col = 0; col < nb; ++col)
    for (row = 0; row < 4; ++row)
      {
        a = statemt[row + col * 4];
        b = statemt[(row + 1) % 4 + col * 4];
        c = statemt[(row + 2) % 4 + col * 4];
        d = statemt[(row + 3) % 4 + col * 4];

        /* 14*a = ((2a ^ a)*2 ^ a)*2 */
        a = aes_gf_xtime (aes_gf_xtime (aes_gf_xtime (a) ^ a) ^ a);
        /* 11*b = ((4b ^ b)*2) ^ b */
        b = aes_gf_xtime (aes_gf_xtime (aes_gf_xtime (b)) ^ b) ^ b;
        /* 13*c = ((2c ^ c)*4) ^ c */
        c = aes_gf_xtime (aes_gf_xtime (aes_gf_xtime (c) ^ c)) ^ c;
        /* 9*d = 8d ^ d */
        d = aes_gf_xtime (aes_gf_xtime (aes_gf_xtime (d))) ^ d;

        mixed[row + col * 4] = a ^ b ^ c ^ d;
      }

  for (idx = 0; idx < nb * 4; ++idx)
    statemt[idx] = mixed[idx];

  return 0;
}
/* ******** AddRoundKey ********** */
/*
 * XORs slice n of the expanded key into the state.
 *
 * statemt : state, updated in place (nb groups of four entries).
 * type    : one of the KKKBBB codes below; only the trailing block-size
 *           part decides nb (…128 -> 4, …192 -> 6, …256 -> 8).
 * n       : index of the nb-wide slice of `word` to apply
 *           (entries word[row][j + nb * n]).
 *
 * Returns 0 on success and -1, leaving statemt untouched, when `type` is
 * not one of the listed codes.  Without that default branch nb would be
 * read uninitialised for an unknown type.
 */
int
AddRoundKey (int statemt[32], int type, int n)
{
  int j, nb;
  switch (type)
    {
    case 128128:
    case 192128:
    case 256128:
      nb = 4;
      break;
    case 128192:
    case 192192:
    case 256192:
      nb = 6;
      break;
    case 128256:
    case 192256:
    case 256256:
      nb = 8;
      break;
    default:
      /* Same convention as KeySchedule: unknown type codes are rejected. */
      return -1;
    }
  for (j = 0; j < nb; ++j)
    {
      statemt[j * 4] ^= word[0][j + nb * n];
      statemt[1 + j * 4] ^= word[1][j + nb * n];
      statemt[2 + j * 4] ^= word[2][j + nb * n];
      statemt[3 + j * 4] ^= word[3][j + nb * n];
    }
  return 0;
}

View File

@ -0,0 +1,165 @@
/*
+--------------------------------------------------------------------------+
| CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
| ======================================================================== |
| |
| * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
| H. Takada and K. Ishii |
| Nagoya University, Japan |
| |
| * Remark : |
| 1. This source code is modified to unify the formats of the benchmark |
| programs in CHStone. |
| 2. Test vectors are added for CHStone. |
| 3. If "main_result" is 0 at the end of the program, the program is |
| correctly executed. |
| 4. Please follow the copyright of each benchmark program. |
+--------------------------------------------------------------------------+
*/
/* aes_key.c */
/*
* Copyright (C) 2005
* Akira Iwata & Masayuki Sato
* Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan.
*
* All rights reserved.
*
* This software is written by Masayuki Sato.
* And if you want to contact us, send an email to Kimitake Wakayama
* (wakayama@elcom.nitech.ac.jp)
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. All advertising materials mentioning features or use of this software must
* display the following acknowledgment:
* "This product includes software developed by Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
*
* 4. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
*
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
* AKIRA IWATA LABORATORY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
* IN NO EVENT SHALL AKIRA IWATA LABORATORY BE LIABLE FOR ANY SPECIAL,
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
* NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
*/
/* **************key generate & key display *******************/
/* Round constants XORed into the first entry of the rotated column by
   KeySchedule, which reads Rcon0[(j / nk) - 1]. */
const int Rcon0[30] = {
  0x01, 0x02, 0x04, 0x08, 0x10, 0x20,
  0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8,
  0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc,
  0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4,
  0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91
};
/* **************** key expand ************************ */
/*
 * Expands `key` into the global `word` table.
 *
 * type is a decimal code KKKBBB: KKK is the key size and BBB the block
 * size, each of which must be 128, 192 or 256; every other value makes the
 * function return -1 before anything is written.  With nk = KKK / 32 and
 * nb = BBB / 32 the round count is max(nk, nb) + 6, and columns
 * 0 .. nb * (rounds + 1) - 1 of `word` are filled: the first nk straight
 * from key[row + col * 4], the rest from the recurrence in the loop below.
 * Returns 0 on success.
 */
int
KeySchedule (int type, int key[32])
{
  const int key_bits = type / 1000;
  const int block_bits = type % 1000;
  int nk, cols, rounds, total;
  int i, j, temp[4];

  if (key_bits != 128 && key_bits != 192 && key_bits != 256)
    return -1;
  if (block_bits != 128 && block_bits != 192 && block_bits != 256)
    return -1;

  nk = key_bits / 32;
  cols = block_bits / 32;
  rounds = (nk > cols ? nk : cols) + 6;
  total = cols * (rounds + 1);

  /* Columns 0 .. nk-1 are the key itself. */
  for (i = 0; i < 4; ++i)
    for (j = 0; j < nk; ++j)
      word[i][j] = key[i + j * 4];

  /* Remaining columns of the expanded key. */
  for (j = nk; j < total; ++j)
    {
      if ((j % nk) == 0)
        {
          /* Rotate the previous column up by one, substitute each entry
             and fold the round constant into the first one. */
          temp[0] = SubByte (word[1][j - 1]) ^ Rcon0[(j / nk) - 1];
          temp[1] = SubByte (word[2][j - 1]);
          temp[2] = SubByte (word[3][j - 1]);
          temp[3] = SubByte (word[0][j - 1]);
        }
      else
        {
          for (i = 0; i < 4; ++i)
            temp[i] = word[i][j - 1];
        }

      /* Extra substitution applied only when nk is 8. */
      if (nk > 6 && j % nk == 4)
        for (i = 0; i < 4; ++i)
          temp[i] = SubByte (temp[i]);

      for (i = 0; i < 4; ++i)
        word[i][j] = word[i][j - nk] ^ temp[i];
    }
  return 0;
}

View File

@ -0,0 +1,98 @@
// Based on code by Schuyler Eldridge. Copyright (c) Boston University
// https://github.com/seldridge/rocket-rocc-examples/blob/master/src/main/c/rocc.h
#ifndef SRC_MAIN_C_ROCC_H
#define SRC_MAIN_C_ROCC_H
#include <stdint.h>
// Two-level stringification: STR(x) macro-expands x first and then turns
// the result into a string literal.
#define STR1(x) #x
#define STR(x) STR1(x)
// Yields the `size`-bit field of `a` that starts at bit `offset`.
#define EXTRACT(a, size, offset) (((~(~0 << size) << offset) & a) >> offset)
// CUSTOMX_OPCODE(n) pastes n onto CUSTOM_ and so selects one of the four
// opcode values below.
#define CUSTOMX_OPCODE(x) CUSTOM_ ## x
#define CUSTOM_0 0b0001011
#define CUSTOM_1 0b0101011
#define CUSTOM_2 0b1011011
#define CUSTOM_3 0b1111011
// Assembles the instruction word: opcode in the low 7 bits, rd at bit 7,
// the xs2 / xs1 / xd flags at bits 12 / 13 / 14, rs1 at bit 15, rs2 at
// bit 20 and the low 7 bits of funct at bit 25.
#define CUSTOMX(X, xd, xs1, xs2, rd, rs1, rs2, funct) \
CUSTOMX_OPCODE(X) | \
(rd << (7)) | \
(xs2 << (7+5)) | \
(xs1 << (7+5+1)) | \
(xd << (7+5+2)) | \
(rs1 << (7+5+3)) | \
(rs2 << (7+5+3+5)) | \
(EXTRACT(funct, 7, 0) << (7+5+3+5+5))
// Convenience wrappers with fixed register numbers: rd is bound to x10,
// rs1 to x11 and rs2 to x12.  Operands a variant does not use are encoded
// as 0.
#define ROCC_INSTRUCTION_DSS(X, rd, rs1, rs2, funct) \
ROCC_INSTRUCTION_R_R_R(X, rd, rs1, rs2, funct, 10, 11, 12)
#define ROCC_INSTRUCTION_DS(X, rd, rs1, funct) \
ROCC_INSTRUCTION_R_R_I(X, rd, rs1, 0, funct, 10, 11)
#define ROCC_INSTRUCTION_D(X, rd, funct) \
ROCC_INSTRUCTION_R_I_I(X, rd, 0, 0, funct, 10)
#define ROCC_INSTRUCTION_SS(X, rs1, rs2, funct) \
ROCC_INSTRUCTION_I_R_R(X, 0, rs1, rs2, funct, 11, 12)
#define ROCC_INSTRUCTION_S(X, rs1, funct) \
ROCC_INSTRUCTION_I_R_I(X, 0, rs1, 0, funct, 11)
#define ROCC_INSTRUCTION(X, funct) \
ROCC_INSTRUCTION_I_I_I(X, 0, 0, 0, funct)
// rd, rs1, and rs2 are data
// rd_n, rs1_n, and rs2_n are the register numbers to use
// Each macro below expands to a braced block that pins the operands to the
// named x registers, emits the encoded instruction through `.word`, and
// (for the variants with a destination) copies the result into rd.
// In the suffix, R marks an operand passed in a register and I an operand
// that is only encoded into the instruction word.
#define ROCC_INSTRUCTION_R_R_R(X, rd, rs1, rs2, funct, rd_n, rs1_n, rs2_n) { \
register uint64_t rd_ asm ("x" # rd_n); \
register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \
register uint64_t rs2_ asm ("x" # rs2_n) = (uint64_t) rs2; \
asm volatile ( \
".word " STR(CUSTOMX(X, 1, 1, 1, rd_n, rs1_n, rs2_n, funct)) "\n\t" \
: "=r" (rd_) \
: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
rd = rd_; \
}
#define ROCC_INSTRUCTION_R_R_I(X, rd, rs1, rs2, funct, rd_n, rs1_n) { \
register uint64_t rd_ asm ("x" # rd_n); \
register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \
asm volatile ( \
".word " STR(CUSTOMX(X, 1, 1, 0, rd_n, rs1_n, rs2, funct)) "\n\t" \
: "=r" (rd_) : [_rs1] "r" (rs1_)); \
rd = rd_; \
}
#define ROCC_INSTRUCTION_R_I_I(X, rd, rs1, rs2, funct, rd_n) { \
register uint64_t rd_ asm ("x" # rd_n); \
asm volatile ( \
".word " STR(CUSTOMX(X, 1, 0, 0, rd_n, rs1, rs2, funct)) "\n\t" \
: "=r" (rd_)); \
rd = rd_; \
}
#define ROCC_INSTRUCTION_I_R_R(X, rd, rs1, rs2, funct, rs1_n, rs2_n) { \
register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \
register uint64_t rs2_ asm ("x" # rs2_n) = (uint64_t) rs2; \
asm volatile ( \
".word " STR(CUSTOMX(X, 0, 1, 1, rd, rs1_n, rs2_n, funct)) "\n\t" \
:: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
}
#define ROCC_INSTRUCTION_I_R_I(X, rd, rs1, rs2, funct, rs1_n) { \
register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \
asm volatile ( \
".word " STR(CUSTOMX(X, 0, 1, 0, rd, rs1_n, rs2, funct)) "\n\t" \
:: [_rs1] "r" (rs1_)); \
}
#define ROCC_INSTRUCTION_I_I_I(X, rd, rs1, rs2, funct) { \
asm volatile ( \
".word " STR(CUSTOMX(X, 0, 0, 0, rd, rs1, rs2, funct)) "\n\t" ); \
}
#endif // SRC_MAIN_C_ROCC_H

View File

@ -0,0 +1,19 @@
# Per-workload settings; the build rules themselves come from ../Makefile,
# which is included at the end and sees every variable set here.
VERBOSE= 1
# TARGET can be overridden from the command line or the environment.
TARGET ?=aes_tl
FUNC=encrypt
LDFLAGS=
CFLAGS=
POSTFIX=
# CUSTOM_INST=1 sets POSTFIX to .inst and compiles with -DCUSTOM_INST.
ifeq ($(CUSTOM_INST), 1)
POSTFIX=.inst
CFLAGS+=-DCUSTOM_INST
endif
# CUSTOM_DRIVER=1 sets POSTFIX to .driver and compiles with -DCUSTOM_DRIVER.
# When both switches are 1, this later assignment decides POSTFIX while both
# -D flags stay in CFLAGS.
ifeq ($(CUSTOM_DRIVER), 1)
POSTFIX=.driver
CFLAGS+=-DCUSTOM_DRIVER
endif
include ../Makefile

View File

@ -0,0 +1,138 @@
/*
+--------------------------------------------------------------------------+
| CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
| ======================================================================== |
| |
| * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
| H. Takada and K. Ishii |
| Nagoya University, Japan |
| |
| * Remark : |
| 1. This source code is modified to unify the formats of the benchmark |
| programs in CHStone. |
| 2. Test vectors are added for CHStone. |
| 3. If "main_result" is 0 at the end of the program, the program is |
| correctly executed. |
| 4. Please follow the copyright of each benchmark program. |
+--------------------------------------------------------------------------+
*/
/* aes_enc.c */
/*
* Copyright (C) 2005
* Akira Iwata & Masayuki Sato
* Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan.
*
* All rights reserved.
*
* This software is written by Masayuki Sato.
* And if you want to contact us, send an email to Kimitake Wakayama
* (wakayama@elcom.nitech.ac.jp)
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. All advertising materials mentioning features or use of this software must
* display the following acknowledgment:
* "This product includes software developed by Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
*
* 4. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
*
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
* AKIRA IWATA LABORATORY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
* IN NO EVENT SHALL AKIRA IWATA LABORATORY BE LIABLE FOR ANY SPECIAL,
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
* NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
*/
/* ******* encrypto ************ */
/*
 * Encrypts the state in place and checks it against the CHStone test
 * vector.
 *
 * statemt : state to encrypt, overwritten with the result.
 * key     : cipher key handed to KeySchedule.
 * type    : KKKBBB code (key size, block size) accepted by KeySchedule.
 *
 * Side effects: rebuilds the expanded key via KeySchedule, sets the globals
 * nb and round_val from `type`, prints the first nb * 4 state entries as
 * hex, and adds to main_result the number of the first 16 entries that
 * differ from out_enc_statemt.
 *
 * Returns 0 after a complete run, or -1 without touching statemt when
 * KeySchedule rejects `type`.  Previously that failure was ignored and the
 * rounds ran with whatever nb / round_val were left from an earlier call.
 */
int
encrypt (int statemt[32], int key[32], int type)
{
#pragma HLS INTERFACE m_axi port=statemt offset=slave bundle=gmem0 // Direct is for AXI with full 32 bit address space
#pragma HLS INTERFACE m_axi port=key offset=slave bundle=gmem0 // Slave is for AXI4Lite, with burst mode disabled
#pragma HLS INTERFACE s_axilite port=statemt bundle=control
#pragma HLS INTERFACE s_axilite port=key bundle=control
#pragma HLS INTERFACE s_axilite port=type bundle=control
#pragma HLS INTERFACE s_axilite port=return bundle=control
  int i;
/*
+--------------------------------------------------------------------------+
| * Test Vector (added for CHStone)                                        |
|     out_enc_statemt : expected output data for "encrypt"                 |
+--------------------------------------------------------------------------+
*/
  const int out_enc_statemt[16] =
    { 0x39, 0x25, 0x84, 0x1d, 0x2, 0xdc, 0x9, 0xfb, 0xdc, 0x11, 0x85, 0x97,
    0x19, 0x6a, 0xb, 0x32
  };

  /* KeySchedule returns -1 for a type code it does not know; none of the
     cases below would match either, so stop here. */
  if (KeySchedule (type, key) != 0)
    return -1;

  /* round_val is the number of rounds beyond the base count used in the
     loop bound below; nb is the number of 4-entry groups in the state. */
  switch (type)
    {
    case 128128:
      round_val = 0;
      nb = 4;
      break;
    case 192128:
      round_val = 2;
      nb = 4;
      break;
    case 256128:
      round_val = 4;
      nb = 4;
      break;
    case 128192:
    case 192192:
      round_val = 2;
      nb = 6;
      break;
    case 256192:
      round_val = 4;
      nb = 6;
      break;
    case 128256:
    case 192256:
    case 256256:
      round_val = 4;
      nb = 8;
      break;
    }

  AddRoundKey (statemt, type, 0);
  for (i = 1; i <= round_val + 9; ++i)
    {
      ByteSub_ShiftRow (statemt, nb);
      MixColumn_AddRoundKey (statemt, nb, i);
    }
  /* Last round has no MixColumn; i is round_val + 10 here. */
  ByteSub_ShiftRow (statemt, nb);
  AddRoundKey (statemt, type, i);

  printf ("encrypted message \t");
  for (i = 0; i < nb * 4; ++i)
    {
      if (statemt[i] < 16)
        printf ("0");
      printf ("%x", statemt[i]);
    }

  for (i = 0; i < 16; i++)
    main_result += (statemt[i] != out_enc_statemt[i]);
  return 0;
}

Binary file not shown.

View File

@ -0,0 +1,203 @@
/*
+--------------------------------------------------------------------------+
| CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
| ======================================================================== |
| |
| * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
| H. Takada and K. Ishii |
| Nagoya University, Japan |
| |
| * Remark : |
| 1. This source code is modified to unify the formats of the benchmark |
| programs in CHStone. |
| 2. Test vectors are added for CHStone. |
| 3. If "main_result" is 0 at the end of the program, the program is |
| correctly executed. |
| 4. Please follow the copyright of each benchmark program. |
+--------------------------------------------------------------------------+
*/
/* aes.c */
/*
* Copyright (C) 2005
* Akira Iwata & Masayuki Sato
* Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan.
*
* All rights reserved.
*
* This software is written by Masayuki Sato.
* And if you want to contact us, send an email to Kimitake Wakayama
* (wakayama@elcom.nitech.ac.jp)
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. All advertising materials mentioning features or use of this software must
* display the following acknowledgment:
* "This product includes software developed by Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
*
* 4. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
*
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
* AKIRA IWATA LABORATORY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
* IN NO EVENT SHALL AKIRA IWATA LABORATORY BE LIABLE FOR ANY SPECIAL,
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
* NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
*/
#include <stdio.h>
#include "../os_utils.h"
#define ACCEL_CONTROL 0x40000
#define AP_DONE_MASK 0b10
#define ACCEL_INT 0x40004
#define ACCEL_STATEMT 0x40018
#define ACCEL_KEY 0x40024
#define ACCEL_TYPE 0x40030
#define ACCEL_RET 0x40010
int main_result;
#include "aes.h"
#include "aes_enc.c"
#include "aes_dec.c"
#include "aes_key.c"
#include "aes_func.c"
#include "../custom_mmap/mmap_driver.c"
/*
 * Runs one accelerator invocation through access_addr().
 *
 * Sequence: clear ACCEL_INT, hand over the translated addresses of statemt
 * and key (low word at the register, addr >> 32 at register + 4), write
 * `type`, write 0x1 to ACCEL_CONTROL and then spin until a read of
 * ACCEL_CONTROL shows the AP_DONE_MASK bit.
 *
 * vtop_translate() is presumably a virtual-to-physical translation -- its
 * definition is not in this file, confirm against mmap_driver.c.
 * Always returns 0; the accelerator's return register is not read.
 */
int decrypt_accel(int* statemt, int* key, int type){
    uint64_t state_addr;
    uint64_t key_addr;
    int finished;

    /* Interrupts stay disabled; completion is detected by polling. */
    access_addr(ACCEL_INT, OUT, 0);

    state_addr = vtop_translate(statemt);
    access_addr(ACCEL_STATEMT, OUT, state_addr);
    access_addr(ACCEL_STATEMT + 4, OUT, state_addr >> 32);

    key_addr = vtop_translate(key);
    access_addr(ACCEL_KEY, OUT, key_addr);
    access_addr(ACCEL_KEY + 4, OUT, key_addr >> 32);

    access_addr(ACCEL_TYPE, OUT, type);

    /* Start the run. */
    access_addr(ACCEL_CONTROL, OUT, 0x1);

    /* Busy-wait for the done bit. */
    do {
        finished = access_addr(ACCEL_CONTROL, IN, 0x1) & AP_DONE_MASK;
    } while (!finished);

    return 0;
}
/* ***************** main **************************** */
/*
 * Loads the CHStone test vector into the global statemt / key arrays
 * (first 16 entries each), encrypts it with type 128128, then times the
 * matching decryption between two read_cycle() calls and reports the
 * difference through duration().  With CUSTOM_DRIVER defined the timed call
 * is decrypt_accel(), otherwise decrypt().  Always returns 0.
 */
int
aes_main (void)
{
  /* Test vectors (added for CHStone): input data for statemt and key. */
  static const int test_statemt[16] = {
    50, 67, 246, 168, 136, 90, 48, 141,
    49, 49, 152, 162, 224, 55, 7, 52
  };
  static const int test_key[16] = {
    43, 126, 21, 22, 40, 174, 210, 166,
    171, 247, 21, 136, 9, 207, 79, 60
  };
  int64_t begin, end;
  int idx;

  for (idx = 0; idx < 16; ++idx)
    statemt[idx] = test_statemt[idx];
  for (idx = 0; idx < 16; ++idx)
    key[idx] = test_key[idx];

  encrypt (statemt, key, 128128);

  begin = read_cycle();
#ifdef CUSTOM_DRIVER
  decrypt_accel(statemt, key, 128128);
#else
  decrypt (statemt, key, 128128);
#endif
  end = read_cycle();
  duration(begin, end);

  return 0;
}
/* Program entry: clears main_result, runs aes_main(), prints the resulting
   main_result on its own line and uses it as the exit status. */
int
main ()
{
  int status;

  main_result = 0;
  (void) aes_main ();
  status = main_result;
  printf ("\n%d\n", status);
  return status;
}

View File

@ -0,0 +1,21 @@
#ifndef __TIME_H__
#define __TIME_H__
#include <inttypes.h>
#include<stdio.h>
//#include "encoding.h"
/* Returns the value delivered by the `rdcycle` instruction. */
uint64_t read_cycle() {
  uint64_t cycles = 0;

  asm volatile("rdcycle %0 " : "=r"(cycles));
  return cycles;
}
/*
 * Prints and returns the difference end - start.
 *
 * The subtraction is done in uint64_t, so a counter that wrapped between
 * the two samples still yields the modular difference.  The value is
 * printed with PRIu64: the previous "%ld" did not match the uint64_t
 * argument and showed large differences as negative numbers.
 */
uint64_t duration(uint64_t start, uint64_t end){
  uint64_t dur = end - start;
  printf("Duration: %" PRIu64 ". \n", dur);
  return dur;
}
#endif

View File

@ -0,0 +1,19 @@
# Per-workload settings; the build rules themselves come from ../Makefile,
# which is included at the end and sees every variable set here.
VERBOSE= 1
# TARGET can be overridden from the command line or the environment.
TARGET ?=aes_tl
FUNC=encrypt
LDFLAGS=
CFLAGS=
POSTFIX=
# CUSTOM_INST=1 sets POSTFIX to .inst and compiles with -DCUSTOM_INST.
ifeq ($(CUSTOM_INST), 1)
POSTFIX=.inst
CFLAGS+=-DCUSTOM_INST
endif
# CUSTOM_DRIVER=1 sets POSTFIX to .driver and compiles with -DCUSTOM_DRIVER.
# When both switches are 1, this later assignment decides POSTFIX while both
# -D flags stay in CFLAGS.
ifeq ($(CUSTOM_DRIVER), 1)
POSTFIX=.driver
CFLAGS+=-DCUSTOM_DRIVER
endif
include ../Makefile

View File

@ -0,0 +1,83 @@
/*
+--------------------------------------------------------------------------+
| CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
| ======================================================================== |
| |
| * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
| H. Takada and K. Ishii |
| Nagoya University, Japan |
| |
| * Remark : |
| 1. This source code is modified to unify the formats of the benchmark |
| programs in CHStone. |
| 2. Test vectors are added for CHStone. |
| 3. If "main_result" is 0 at the end of the program, the program is |
| correctly executed. |
| 4. Please follow the copyright of each benchmark program. |
+--------------------------------------------------------------------------+
*/
/* aes.h */
/*
* Copyright (C) 2005
* Akira Iwata & Masayuki Sato
* Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan.
*
* All rights reserved.
*
* This software is written by Masayuki Sato.
* And if you want to contact us, send an email to Kimitake Wakayama
* (wakayama@elcom.nitech.ac.jp)
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. All advertising materials mentioning features or use of this software must
* display the following acknowledgment:
* "This product includes software developed by Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
*
* 4. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
*
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
* AKIRA IWATA LABORATORY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
* IN NO EVENT SHALL AKIRA IWATA LABORATORY BE LIABLE FOR ANY SPECIAL,
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
* NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
*/
/* ************* shared state and function prototypes ************************* */
/*
 * Global working state of the AES benchmark.
 *
 * NOTE(review): these are definitions (not `extern` declarations) placed in a
 * header without an include guard.  That works in this build only because
 * aes.c includes aes.h once and then #includes the other .c files into the
 * same translation unit.
 */
/* NOTE(review): not referenced as a global in the visible code (the functions
 * take their own `type` parameter) -- confirm whether it is still needed. */
int type;
/* Block-size parameter (4, 6 or 8); set by decrypt() from the type code. */
int nb;
/* Number of rounds (10, 12 or 14); set by decrypt() from the type code. */
int round_val;
/* Key buffer; aes_main() fills entries 0..15, one byte value per int. */
int key[32];
/* State (data block) buffer; aes_main() fills entries 0..15, one byte value
 * per int.  It is transformed in place by the round functions. */
int statemt[32];
/* Expanded key written by KeySchedule(): word[row][column]. */
int word[4][120];
/* key generation */
int KeySchedule (int, int *);
int SubByte (int);
/* encryption / decryption */
void ByteSub_ShiftRow (int *, int);
void InversShiftRow_ByteSub (int *, int);
int MixColumn_AddRoundKey (int *, int, int);
int AddRoundKey_InversMixColumn (int *, int, int);
int AddRoundKey (int *, int, int);
int encrypt (int *, int *, int);
int decrypt (int *, int *, int);

View File

@ -0,0 +1,145 @@
/*
+--------------------------------------------------------------------------+
| CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
| ======================================================================== |
| |
| * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
| H. Takada and K. Ishii |
| Nagoya University, Japan |
| |
| * Remark : |
| 1. This source code is modified to unify the formats of the benchmark |
| programs in CHStone. |
| 2. Test vectors are added for CHStone. |
| 3. If "main_result" is 0 at the end of the program, the program is |
| correctly executed. |
| 4. Please follow the copyright of each benchmark program. |
+--------------------------------------------------------------------------+
*/
/* aes_dec.c */
/*
* Copyright (C) 2005
* Akira Iwata & Masayuki Sato
* Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan.
*
* All rights reserved.
*
* This software is written by Masayuki Sato.
* And if you want to contact us, send an email to Kimitake Wakayama
* (wakayama@elcom.nitech.ac.jp)
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. All advertising materials mentioning features or use of this software must
* display the following acknowledgment:
* "This product includes software developed by Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
*
* 4. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
*
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
* AKIRA IWATA LABORATORY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
* IN NO EVENT SHALL AKIRA IWATA LABORATORY BE LIABLE FOR ANY SPECIAL,
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
* NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
*/
/*
 * decrypt - decrypt the block held in statemt in place and check it against
 * the built-in test vector.
 *
 * statemt : data block, one byte value per int; overwritten with the result.
 * key     : cipher key, one byte value per int; expanded via KeySchedule().
 * type    : six-digit configuration code (128128, 128192, ..., 256256) that
 *           selects the global round count (round_val) and block-size
 *           parameter (nb).
 *
 * Side effects: writes the globals round_val and nb, prints the decrypted
 * bytes in hex, and adds the number of mismatches among the first 16 state
 * entries to the global main_result.
 *
 * Returns 0 on success, or -1 when type is not one of the supported codes.
 * In that case nothing is decrypted, printed or counted.
 */
int
decrypt (int statemt[32], int key[32], int type)
{
#pragma HLS INTERFACE m_axi port=statemt offset=slave bundle=gmem0 // Direct is for AXI with full 32 bit address space
#pragma HLS INTERFACE m_axi port=key offset=slave bundle=gmem0 // Slave is for AXI4Lite, with burst mode disabled
#pragma HLS INTERFACE s_axilite port=statemt bundle=control
#pragma HLS INTERFACE s_axilite port=key bundle=control
#pragma HLS INTERFACE s_axilite port=type bundle=control
#pragma HLS INTERFACE s_axilite port=return bundle=control
  int i;
/*
+--------------------------------------------------------------------------+
| * Test Vector (added for CHStone)                                        |
|     out_dec_statemt : expected output data for "decrypt"                 |
+--------------------------------------------------------------------------+
*/
  const int out_dec_statemt[16] =
    { 0x32, 0x43, 0xf6, 0xa8, 0x88, 0x5a, 0x30, 0x8d, 0x31, 0x31, 0x98, 0xa2,
    0xe0, 0x37, 0x7, 0x34
  };

  /* KeySchedule() returns -1 for an unsupported type.  Continuing would run
     the rounds with stale round_val/nb values and an unexpanded key.  */
  if (KeySchedule (type, key) != 0)
    return -1;

  switch (type)
    {
    case 128128:
      round_val = 10;
      nb = 4;
      break;
    case 128192:
    case 192192:
      round_val = 12;
      nb = 6;
      break;
    case 192128:
      round_val = 12;
      nb = 4;
      break;
    case 128256:
    case 192256:
      round_val = 14;
      nb = 8;
      break;
    case 256128:
      round_val = 14;
      nb = 4;
      break;
    case 256192:
      round_val = 14;
      nb = 6;
      break;
    case 256256:
      round_val = 14;
      nb = 8;
      break;
    default:
      /* Defensive: keeps this switch complete even if KeySchedule's set of
         accepted codes ever diverges from this one.  */
      return -1;
    }

  /* Undo the final encryption round, which has no MixColumn step.  */
  AddRoundKey (statemt, type, round_val);
  InversShiftRow_ByteSub (statemt, nb);

  /* Undo the remaining full rounds, last to first.  */
  for (i = round_val - 1; i >= 1; --i)
    {
      AddRoundKey_InversMixColumn (statemt, nb, i);
      InversShiftRow_ByteSub (statemt, nb);
    }

  /* Remove the initial round key.  */
  AddRoundKey (statemt, type, 0);

  printf ("\ndecrypto message\t");
  /* The low three digits of type give the size in bits; print one hex pair
     per byte.  */
  for (i = 0; i < ((type % 1000) / 8); ++i)
    {
      if (statemt[i] < 16)
	printf ("0");
      printf ("%x", statemt[i]);
    }

  /* Self-check against the expected plaintext (first 16 bytes only).  */
  for (i = 0; i < 16; i++)
    main_result += (statemt[i] != out_dec_statemt[i]);
  return 0;
}

View File

@ -130,7 +130,7 @@ encrypt (int statemt[32], int key[32], int type)
// printf ("0");
// printf ("%x", statemt[i]);
// }
//
for (i = 0; i < 16; i++)
main_result += (statemt[i] != out_enc_statemt[i]);

View File

@ -0,0 +1,542 @@
/*
+--------------------------------------------------------------------------+
| CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
| ======================================================================== |
| |
| * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
| H. Takada and K. Ishii |
| Nagoya University, Japan |
| |
| * Remark : |
| 1. This source code is modified to unify the formats of the benchmark |
| programs in CHStone. |
| 2. Test vectors are added for CHStone. |
| 3. If "main_result" is 0 at the end of the program, the program is |
| correctly executed. |
| 4. Please follow the copyright of each benchmark program. |
+--------------------------------------------------------------------------+
*/
/* aes_function.c */
/*
* Copyright (C) 2005
* Akira Iwata & Masayuki Sato
* Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan.
*
* All rights reserved.
*
* This software is written by Masayuki Sato.
* And if you want to contact us, send an email to Kimitake Wakayama
* (wakayama@elcom.nitech.ac.jp)
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. All advertising materials mentioning features or use of this software must
* display the following acknowledgment:
* "This product includes software developed by Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
*
* 4. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
*
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
* AKIRA IWATA LABORATORY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
* IN NO EVENT SHALL AKIRA IWATA LABORATORY BE LIABLE FOR ANY SPECIAL,
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
* NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
*/
/*
 * Forward byte-substitution table.  It is indexed as
 * Sbox[high nibble][low nibble] of the input byte, which is how
 * ByteSub_ShiftRow() (x >> 4, x & 0xf) and SubByte() (x / 16, x % 16)
 * look values up.
 */
const int Sbox[16][16] = {
{0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b,
0xfe, 0xd7, 0xab, 0x76},
{0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf,
0x9c, 0xa4, 0x72, 0xc0},
{0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1,
0x71, 0xd8, 0x31, 0x15},
{0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2,
0xeb, 0x27, 0xb2, 0x75},
{0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3,
0x29, 0xe3, 0x2f, 0x84},
{0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39,
0x4a, 0x4c, 0x58, 0xcf},
{0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f,
0x50, 0x3c, 0x9f, 0xa8},
{0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21,
0x10, 0xff, 0xf3, 0xd2},
{0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d,
0x64, 0x5d, 0x19, 0x73},
{0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14,
0xde, 0x5e, 0x0b, 0xdb},
{0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62,
0x91, 0x95, 0xe4, 0x79},
{0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea,
0x65, 0x7a, 0xae, 0x08},
{0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f,
0x4b, 0xbd, 0x8b, 0x8a},
{0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9,
0x86, 0xc1, 0x1d, 0x9e},
{0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9,
0xce, 0x55, 0x28, 0xdf},
{0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f,
0xb0, 0x54, 0xbb, 0x16}
};
/*
 * Inverse byte-substitution table used by InversShiftRow_ByteSub().  It is
 * indexed as invSbox[high nibble][low nibble] of the input byte.
 */
const int invSbox[16][16] = {
{0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e,
0x81, 0xf3, 0xd7, 0xfb},
{0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44,
0xc4, 0xde, 0xe9, 0xcb},
{0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b,
0x42, 0xfa, 0xc3, 0x4e},
{0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49,
0x6d, 0x8b, 0xd1, 0x25},
{0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc,
0x5d, 0x65, 0xb6, 0x92},
{0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57,
0xa7, 0x8d, 0x9d, 0x84},
{0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05,
0xb8, 0xb3, 0x45, 0x06},
{0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03,
0x01, 0x13, 0x8a, 0x6b},
{0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce,
0xf0, 0xb4, 0xe6, 0x73},
{0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8,
0x1c, 0x75, 0xdf, 0x6e},
{0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e,
0xaa, 0x18, 0xbe, 0x1b},
{0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe,
0x78, 0xcd, 0x5a, 0xf4},
{0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59,
0x27, 0x80, 0xec, 0x5f},
{0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f,
0x93, 0xc9, 0x9c, 0xef},
{0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c,
0x83, 0x53, 0x99, 0x61},
{0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63,
0x55, 0x21, 0x0c, 0x7d}
};
/* ********* ByteSub & ShiftRow ********* */
/*
 * ByteSub_ShiftRow - apply the Sbox substitution and the row rotation to the
 * state in a single in-place pass.
 *
 * statemt : state, one byte value per int, stored column by column
 *           (row r of column c is statemt[r + 4 * c]).  Every entry used
 *           must be in 0..255, otherwise the Sbox lookup is out of bounds.
 * nb      : number of state columns; only 4, 6 and 8 are handled.  Any other
 *           value leaves statemt untouched.
 *
 * Each row is rotated through a single `temp`, so the order of the
 * assignments inside every chain matters.
 */
void
ByteSub_ShiftRow (int statemt[32], int nb)
{
int temp;
switch (nb)
{
case 4:
/* row 1: rotate by one column (1 <- 5 <- 9 <- 13 <- 1) */
temp = Sbox[statemt[1] >> 4][statemt[1] & 0xf];
statemt[1] = Sbox[statemt[5] >> 4][statemt[5] & 0xf];
statemt[5] = Sbox[statemt[9] >> 4][statemt[9] & 0xf];
statemt[9] = Sbox[statemt[13] >> 4][statemt[13] & 0xf];
statemt[13] = temp;
/* row 2: rotate by two columns (two swaps) */
temp = Sbox[statemt[2] >> 4][statemt[2] & 0xf];
statemt[2] = Sbox[statemt[10] >> 4][statemt[10] & 0xf];
statemt[10] = temp;
temp = Sbox[statemt[6] >> 4][statemt[6] & 0xf];
statemt[6] = Sbox[statemt[14] >> 4][statemt[14] & 0xf];
statemt[14] = temp;
/* row 3: rotate by three columns (3 <- 15 <- 11 <- 7 <- 3) */
temp = Sbox[statemt[3] >> 4][statemt[3] & 0xf];
statemt[3] = Sbox[statemt[15] >> 4][statemt[15] & 0xf];
statemt[15] = Sbox[statemt[11] >> 4][statemt[11] & 0xf];
statemt[11] = Sbox[statemt[7] >> 4][statemt[7] & 0xf];
statemt[7] = temp;
/* row 0: substituted in place, not rotated */
statemt[0] = Sbox[statemt[0] >> 4][statemt[0] & 0xf];
statemt[4] = Sbox[statemt[4] >> 4][statemt[4] & 0xf];
statemt[8] = Sbox[statemt[8] >> 4][statemt[8] & 0xf];
statemt[12] = Sbox[statemt[12] >> 4][statemt[12] & 0xf];
break;
case 6:
/* row 1: rotate by one column across six columns */
temp = Sbox[statemt[1] >> 4][statemt[1] & 0xf];
statemt[1] = Sbox[statemt[5] >> 4][statemt[5] & 0xf];
statemt[5] = Sbox[statemt[9] >> 4][statemt[9] & 0xf];
statemt[9] = Sbox[statemt[13] >> 4][statemt[13] & 0xf];
statemt[13] = Sbox[statemt[17] >> 4][statemt[17] & 0xf];
statemt[17] = Sbox[statemt[21] >> 4][statemt[21] & 0xf];
statemt[21] = temp;
/* row 2: rotate by two columns (two independent 3-cycles) */
temp = Sbox[statemt[2] >> 4][statemt[2] & 0xf];
statemt[2] = Sbox[statemt[10] >> 4][statemt[10] & 0xf];
statemt[10] = Sbox[statemt[18] >> 4][statemt[18] & 0xf];
statemt[18] = temp;
temp = Sbox[statemt[6] >> 4][statemt[6] & 0xf];
statemt[6] = Sbox[statemt[14] >> 4][statemt[14] & 0xf];
statemt[14] = Sbox[statemt[22] >> 4][statemt[22] & 0xf];
statemt[22] = temp;
/* row 3: rotate by three columns (three swaps) */
temp = Sbox[statemt[3] >> 4][statemt[3] & 0xf];
statemt[3] = Sbox[statemt[15] >> 4][statemt[15] & 0xf];
statemt[15] = temp;
temp = Sbox[statemt[7] >> 4][statemt[7] & 0xf];
statemt[7] = Sbox[statemt[19] >> 4][statemt[19] & 0xf];
statemt[19] = temp;
temp = Sbox[statemt[11] >> 4][statemt[11] & 0xf];
statemt[11] = Sbox[statemt[23] >> 4][statemt[23] & 0xf];
statemt[23] = temp;
/* row 0: substituted in place, not rotated */
statemt[0] = Sbox[statemt[0] >> 4][statemt[0] & 0xf];
statemt[4] = Sbox[statemt[4] >> 4][statemt[4] & 0xf];
statemt[8] = Sbox[statemt[8] >> 4][statemt[8] & 0xf];
statemt[12] = Sbox[statemt[12] >> 4][statemt[12] & 0xf];
statemt[16] = Sbox[statemt[16] >> 4][statemt[16] & 0xf];
statemt[20] = Sbox[statemt[20] >> 4][statemt[20] & 0xf];
break;
case 8:
/* row 1: rotate by one column across eight columns */
temp = Sbox[statemt[1] >> 4][statemt[1] & 0xf];
statemt[1] = Sbox[statemt[5] >> 4][statemt[5] & 0xf];
statemt[5] = Sbox[statemt[9] >> 4][statemt[9] & 0xf];
statemt[9] = Sbox[statemt[13] >> 4][statemt[13] & 0xf];
statemt[13] = Sbox[statemt[17] >> 4][statemt[17] & 0xf];
statemt[17] = Sbox[statemt[21] >> 4][statemt[21] & 0xf];
statemt[21] = Sbox[statemt[25] >> 4][statemt[25] & 0xf];
statemt[25] = Sbox[statemt[29] >> 4][statemt[29] & 0xf];
statemt[29] = temp;
/* row 2: rotate by three columns (one 8-cycle) */
temp = Sbox[statemt[2] >> 4][statemt[2] & 0xf];
statemt[2] = Sbox[statemt[14] >> 4][statemt[14] & 0xf];
statemt[14] = Sbox[statemt[26] >> 4][statemt[26] & 0xf];
statemt[26] = Sbox[statemt[6] >> 4][statemt[6] & 0xf];
statemt[6] = Sbox[statemt[18] >> 4][statemt[18] & 0xf];
statemt[18] = Sbox[statemt[30] >> 4][statemt[30] & 0xf];
statemt[30] = Sbox[statemt[10] >> 4][statemt[10] & 0xf];
statemt[10] = Sbox[statemt[22] >> 4][statemt[22] & 0xf];
statemt[22] = temp;
/* row 3: rotate by four columns (four swaps) */
temp = Sbox[statemt[3] >> 4][statemt[3] & 0xf];
statemt[3] = Sbox[statemt[19] >> 4][statemt[19] & 0xf];
statemt[19] = temp;
temp = Sbox[statemt[7] >> 4][statemt[7] & 0xf];
statemt[7] = Sbox[statemt[23] >> 4][statemt[23] & 0xf];
statemt[23] = temp;
temp = Sbox[statemt[11] >> 4][statemt[11] & 0xf];
statemt[11] = Sbox[statemt[27] >> 4][statemt[27] & 0xf];
statemt[27] = temp;
temp = Sbox[statemt[15] >> 4][statemt[15] & 0xf];
statemt[15] = Sbox[statemt[31] >> 4][statemt[31] & 0xf];
statemt[31] = temp;
/* row 0: substituted in place, not rotated */
statemt[0] = Sbox[statemt[0] >> 4][statemt[0] & 0xf];
statemt[4] = Sbox[statemt[4] >> 4][statemt[4] & 0xf];
statemt[8] = Sbox[statemt[8] >> 4][statemt[8] & 0xf];
statemt[12] = Sbox[statemt[12] >> 4][statemt[12] & 0xf];
statemt[16] = Sbox[statemt[16] >> 4][statemt[16] & 0xf];
statemt[20] = Sbox[statemt[20] >> 4][statemt[20] & 0xf];
statemt[24] = Sbox[statemt[24] >> 4][statemt[24] & 0xf];
statemt[28] = Sbox[statemt[28] >> 4][statemt[28] & 0xf];
break;
}
}
/*
 * SubByte - look up a single byte value in the forward substitution table.
 *
 * in : value to substitute; must be in 0..255 so that both table indices
 *      stay within the 16x16 table.
 *
 * Returns the Sbox entry selected by the high nibble (row) and the low
 * nibble (column) of `in`.
 */
int
SubByte (int in)
{
  const int row = in / 16;
  const int col = in % 16;

  return Sbox[row][col];
}
/* ********* InversShiftRow & ByteSub ********* */
/*
 * InversShiftRow_ByteSub - undo the row rotation and apply the invSbox
 * substitution to the state in a single in-place pass.
 *
 * statemt : state, one byte value per int, stored column by column
 *           (row r of column c is statemt[r + 4 * c]).  Every entry used
 *           must be in 0..255, otherwise the invSbox lookup is out of bounds.
 * nb      : number of state columns; only 4, 6 and 8 are handled.  Any other
 *           value leaves statemt untouched.
 *
 * Each chain moves elements in the opposite direction to the matching chain
 * in ByteSub_ShiftRow().  A single `temp` carries the wrapped-around element,
 * so the order of the assignments inside every chain matters.
 */
void
InversShiftRow_ByteSub (int statemt[32], int nb)
{
int temp;
switch (nb)
{
case 4:
/* row 1: rotate back by one column (13 <- 9 <- 5 <- 1 <- 13) */
temp = invSbox[statemt[13] >> 4][statemt[13] & 0xf];
statemt[13] = invSbox[statemt[9] >> 4][statemt[9] & 0xf];
statemt[9] = invSbox[statemt[5] >> 4][statemt[5] & 0xf];
statemt[5] = invSbox[statemt[1] >> 4][statemt[1] & 0xf];
statemt[1] = temp;
/* row 2: rotate by two columns (two swaps) */
temp = invSbox[statemt[14] >> 4][statemt[14] & 0xf];
statemt[14] = invSbox[statemt[6] >> 4][statemt[6] & 0xf];
statemt[6] = temp;
temp = invSbox[statemt[2] >> 4][statemt[2] & 0xf];
statemt[2] = invSbox[statemt[10] >> 4][statemt[10] & 0xf];
statemt[10] = temp;
/* row 3: rotate back by three columns (15 <- 3 <- 7 <- 11 <- 15) */
temp = invSbox[statemt[15] >> 4][statemt[15] & 0xf];
statemt[15] = invSbox[statemt[3] >> 4][statemt[3] & 0xf];
statemt[3] = invSbox[statemt[7] >> 4][statemt[7] & 0xf];
statemt[7] = invSbox[statemt[11] >> 4][statemt[11] & 0xf];
statemt[11] = temp;
/* row 0: substituted in place, not rotated */
statemt[0] = invSbox[statemt[0] >> 4][statemt[0] & 0xf];
statemt[4] = invSbox[statemt[4] >> 4][statemt[4] & 0xf];
statemt[8] = invSbox[statemt[8] >> 4][statemt[8] & 0xf];
statemt[12] = invSbox[statemt[12] >> 4][statemt[12] & 0xf];
break;
case 6:
/* row 1: rotate back by one column across six columns */
temp = invSbox[statemt[21] >> 4][statemt[21] & 0xf];
statemt[21] = invSbox[statemt[17] >> 4][statemt[17] & 0xf];
statemt[17] = invSbox[statemt[13] >> 4][statemt[13] & 0xf];
statemt[13] = invSbox[statemt[9] >> 4][statemt[9] & 0xf];
statemt[9] = invSbox[statemt[5] >> 4][statemt[5] & 0xf];
statemt[5] = invSbox[statemt[1] >> 4][statemt[1] & 0xf];
statemt[1] = temp;
/* row 2: rotate back by two columns (two independent 3-cycles) */
temp = invSbox[statemt[22] >> 4][statemt[22] & 0xf];
statemt[22] = invSbox[statemt[14] >> 4][statemt[14] & 0xf];
statemt[14] = invSbox[statemt[6] >> 4][statemt[6] & 0xf];
statemt[6] = temp;
temp = invSbox[statemt[18] >> 4][statemt[18] & 0xf];
statemt[18] = invSbox[statemt[10] >> 4][statemt[10] & 0xf];
statemt[10] = invSbox[statemt[2] >> 4][statemt[2] & 0xf];
statemt[2] = temp;
/* row 3: rotate by three columns (three swaps) */
temp = invSbox[statemt[15] >> 4][statemt[15] & 0xf];
statemt[15] = invSbox[statemt[3] >> 4][statemt[3] & 0xf];
statemt[3] = temp;
temp = invSbox[statemt[19] >> 4][statemt[19] & 0xf];
statemt[19] = invSbox[statemt[7] >> 4][statemt[7] & 0xf];
statemt[7] = temp;
temp = invSbox[statemt[23] >> 4][statemt[23] & 0xf];
statemt[23] = invSbox[statemt[11] >> 4][statemt[11] & 0xf];
statemt[11] = temp;
/* row 0: substituted in place, not rotated */
statemt[0] = invSbox[statemt[0] >> 4][statemt[0] & 0xf];
statemt[4] = invSbox[statemt[4] >> 4][statemt[4] & 0xf];
statemt[8] = invSbox[statemt[8] >> 4][statemt[8] & 0xf];
statemt[12] = invSbox[statemt[12] >> 4][statemt[12] & 0xf];
statemt[16] = invSbox[statemt[16] >> 4][statemt[16] & 0xf];
statemt[20] = invSbox[statemt[20] >> 4][statemt[20] & 0xf];
break;
case 8:
/* row 1: rotate back by one column across eight columns */
temp = invSbox[statemt[29] >> 4][statemt[29] & 0xf];
statemt[29] = invSbox[statemt[25] >> 4][statemt[25] & 0xf];
statemt[25] = invSbox[statemt[21] >> 4][statemt[21] & 0xf];
statemt[21] = invSbox[statemt[17] >> 4][statemt[17] & 0xf];
statemt[17] = invSbox[statemt[13] >> 4][statemt[13] & 0xf];
statemt[13] = invSbox[statemt[9] >> 4][statemt[9] & 0xf];
statemt[9] = invSbox[statemt[5] >> 4][statemt[5] & 0xf];
statemt[5] = invSbox[statemt[1] >> 4][statemt[1] & 0xf];
statemt[1] = temp;
/* row 2: rotate back by three columns (one 8-cycle) */
temp = invSbox[statemt[30] >> 4][statemt[30] & 0xf];
statemt[30] = invSbox[statemt[18] >> 4][statemt[18] & 0xf];
statemt[18] = invSbox[statemt[6] >> 4][statemt[6] & 0xf];
statemt[6] = invSbox[statemt[26] >> 4][statemt[26] & 0xf];
statemt[26] = invSbox[statemt[14] >> 4][statemt[14] & 0xf];
statemt[14] = invSbox[statemt[2] >> 4][statemt[2] & 0xf];
statemt[2] = invSbox[statemt[22] >> 4][statemt[22] & 0xf];
statemt[22] = invSbox[statemt[10] >> 4][statemt[10] & 0xf];
statemt[10] = temp;
/* row 3: rotate by four columns (four swaps) */
temp = invSbox[statemt[31] >> 4][statemt[31] & 0xf];
statemt[31] = invSbox[statemt[15] >> 4][statemt[15] & 0xf];
statemt[15] = temp;
temp = invSbox[statemt[27] >> 4][statemt[27] & 0xf];
statemt[27] = invSbox[statemt[11] >> 4][statemt[11] & 0xf];
statemt[11] = temp;
temp = invSbox[statemt[23] >> 4][statemt[23] & 0xf];
statemt[23] = invSbox[statemt[7] >> 4][statemt[7] & 0xf];
statemt[7] = temp;
temp = invSbox[statemt[19] >> 4][statemt[19] & 0xf];
statemt[19] = invSbox[statemt[3] >> 4][statemt[3] & 0xf];
statemt[3] = temp;
/* row 0: substituted in place, not rotated */
statemt[0] = invSbox[statemt[0] >> 4][statemt[0] & 0xf];
statemt[4] = invSbox[statemt[4] >> 4][statemt[4] & 0xf];
statemt[8] = invSbox[statemt[8] >> 4][statemt[8] & 0xf];
statemt[12] = invSbox[statemt[12] >> 4][statemt[12] & 0xf];
statemt[16] = invSbox[statemt[16] >> 4][statemt[16] & 0xf];
statemt[20] = invSbox[statemt[20] >> 4][statemt[20] & 0xf];
statemt[24] = invSbox[statemt[24] >> 4][statemt[24] & 0xf];
statemt[28] = invSbox[statemt[28] >> 4][statemt[28] & 0xf];
break;
}
}
/* ******** MixColumn ********** */
/*
 * MixColumn_AddRoundKey - mix every state column and XOR in round key n.
 *
 * statemt : state, one byte value per int, row r of column c at
 *           statemt[r + 4 * c]; updated in place.
 * nb      : number of state columns to process (at most 8).
 * n       : round index; column c uses the key words word[r][c + nb * n].
 *
 * For each output row r the result is
 *     2*s[r] ^ 3*s[r+1] ^ s[r+2] ^ s[r+3] ^ key[r]
 * with row indices taken modulo 4.  Doubling is a left shift followed by an
 * XOR with 283 (0x11b) when bit 8 became set.
 *
 * Always returns 0.
 */
int
MixColumn_AddRoundKey (int statemt[32], int nb, int n)
{
  int mixed[8 * 4];
  int col, row;

  for (col = 0; col < nb; ++col)
    {
      const int base = col * 4;
      const int key_col = col + nb * n;

      for (row = 0; row < 4; ++row)
	{
	  /* 2 * s[row] */
	  int acc = (statemt[base + row] << 1);
	  /* 3 * s[row + 1], reduced together with the accumulation below */
	  int triple = statemt[base + (row + 1) % 4];

	  if ((acc >> 8) == 1)
	    acc ^= 283;

	  triple ^= (triple << 1);
	  acc ^= ((triple >> 8) == 1) ? (triple ^ 283) : triple;

	  acc ^= statemt[base + (row + 2) % 4]
	    ^ statemt[base + (row + 3) % 4] ^ word[row][key_col];

	  /* Results go to a scratch buffer because each column's inputs are
	     still needed while its other rows are computed.  */
	  mixed[base + row] = acc;
	}
    }

  for (col = 0; col < nb; ++col)
    for (row = 0; row < 4; ++row)
      statemt[row + col * 4] = mixed[row + col * 4];

  return 0;
}
/* ******** InversMixColumn ********** */
/*
 * AddRoundKey_InversMixColumn - XOR round key n into the state, then apply
 * the inverse column mix.
 *
 * statemt : state, one byte value per int, row r of column c at
 *           statemt[r + 4 * c]; updated in place.
 * nb      : number of state columns to process (at most 8).
 * n       : round index; column c uses the key words word[r][c + nb * n].
 *
 * For each output row r the result is
 *     14*s[r] ^ 11*s[r+1] ^ 13*s[r+2] ^ 9*s[r+3]
 * with row indices taken modulo 4.  Every product is built from three
 * doublings (left shift, XOR with 283 when bit 8 became set); the source
 * byte is XORed back in after the doublings selected by the table below.
 *
 * Always returns 0.
 */
int
AddRoundKey_InversMixColumn (int statemt[32], int nb, int n)
{
  /* add_back[t][k] != 0: XOR the source byte in after doubling k of term t. */
  static const int add_back[4][3] = {
    {1, 1, 0},			/* ((s*2 ^ s)*2 ^ s)*2     = 14 * s */
    {0, 1, 1},			/* ((s*2)*2 ^ s)*2 ^ s     = 11 * s */
    {1, 0, 1},			/* ((s*2 ^ s)*2)*2 ^ s     = 13 * s */
    {0, 0, 1}			/* ((s*2)*2)*2 ^ s         =  9 * s */
  };
  int mixed[8 * 4];
  int col, row, term, step;

  /* Round-key addition comes first on the decryption path.  */
  for (col = 0; col < nb; ++col)
    for (row = 0; row < 4; ++row)
      statemt[row + col * 4] ^= word[row][col + nb * n];

  for (col = 0; col < nb; ++col)
    for (row = 0; row < 4; ++row)
      {
	int acc = 0;

	for (term = 0; term < 4; ++term)
	  {
	    const int src = statemt[(row + term) % 4 + col * 4];
	    int x = src;

	    for (step = 0; step < 3; ++step)
	      {
		x = (x << 1);
		if ((x >> 8) == 1)
		  x ^= 283;
		if (add_back[term][step])
		  x ^= src;
	      }
	    acc ^= x;
	  }
	/* Scratch buffer: the column's inputs are needed for all four rows. */
	mixed[row + col * 4] = acc;
      }

  for (col = 0; col < nb; ++col)
    for (row = 0; row < 4; ++row)
      statemt[row + col * 4] = mixed[row + col * 4];

  return 0;
}
/* ******** AddRoundKey ********** */
/*
 * AddRoundKey - XOR round key n into the state.
 *
 * statemt : state, one byte value per int, row r of column c at
 *           statemt[r + 4 * c]; updated in place.
 * type    : six-digit configuration code; its low three digits (128, 192 or
 *           256) select 4, 6 or 8 state columns.
 * n       : round index; column c uses the key words word[r][c + nb * n].
 *
 * Returns 0 on success.  Returns -1 without touching statemt when type is
 * not a supported code.  Previously that case fell out of the switch and
 * ran the loop with an uninitialised column count.
 */
int
AddRoundKey (int statemt[32], int type, int n)
{
  int j, nb;

  switch (type)
    {
    case 128128:
    case 192128:
    case 256128:
      nb = 4;
      break;
    case 128192:
    case 192192:
    case 256192:
      nb = 6;
      break;
    case 128256:
    case 192256:
    case 256256:
      nb = 8;
      break;
    default:
      /* Same convention as KeySchedule() for an unsupported type.  */
      return -1;
    }

  for (j = 0; j < nb; ++j)
    {
      statemt[j * 4] ^= word[0][j + nb * n];
      statemt[1 + j * 4] ^= word[1][j + nb * n];
      statemt[2 + j * 4] ^= word[2][j + nb * n];
      statemt[3 + j * 4] ^= word[3][j + nb * n];
    }
  return 0;
}

View File

@ -0,0 +1,163 @@
/*
+--------------------------------------------------------------------------+
| CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
| ======================================================================== |
| |
| * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
| H. Takada and K. Ishii |
| Nagoya University, Japan |
| |
| * Remark : |
| 1. This source code is modified to unify the formats of the benchmark |
| programs in CHStone. |
| 2. Test vectors are added for CHStone. |
| 3. If "main_result" is 0 at the end of the program, the program is |
| correctly executed. |
| 4. Please follow the copyright of each benchmark program. |
+--------------------------------------------------------------------------+
*/
/* aes_key.c */
/*
* Copyright (C) 2005
* Akira Iwata & Masayuki Sato
* Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan.
*
* All rights reserved.
*
* This software is written by Masayuki Sato.
* And if you want to contact us, send an email to Kimitake Wakayama
* (wakayama@elcom.nitech.ac.jp)
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. All advertising materials mentioning features or use of this software must
* display the following acknowledgment:
* "This product includes software developed by Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
*
* 4. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
*
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
* AKIRA IWATA LABORATORY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
* IN NO EVENT SHALL AKIRA IWATA LABORATORY BE LIABLE FOR ANY SPECIAL,
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
* NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
*/
/* ************** key generation (key expansion) ******************* */
/*
 * Round constants for the key expansion.  KeySchedule() XORs
 * Rcon0[(j / nk) - 1] into the first byte of the temporary word whenever the
 * column index j is a multiple of nk.
 */
const int Rcon0[30] = {
0x01, 0x02, 0x04, 0x08,
0x10, 0x20, 0x40, 0x80,
0x1b, 0x36, 0x6c, 0xd8,
0xab, 0x4d, 0x9a, 0x2f,
0x5e, 0xbc, 0x63, 0xc6,
0x97, 0x35, 0x6a, 0xd4,
0xb3, 0x7d, 0xfa, 0xef,
0xc5, 0x91,
};
/* **************** key expand ************************ */
/*
 * KeySchedule - expand the cipher key into the global round-key table `word`.
 *
 * type : six-digit configuration code.  The high three digits (128, 192,
 *        256) select nk = 4, 6 or 8 key columns; the low three digits select
 *        nb = 4, 6 or 8 state columns.
 * key  : cipher key, one byte value per int, column by column
 *        (key[row + 4 * column]); the first nk columns are read.
 *
 * Fills word[0..3][0 .. nb * (rounds + 1) - 1], where rounds is 10, 12 or 14
 * (the larger of nk and nb, plus 6).
 *
 * Returns 0 on success, -1 if type is not one of the nine supported codes
 * (`word` is left untouched in that case).
 */
int
KeySchedule (int type, int key[32])
{
  int nk, nb, round_val;
  int i, j, temp[4];

  /* Key length in 32-bit columns, from the high three digits of type.  */
  switch (type)
    {
    case 128128:
    case 128192:
    case 128256:
      nk = 4;
      break;
    case 192128:
    case 192192:
    case 192256:
      nk = 6;
      break;
    case 256128:
    case 256192:
    case 256256:
      nk = 8;
      break;
    default:
      return -1;
    }

  /* Block length in 32-bit columns, from the low three digits of type.
     Every code that reaches this point is one of the nine valid ones.  */
  switch (type)
    {
    case 128128:
    case 192128:
    case 256128:
      nb = 4;
      break;
    case 128192:
    case 192192:
    case 256192:
      nb = 6;
      break;
    default:
      nb = 8;
      break;
    }

  /* 10, 12 or 14 rounds.  */
  round_val = (nk > nb ? nk : nb) + 6;

  /* The first nk columns are the key itself.  */
  for (j = 0; j < nk; ++j)
    for (i = 0; i < 4; ++i)
      word[i][j] = key[i + j * 4];

  /* Each further column is derived from the previous one and from the
     column nk positions back.  */
  for (j = nk; j < nb * (round_val + 1); ++j)
    {
      const int prev = j - 1;

      if ((j % nk) == 0)
	{
	  /* Rotate the previous column up by one row, substitute every
	     byte, and fold the round constant into the first byte.  */
	  for (i = 0; i < 4; ++i)
	    temp[i] = SubByte (word[(i + 1) % 4][prev]);
	  temp[0] ^= Rcon0[(j / nk) - 1];
	}
      else
	{
	  for (i = 0; i < 4; ++i)
	    temp[i] = word[i][prev];
	}

      /* Extra substitution step used only with the 8-column key.  */
      if (nk > 6 && j % nk == 4)
	for (i = 0; i < 4; ++i)
	  temp[i] = SubByte (temp[i]);

      for (i = 0; i < 4; ++i)
	word[i][j] = word[i][j - nk] ^ temp[i];
    }
  return 0;
}

Binary file not shown.

View File

@ -0,0 +1,207 @@
/*
+--------------------------------------------------------------------------+
| CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
| ======================================================================== |
| |
| * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
| H. Takada and K. Ishii |
| Nagoya University, Japan |
| |
| * Remark : |
| 1. This source code is modified to unify the formats of the benchmark |
| programs in CHStone. |
| 2. Test vectors are added for CHStone. |
| 3. If "main_result" is 0 at the end of the program, the program is |
| correctly executed. |
| 4. Please follow the copyright of each benchmark program. |
+--------------------------------------------------------------------------+
*/
/* aes.c */
/*
* Copyright (C) 2005
* Akira Iwata & Masayuki Sato
* Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan.
*
* All rights reserved.
*
* This software is written by Masayuki Sato.
* And if you want to contact us, send an email to Kimitake Wakayama
* (wakayama@elcom.nitech.ac.jp)
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. All advertising materials mentioning features or use of this software must
* display the following acknowledgment:
* "This product includes software developed by Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
*
* 4. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by Akira Iwata Laboratory,
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
*
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
* AKIRA IWATA LABORATORY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
* IN NO EVENT SHALL AKIRA IWATA LABORATORY BE LIABLE FOR ANY SPECIAL,
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
* NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
*/
#include <stdio.h>
//#include "mmio.h"
#include "time.h"
#include "../os_utils.h"
#define ACCEL_CONTROL 0x30000
#define AP_DONE_MASK 0b10
#define ACCEL_INT 0x30004
#define ACCEL_STATEMT 0x30018
#define ACCEL_KEY 0x30024
#define ACCEL_TYPE 0x30030
#define ACCEL_RET 0x30010
int main_result;
#include "aes.h"
#include "aes_enc.c"
#include "aes_dec.c"
#include "aes_key.c"
#include "aes_func.c"
#include "../custom_mmap/mmap_driver.c"
/*
 * encrypt_accel - run the encryption on the memory-mapped accelerator rather
 * than in software.
 *
 * statemt, key : 32-int buffers; both are copied into driver-provided
 *                buffers before the run and copied back afterwards.
 * type         : configuration code written to the ACCEL_TYPE register.
 *
 * Always returns 0; the accelerator's return register (ACCEL_RET) is not
 * read.  None of the helper calls are checked for failure.
 *
 * NOTE(review): access_addr, mmap_init, copy_to_buffer, vtop_translate and
 * mmap_delete are defined outside this section (os_utils.h / mmap_driver.c).
 * The comments below describe how they are used here, not their contracts.
 */
int encrypt_accel(int* statemt, int* key, int type){
uint64_t addr;
// Disable interrupt for now
//reg_write32(ACCEL_INT, 0x0);
access_addr(ACCEL_INT, OUT, 0);
// One driver buffer per array (presumably physically contiguous memory the
// accelerator can reach -- TODO confirm against mmap_driver.c).
int fd1 = mmap_init();
int fd2 = mmap_init();
char * addr1 = copy_to_buffer((char*)statemt, 32 * sizeof(int), fd1);
// NOTE(review): the result of this first translation is overwritten by the
// next line.  It looks like a leftover (the matching call for `key` below is
// commented out) -- confirm vtop_translate has no needed side effect before
// removing it.
addr = vtop_translate(statemt);
addr = vtop_translate(addr1);
// The 64-bit address is written as two 32-bit register halves.
access_addr(ACCEL_STATEMT, OUT, addr);
access_addr(ACCEL_STATEMT + 4, OUT, addr >> 32);
char * addr2 = copy_to_buffer((char*)key, 32 * sizeof(int), fd2);
//addr = vtop_translate(key);
addr = vtop_translate(addr2);
access_addr(ACCEL_KEY, OUT, addr);
access_addr(ACCEL_KEY + 4, OUT, addr >> 32);
//addr = vtop_translate(key);
access_addr(ACCEL_TYPE, OUT, type);
// Set up pointer a and pointer b address
// reg_write32(ACCEL_STATEMT, (uint32_t)statemt);
// reg_write32(ACCEL_KEY, (uint32_t)key);
// reg_write32(ACCEL_TYPE, (uint32_t)type);
// Write to ap_start to start the execution
access_addr(ACCEL_CONTROL, OUT, 0x1);
// reg_write32(ACCEL_CONTROL, 0x1);
//printf("Accel Control: %x\n", reg_read32(ACCEL_CONTROL));
// Done?  Busy-wait (no timeout) until the AP_DONE_MASK bit is set in the
// control register.
int done = 0;
while (!done){
// done = reg_read32(ACCEL_CONTROL) & AP_DONE_MASK;
done = access_addr(ACCEL_CONTROL, IN, 0x1) & AP_DONE_MASK;
}
// Copy the results back to the caller's arrays.  32 * 4 bytes equals the
// 32 * sizeof(int) used for the copy-in only when int is 4 bytes wide.
memcpy(statemt, addr1, 32 * 4 );
memcpy(key, addr2, 32 * 4 );
mmap_delete(fd1, addr1);
mmap_delete(fd2, addr2);
//int c = reg_read32(ACCEL_RET);
return 0;
}
/* ***************** main **************************** */
int
aes_main (void)
{
  /*
   * CHStone test vectors: the first 16 entries of the global statemt[] and
   * key[] arrays are loaded from the tables below.
   */
  static const int statemt_init[16] = {
    50, 67, 246, 168, 136, 90, 48, 141,
    49, 49, 152, 162, 224, 55, 7, 52
  };
  static const int key_init[16] = {
    43, 126, 21, 22, 40, 174, 210, 166,
    171, 247, 21, 136, 9, 207, 79, 60
  };
  int64_t begin, end;
  int idx;

  for (idx = 0; idx < 16; idx++)
    statemt[idx] = statemt_init[idx];
  for (idx = 0; idx < 16; idx++)
    key[idx] = key_init[idx];

  /* Only the encryption step sits between the two cycle-counter reads. */
  begin = read_cycle();
#ifdef CUSTOM_DRIVER
  encrypt_accel(statemt, key, 128128);
#else
  encrypt (statemt, key, 128128);
#endif
  end = read_cycle();

  /* Decryption always runs in software and is outside the timed region. */
  decrypt (statemt, key, 128128);
  duration(begin, end);
  return 0;
}
/*
 * Program entry: open the device-memory handle, run the AES benchmark,
 * print main_result on its own line, close the handle and use main_result
 * as the process exit status.
 */
int
main ()
{
  open_dev_mem();

  main_result = 0;
  aes_main ();
  printf ("\n%d\n", main_result);

  close_dev_mem();
  return main_result;
}

View File

@ -0,0 +1,19 @@
# Per-benchmark settings; the actual build rules come from ../Makefile.
# TARGET can be overridden from the command line or environment
# (NOTE(review): "XXX" looks like a placeholder default - confirm).
TARGET ?=XXX
VERBOSE=1
LDFLAGS=
CFLAGS=
# POSTFIX stays empty for the plain software build.
POSTFIX=
# CUSTOM_INST=1: compile with -DCUSTOM_INST and use the ".inst" postfix.
ifeq ($(CUSTOM_INST), 1)
POSTFIX=.inst
CFLAGS+=-DCUSTOM_INST
endif
# CUSTOM_DRIVER=1: compile with -DCUSTOM_DRIVER and use the ".driver" postfix.
# If both switches are set, POSTFIX ends up as ".driver" (assigned last).
ifeq ($(CUSTOM_DRIVER), 1)
POSTFIX=.driver
CFLAGS+=-DCUSTOM_DRIVER
endif
# Presumably consumes TARGET, POSTFIX, CFLAGS and LDFLAGS - verify in ../Makefile.
include ../Makefile

View File

@ -1,25 +0,0 @@
# Per-benchmark settings for the bitonic sort benchmark; build rules come
# from one of the shared Makefile.*.in fragments selected below.
VERBOSE= 1
TARGET ?=bitonic
FUNC=sort
LDFLAGS=
CFLAGS=
CPP ?= 0
# CUSTOM_INST=1 / CUSTOM_DRIVER=1 add the matching -D define to CFLAGS.
ifeq ($(CUSTOM_INST), 1)
CFLAGS+=-DCUSTOM_INST
endif
ifeq ($(CUSTOM_DRIVER), 1)
CFLAGS+=-DCUSTOM_DRIVER
endif
# Toolchain selection: LLVM=1 wins, then GCC=1, otherwise Makefile.bm.in.
ifeq ($(LLVM), 1)
ACCEL ?=0
include ../../../../Makefile.llvm.in
else
ifeq ($(GCC), 1)
include ../../../../Makefile.gcc.in
else
include ../../../../Makefile.bm.in
endif
endif

View File

@ -1,25 +0,0 @@
# Per-benchmark settings for the bitonic_tl benchmark; build rules come
# from one of the shared Makefile.*.in fragments selected below.
VERBOSE= 1
TARGET ?=bitonic_tl
FUNC=sort
LDFLAGS=
CFLAGS=
CPP ?= 0
# CUSTOM_INST=1 / CUSTOM_DRIVER=1 add the matching -D define to CFLAGS.
ifeq ($(CUSTOM_INST), 1)
CFLAGS+=-DCUSTOM_INST
endif
ifeq ($(CUSTOM_DRIVER), 1)
CFLAGS+=-DCUSTOM_DRIVER
endif
# Toolchain selection: LLVM=1 wins, then GCC=1, otherwise Makefile.bm.in.
ifeq ($(LLVM), 1)
ACCEL ?=0
include ../../../../Makefile.llvm.in
else
ifeq ($(GCC), 1)
include ../../../../Makefile.gcc.in
else
include ../../../../Makefile.bm.in
endif
endif

View File

@ -1,81 +0,0 @@
//#pragma once
/*----------------------------------------------------------------------------
*
* Author: Liang Ma (liang-ma@polito.it)
*
*----------------------------------------------------------------------------
*/
#ifdef CUSTOM_DRIVER
#include "bm_wrapper.h"
#include "rocc.h"
#endif
#include "time.h"
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
//#define GLOBAL
// Total exp of actual size
#define EXP 3
// Total exp of arr buffer size
#define LIMIT 2
//#include "bitonic_accel.cpp"
#include "bitonic.h"
#ifdef CUSTOM_DRIVER
/*
 * Issue one RoCC instruction in custom opcode slot XCUSTOM_ACC (3) with
 * funct 0, passing `src` as the single source operand, and return the value
 * the instruction leaves in its destination register. The input and result
 * are also logged with printf.
 * NOTE(review): judging by the name and log text this is a virtual-to-physical
 * translation done by the accelerator - confirm against the hardware.
 */
uint64_t vtop_translate(uint64_t src){
#define XCUSTOM_ACC 3
//asm volatile ("fence.i");
uint64_t ret;
ROCC_INSTRUCTION_DS(XCUSTOM_ACC, ret, src, 0);
//asm volatile ("fence.i");
// NOTE(review): %ld is a signed conversion but both arguments are uint64_t.
printf ("Translate Addr VA %ld to PA %ld\t", src, ret);
return ret;
}
#endif
/*
 * Bitonic sort benchmark driver: fills an ARRAY_SIZE-element array with
 * 0..ARRAY_SIZE-1, sorts it (sort_wrapper when CUSTOM_DRIVER is defined,
 * sort otherwise) between two cycle-counter reads, prints every element and
 * counts adjacent pairs for which (a[i] > a[i+1]) differs from dir.
 * Returns 1 if any such pair was found, 0 otherwise.
 */
int main(int argc, char** argv)
{
    typedef int TYPE;
    int SIZE = ARRAY_SIZE;
    TYPE h_a[SIZE];
    int dir = 1;
    int err = 0;
    int i;
    uint64_t begin, end;

    for (i = 0; i < SIZE; i++)
        h_a[i] = i;

    begin = read_cycle();
#ifdef CUSTOM_DRIVER
    sort_wrapper(h_a, dir);
#else
    sort(h_a, dir);
#endif
    end = read_cycle();
    duration(begin, end);

    /* Dump the whole array first ... */
    for (i = 0; i < SIZE; i++)
        printf("array[%d]=%d\n", i, h_a[i]);

    /* ... then check every adjacent pair against the requested direction. */
    for (i = 0; i + 1 < SIZE; i++) {
        if ((h_a[i] > h_a[i + 1]) != dir)
            err++;
    }

    printf("There is/are %d error(s).\n", err);
    return err != 0;
}

View File

@ -0,0 +1,19 @@
# Per-benchmark settings for the bitonic benchmark; build rules come from ../Makefile.
VERBOSE= 1
TARGET ?=bitonic
# NOTE(review): FUNC=encrypt looks copied from the aes benchmark; this
# directory's entry point is sort(). Confirm whether ../Makefile still reads FUNC.
FUNC=encrypt
LDFLAGS=
CFLAGS=
# POSTFIX stays empty for the plain software build.
POSTFIX=
# CUSTOM_INST=1: compile with -DCUSTOM_INST and use the ".inst" postfix.
ifeq ($(CUSTOM_INST), 1)
POSTFIX=.inst
CFLAGS+=-DCUSTOM_INST
endif
# CUSTOM_DRIVER=1: compile with -DCUSTOM_DRIVER and use the ".driver" postfix.
# If both switches are set, POSTFIX ends up as ".driver" (assigned last).
ifeq ($(CUSTOM_DRIVER), 1)
POSTFIX=.driver
CFLAGS+=-DCUSTOM_DRIVER
endif
include ../Makefile

View File

@ -1,26 +1,22 @@
//#pragma once
#include "rocc.h"
/*----------------------------------------------------------------------------
*
* Author: Liang Ma (liang-ma@polito.it)
*
*----------------------------------------------------------------------------
*/
#include "time.h"
#ifdef CUSTOM_INST
#include "bm_wrapper.h"
#endif
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
//#define GLOBAL
#include "../os_utils.h"
// Total exp of actual size
#define EXP 3
#define EXP 9
// Total exp of arr buffer size
#define LIMIT 2
#define LIMIT 4
//#include "bitonic_accel.cpp"
#include "bitonic.h"
@ -39,28 +35,31 @@ int main(int argc, char** argv)
//h_a[i]=rand()%(100*ARRAY_SIZE);
h_a[i] = i;
}
uint64_t begin, end;
begin = read_cycle();
uint64_t begin, end, dur;
volatile int block;
begin = read_cycle();
#ifdef CUSTOM_INST
sort_wrapper((uint64_t)h_a, (uint64_t)dir);
#define XCUSTOM_ACC 1
ROCC_INSTRUCTION_DSS(XCUSTOM_ACC, block, vtop_translate(h_a), vtop_translate(dir), 0);
//ROCC_INSTRUCTION_DSS(XCUSTOM_ACC, block, h_a, dir, 0);
#else
sort(h_a, dir);
#endif
end = read_cycle();
duration(begin, end);
end = read_cycle();
duration(begin, end);
int err = 0;
for (i = 0 ; i < SIZE; i++)
{
printf("array[%d]=%d\n", i, h_a[i]);
if(i+1 == SIZE)
break;
if(( h_a[i] > h_a[i + 1])!=dir){
err++;
}
}
// for (i = 0 ; i < SIZE; i++)
// {
// printf("array[%d]=%d\n", i, h_a[i]);
// if(i+1 == SIZE)
// break;
// if(( h_a[i] > h_a[i + 1])!=dir){
// err++;
// }
//
// }
printf("There is/are %d error(s).\n", err);
if(err!=0)

View File

@ -71,7 +71,7 @@ void mergeLocal(int* array, int bits, int id, int dir){
memcpy(array + id * LOCAL_SIZE, localArray, LOCAL_SIZE * sizeof(int));
}
int sort(int* array, int dir){
void sort(int* array, int dir){
#pragma HLS INTERFACE ap_bus depth=10 port=array
#pragma HLS INLINE
@ -120,7 +120,6 @@ int sort(int* array, int dir){
mergeLocal(bits, id, dir);
}
}
return 0;
#endif
}

View File

@ -0,0 +1,98 @@
// Based on code by Schuyler Eldridge. Copyright (c) Boston University
// https://github.com/seldridge/rocket-rocc-examples/blob/master/src/main/c/rocc.h
#ifndef SRC_MAIN_C_ROCC_H
#define SRC_MAIN_C_ROCC_H
#include <stdint.h>
#define STR1(x) #x
#define STR(x) STR1(x)
#define EXTRACT(a, size, offset) (((~(~0 << size) << offset) & a) >> offset)
#define CUSTOMX_OPCODE(x) CUSTOM_ ## x
#define CUSTOM_0 0b0001011
#define CUSTOM_1 0b0101011
#define CUSTOM_2 0b1011011
#define CUSTOM_3 0b1111011
#define CUSTOMX(X, xd, xs1, xs2, rd, rs1, rs2, funct) \
CUSTOMX_OPCODE(X) | \
(rd << (7)) | \
(xs2 << (7+5)) | \
(xs1 << (7+5+1)) | \
(xd << (7+5+2)) | \
(rs1 << (7+5+3)) | \
(rs2 << (7+5+3+5)) | \
(EXTRACT(funct, 7, 0) << (7+5+3+5+5))
// Convenience wrappers. The suffix names the operands that are passed in
// registers: D = destination (rd), S = one source, SS = two sources.
// Register numbers are fixed: rd -> x10, rs1 -> x11, rs2 -> x12. Operands
// that are not passed in a register are encoded as register field 0.
#define ROCC_INSTRUCTION_DSS(X, rd, rs1, rs2, funct) \
ROCC_INSTRUCTION_R_R_R(X, rd, rs1, rs2, funct, 10, 11, 12)
#define ROCC_INSTRUCTION_DS(X, rd, rs1, funct) \
ROCC_INSTRUCTION_R_R_I(X, rd, rs1, 0, funct, 10, 11)
#define ROCC_INSTRUCTION_D(X, rd, funct) \
ROCC_INSTRUCTION_R_I_I(X, rd, 0, 0, funct, 10)
#define ROCC_INSTRUCTION_SS(X, rs1, rs2, funct) \
ROCC_INSTRUCTION_I_R_R(X, 0, rs1, rs2, funct, 11, 12)
#define ROCC_INSTRUCTION_S(X, rs1, funct) \
ROCC_INSTRUCTION_I_R_I(X, 0, rs1, 0, funct, 11)
#define ROCC_INSTRUCTION(X, funct) \
ROCC_INSTRUCTION_I_I_I(X, 0, 0, 0, funct)
// Implementation macros. The name suffix _<rd>_<rs1>_<rs2> says, per
// operand, whether it is a C value bound to a register (R) or a literal
// placed directly in the instruction's register field (I).
// rd, rs1, and rs2 are data
// rd_n, rs1_n, and rs2_n are the register numbers to use
// R operands are pinned with `register ... asm("xN")` so the hand-encoded
// .word and the compiler agree on which register holds each value.
// xd=1, xs1=1, xs2=1: result in rd, both sources in registers.
#define ROCC_INSTRUCTION_R_R_R(X, rd, rs1, rs2, funct, rd_n, rs1_n, rs2_n) { \
register uint64_t rd_ asm ("x" # rd_n); \
register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \
register uint64_t rs2_ asm ("x" # rs2_n) = (uint64_t) rs2; \
asm volatile ( \
".word " STR(CUSTOMX(X, 1, 1, 1, rd_n, rs1_n, rs2_n, funct)) "\n\t" \
: "=r" (rd_) \
: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
rd = rd_; \
}
// xd=1, xs1=1, xs2=0: result in rd, rs1 in a register, rs2 is a literal field.
#define ROCC_INSTRUCTION_R_R_I(X, rd, rs1, rs2, funct, rd_n, rs1_n) { \
register uint64_t rd_ asm ("x" # rd_n); \
register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \
asm volatile ( \
".word " STR(CUSTOMX(X, 1, 1, 0, rd_n, rs1_n, rs2, funct)) "\n\t" \
: "=r" (rd_) : [_rs1] "r" (rs1_)); \
rd = rd_; \
}
// xd=1, xs1=0, xs2=0: result in rd, both source fields are literals.
#define ROCC_INSTRUCTION_R_I_I(X, rd, rs1, rs2, funct, rd_n) { \
register uint64_t rd_ asm ("x" # rd_n); \
asm volatile ( \
".word " STR(CUSTOMX(X, 1, 0, 0, rd_n, rs1, rs2, funct)) "\n\t" \
: "=r" (rd_)); \
rd = rd_; \
}
// xd=0, xs1=1, xs2=1: no result, both sources in registers.
#define ROCC_INSTRUCTION_I_R_R(X, rd, rs1, rs2, funct, rs1_n, rs2_n) { \
register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \
register uint64_t rs2_ asm ("x" # rs2_n) = (uint64_t) rs2; \
asm volatile ( \
".word " STR(CUSTOMX(X, 0, 1, 1, rd, rs1_n, rs2_n, funct)) "\n\t" \
:: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
}
// xd=0, xs1=1, xs2=0: no result, rs1 in a register.
#define ROCC_INSTRUCTION_I_R_I(X, rd, rs1, rs2, funct, rs1_n) { \
register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \
asm volatile ( \
".word " STR(CUSTOMX(X, 0, 1, 0, rd, rs1_n, rs2, funct)) "\n\t" \
:: [_rs1] "r" (rs1_)); \
}
// xd=0, xs1=0, xs2=0: no register operands at all.
#define ROCC_INSTRUCTION_I_I_I(X, rd, rs1, rs2, funct) { \
asm volatile ( \
".word " STR(CUSTOMX(X, 0, 0, 0, rd, rs1, rs2, funct)) "\n\t" ); \
}
#endif // SRC_MAIN_C_ROCC_H

View File

@ -0,0 +1,19 @@
# Per-benchmark settings for the bitonic_tl benchmark; build rules come from ../Makefile.
VERBOSE= 1
TARGET ?=bitonic_tl
FUNC=sort
LDFLAGS=
CFLAGS=
# POSTFIX stays empty for the plain software build.
POSTFIX=
# CUSTOM_INST=1: compile with -DCUSTOM_INST and use the ".inst" postfix.
ifeq ($(CUSTOM_INST), 1)
POSTFIX=.inst
CFLAGS+=-DCUSTOM_INST
endif
# CUSTOM_DRIVER=1: compile with -DCUSTOM_DRIVER and use the ".driver" postfix.
# If both switches are set, POSTFIX ends up as ".driver" (assigned last).
ifeq ($(CUSTOM_DRIVER), 1)
POSTFIX=.driver
CFLAGS+=-DCUSTOM_DRIVER
endif
include ../Makefile

View File

@ -0,0 +1,106 @@
//#pragma once
#include "../os_utils.h"
#define ACCEL_CONTROL 0x30000
#define AP_DONE_MASK 0b10
#define ACCEL_INT 0x30004
#define ACCEL_SRC 0x30010
#define ACCEL_DIR 0x3001c
/*----------------------------------------------------------------------------
*
* Author: Liang Ma (liang-ma@polito.it)
*
*----------------------------------------------------------------------------
*/
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
//#define GLOBAL
// Total exp of actual size
#define EXP 3
// Total exp of arr buffer size
#define LIMIT 2
//#include "bitonic_accel.cpp"
#include "bitonic.h"
#define ARRAY_SIZE 8
/*
 * Run the bitonic sort on the memory-mapped accelerator.
 *
 * Writes the translated address of `src` (low word, then high word) to the
 * SRC register pair and `dir` to the DIR register, starts the accelerator
 * and polls the control register until AP_DONE is set. Always returns 0.
 */
int sort_accel (int* src, int dir){
//uint64_t src_pa = vtop_translate((uint64_t)src);
// NOTE(review): src_pa is never read below.
uint64_t src_pa = (uint64_t)src;
// Disable interrupt for now
//reg_write32(ACCEL_INT, 0x0);
access_addr(ACCEL_INT, OUT, 0);
uint64_t addr;
addr = vtop_translate(src);
access_addr(ACCEL_SRC, OUT, addr);
access_addr(ACCEL_SRC + 4, OUT, addr >> 32);
access_addr(ACCEL_DIR, OUT, dir);
// Earlier bare-metal variant, kept for reference:
// reg_write32(ACCEL_SRC, (uint64_t)src_pa);
// reg_write32(ACCEL_DIR, (uint32_t)dir);
// Write to ap_start to start the execution
access_addr(ACCEL_CONTROL, OUT, 0x1);
//reg_write32(ACCEL_CONTROL, 0x1);
//printf("Accel Control: %x\n", reg_read32(ACCEL_CONTROL));
// Busy-wait until the control register reports AP_DONE.
int done = 0;
while (!done){
// done = reg_read32(ACCEL_CONTROL) & AP_DONE_MASK;
done = access_addr(ACCEL_CONTROL, IN, 0x1) & AP_DONE_MASK;
}
return 0;
}
/*
 * Bitonic sort (TileLink accelerator) benchmark driver.
 *
 * Fills an ARRAY_SIZE-element array with 0..ARRAY_SIZE-1, sorts it between
 * two cycle-counter reads (sort_accel when CUSTOM_DRIVER is defined, sort
 * otherwise), prints every element and counts adjacent pairs for which
 * (a[i] > a[i+1]) differs from dir.
 *
 * Returns 1 if any such pair was found, 0 otherwise.
 */
int main(int argc, char** argv)
{
    // input and output parameters
    int SIZE = ARRAY_SIZE;
    typedef int TYPE;
    TYPE h_a[SIZE];
    int dir = 1;
    int i = 0;

    for (i = 0; i < SIZE; i++)
    {
        //h_a[i]=rand()%(100*ARRAY_SIZE);
        h_a[i] = i;
    }

    uint64_t begin, end;
    begin = read_cycle();
#ifdef CUSTOM_DRIVER
    sort_accel(h_a, dir);
#else
    sort(h_a, dir);
#endif
    end = read_cycle();
    duration(begin, end);

    // Must start at zero: it is only ever incremented below, and both the
    // printed count and the exit status are derived from it.
    int err = 0;
    for (i = 0; i < SIZE; i++)
    {
        printf("array[%d]=%d\n", i, h_a[i]);
        // The last element has no successor to compare against.
        if (i + 1 == SIZE)
            break;
        if ((h_a[i] > h_a[i + 1]) != dir) {
            err++;
        }
    }
    printf("There is/are %d error(s).\n", err);
    if (err != 0)
        return 1;
    return 0;
}

View File

@ -1,25 +0,0 @@
# Per-benchmark settings for the bram_tl benchmark; build rules come from
# one of the shared Makefile.*.in fragments selected below.
VERBOSE= 1
TARGET ?=bram_tl
FUNC=top
LDFLAGS=
CFLAGS=
# CUSTOM_INST=1 / CUSTOM_DRIVER=1 add the matching -D define to CFLAGS.
ifeq ($(CUSTOM_INST), 1)
CFLAGS+=-DCUSTOM_INST
endif
ifeq ($(CUSTOM_DRIVER), 1)
CFLAGS+=-DCUSTOM_DRIVER
endif
# Toolchain selection: LLVM=1 wins, then GCC=1, otherwise Makefile.bm.in.
ifeq ($(LLVM), 1)
ACCEL ?=0
include ../../../../Makefile.llvm.in
else
ifeq ($(GCC), 1)
include ../../../../Makefile.gcc.in
else
include ../../../../Makefile.bm.in
endif
endif

View File

@ -1,155 +0,0 @@
#include <stdio.h>
#include <stdint.h>
#define ACCEL_CONTROL 0x20000
#define AP_DONE_MASK 0b10
#define ACCEL_INT 0x20004
#define ACCEL_RETURN 0x20010
#define ACCEL_PARAM1 0x20018
#define ACCEL_PARAM2 0x20020
#define ACCEL_NIC 0x2002c
#define ACCEL_BRAM 0x22000
#define ICENET_NAME "icenet"
#define ICENET_IO_BASE 0x10016000
#define ICENET_SEND_REQ 0
#define ICENET_RECV_REQ 8
#define ICENET_SEND_COMP 16
#define ICENET_RECV_COMP 18
#define ICENET_COUNTS 20
#define ICENET_MACADDR 24
#define ICENET_IO_SIZE 32
#define CIRC_BUF_LEN 16
#define ALIGN_BYTES 8
#define ALIGN_MASK 0x7
#define ALIGN_SHIFT 3
size_t sendq_max;
size_t recvq_max;
uint64_t mac;
#ifdef CUSTOM_DRIVER
#include "/scratch/qijing.huang/firesim_new/hls/sw/bm//mmio.h"
/*
 * Drive the memory-mapped accelerator once and return its result register.
 *
 * Sequence: disable interrupts, write param1/param2, read the NIC's MAC
 * address register and write it (low word, then high word) to the register
 * pairs at ACCEL_CONTROL+0x4000 and ACCEL_CONTROL+0x400c, set ap_start,
 * poll until AP_DONE, then read ACCEL_RETURN.
 * NOTE(review): the two register pairs presumably correspond to the
 * accelerator's srcmac and dstmac arguments - confirm against the generated
 * register map.
 */
int top_accel(int param1, int param2) {
    // Disable interrupt for now
    reg_write32(ACCEL_INT, 0x0);

    reg_write32(ACCEL_PARAM1, (uint32_t)param1);
    reg_write32(ACCEL_PARAM2, (uint32_t)param2);

    uint64_t srcmac = reg_read64(ICENET_IO_BASE+ICENET_MACADDR);
    // srcmac is 64 bits wide; "%d" would be a format/argument mismatch.
    printf("srcmac %llu\n", (unsigned long long)srcmac);
    reg_write32(ACCEL_CONTROL+0x4000, srcmac);
    reg_write32(ACCEL_CONTROL+0x4004, srcmac >> 32);
    reg_write32(ACCEL_CONTROL+0x400c, srcmac);
    reg_write32(ACCEL_CONTROL+0x4010, srcmac >> 32);

    // Write to ap_start to start the execution
    reg_write32(ACCEL_CONTROL, 0x1);

    // Busy-wait until the control register reports AP_DONE.
    int done = 0;
    while (!done){
        done = reg_read32(ACCEL_CONTROL) & AP_DONE_MASK;
    }

    int ret = reg_read32(ACCEL_RETURN);
    printf("ret = %d\n", ret);
    return ret;
}
#endif
/*
 * Queue-count accessors. Each one reads nic[ICENET_COUNTS] and returns one
 * 4-bit field of it: bits 0-3 (send requests), 4-7 (receive requests),
 * 8-11 (send completions), 12-15 (receive completions).
 */
static inline int send_req_avail(uint64_t* nic)
{
    const uint64_t counts = nic[ICENET_COUNTS];
    return counts & 0xf;
}

static inline int recv_req_avail(uint64_t* nic)
{
    const uint64_t counts = nic[ICENET_COUNTS];
    return (counts >> 4) & 0xf;
}

static inline int send_comp_avail(uint64_t* nic)
{
    const uint64_t counts = nic[ICENET_COUNTS];
    return (counts >> 8) & 0xf;
}

static inline int recv_comp_avail(uint64_t* nic)
{
    const uint64_t counts = nic[ICENET_COUNTS];
    return (counts >> 12) & 0xf;
}
/*
 * Post one send request to the NIC.
 *
 * The request word holds the low 48 bits of paddr in bits 0-47 and len
 * shifted up from bit 48; bit 63 is set when `last` is zero. A paddr that
 * is not 8-byte aligned or a len that is not a multiple of 8 is reported
 * with printf but the request is posted regardless.
 */
void ice_post_send(uint64_t* nic, int last, uintptr_t paddr, size_t len)
{
    const int misaligned = ((paddr & 0x7ll) != 0) || ((len % 8) != 0);
    uint64_t request;

    if (misaligned)
        printf("paddr: 0x%lx, len = 0x%lx\n", paddr, len);

    request = (len << 48) | (paddr & 0xffffffffffffL);
    if (!last)
        request |= (1ul << 63);

    nic[ICENET_SEND_REQ] = request;
}
/*
 * Post one receive buffer to the NIC by writing its address to
 * nic[ICENET_RECV_REQ]. An address that is not 8-byte aligned is not
 * rejected (the original panic is disabled).
 */
void ice_post_recv(uint64_t* nic, uintptr_t paddr)
{
    /* was: panic("Unaligned receive buffer: %lx\n", paddr) when (paddr & 0x7) != 0 */
    nic[ICENET_RECV_REQ] = paddr;
}
/*
 * Wait for all posted sends to finish, then empty the send-completion queue.
 *
 * Phase 1 spins until send_req_avail() is no longer below the global
 * sendq_max; phase 2 reads nic[ICENET_SEND_COMP] once per iteration while
 * send_comp_avail() is non-zero.
 * NOTE(review): `nic` is not volatile-qualified and `tmp` is never used, so
 * an optimising compiler may hoist or drop these loads - confirm the build
 * flags or qualify the pointer.
 * NOTE(review): sendq_max is a global that nothing in this file assigns
 * (the assignment is commented out elsewhere) - verify it is initialised.
 */
void ice_drain_sendq(uint64_t* nic)
{
/* Poll until there are no more pending sends */
while(send_req_avail(nic) < sendq_max) {
;
}
/* Drain send_compq */
while (send_comp_avail(nic) > 0) {
//ioread16(nic->iomem + ICENET_SEND_COMP);
uint64_t tmp = nic[ICENET_SEND_COMP];
}
return;
}
/*
 * Block until the receive-completion count is non-zero, then read and
 * return nic[ICENET_RECV_COMP] (one completion entry).
 * NOTE(review): `nic` is not volatile-qualified, so the polling load may be
 * hoisted out of the loop by an optimising compiler - confirm.
 */
size_t ice_recv_one(uint64_t* nic)
{
/* Wait for there to be something in the recv_comp Q */
while(recv_comp_avail(nic) == 0) { ; }
/* Pop exactly one thing off Q */
//return (size_t)ioread16(nic->iomem + ICENET_RECV_COMP);
return (size_t)nic[ICENET_RECV_COMP];
}
/*
 * Entry point: when built with CUSTOM_DRIVER, run the accelerator once with
 * parameters 4 and 12; always print "main" and exit with status 0.
 */
int main()
{
#ifdef CUSTOM_DRIVER
    top_accel(4, 12);
#endif

    printf("main\n");
    return 0;
}

View File

@ -1,223 +0,0 @@
#include <stdio.h>
#include <stdint.h>
#include "ap_int.h"
#include "hls_stream.h"
#define ACCEL_CONTROL 0x20000
#define AP_DONE_MASK 0b10
#define ACCEL_INT 0x20004
#define ACCEL_RETURN 0x20010
#define ACCEL_PARAM1 0x20018
#define ACCEL_PARAM2 0x20020
#define ACCEL_NIC 0x2002c
#define ACCEL_BRAM 0x22000
#define ICENET_NAME "icenet"
#define ICENET_IO_BASE 0x10016000
#define ICENET_SEND_REQ 0
#define ICENET_RECV_REQ 8
#define ICENET_SEND_COMP 16
#define ICENET_RECV_COMP 18
#define ICENET_COUNTS 20
#define ICENET_MACADDR 24
#define ICENET_IO_SIZE 32
#define CIRC_BUF_LEN 16
#define ALIGN_BYTES 8
#define ALIGN_MASK 0x7
#define ALIGN_SHIFT 3
size_t sendq_max;
size_t recvq_max;
uint64_t mac;
#ifdef CUSTOM_DRIVER
#include "/scratch/qijing.huang/firesim_new/hls/sw/bm//mmio.h"
/*
 * Drive the memory-mapped accelerator once and return its result register.
 *
 * Sequence: disable interrupts, write param1/param2, seed three words of
 * the accelerator BRAM window (word 0 = 1, word param1 = 7, word param2 = 8),
 * set ap_start, poll until AP_DONE, print BRAM word 0, then read and return
 * ACCEL_RETURN.
 */
int top_accel(int param1, int param2) {
// Disable interrupt for now
reg_write32(ACCEL_INT, 0x0);
// Scalar arguments of the accelerated function.
reg_write32(ACCEL_PARAM1, (uint32_t)param1);
reg_write32(ACCEL_PARAM2, (uint32_t)param2);
//reg_write32(ACCEL_BRAM + param1, (uint32_t) 17);
//reg_write32(ACCEL_BRAM + param2, (uint32_t) 18);
// BRAM is addressed in bytes; "<< 2" turns a word index into a byte offset.
reg_write32(ACCEL_BRAM, 0x1);
reg_write32(ACCEL_BRAM+(param1 << 2), 0x7);
reg_write32(ACCEL_BRAM+(param2 << 2), 0x8);
// Write to ap_start to start the execution
reg_write32(ACCEL_CONTROL, 0x1);
//printf("Accel Control: %x\n", reg_read32(ACCEL_CONTROL));
// Busy-wait until the control register reports AP_DONE.
int done = 0;
while (!done){
done = reg_read32(ACCEL_CONTROL) & AP_DONE_MASK;
}
int bram = reg_read32(ACCEL_BRAM);
printf("bram = %d\n", bram);
//int ret = reg_read32(ACCEL_BRAM+param1);
int ret = reg_read32(ACCEL_RETURN);
printf("ret = %d\n", ret);
return ret;
}
#endif
/*
 * Queue-count accessors. Each one reads nic[ICENET_COUNTS] and returns one
 * 4-bit field of it: bits 0-3 (send requests), 4-7 (receive requests),
 * 8-11 (send completions), 12-15 (receive completions).
 */
static inline int send_req_avail(uint64_t* nic)
{
    const uint64_t counts = nic[ICENET_COUNTS];
    return counts & 0xf;
}

static inline int recv_req_avail(uint64_t* nic)
{
    const uint64_t counts = nic[ICENET_COUNTS];
    return (counts >> 4) & 0xf;
}

static inline int send_comp_avail(uint64_t* nic)
{
    const uint64_t counts = nic[ICENET_COUNTS];
    return (counts >> 8) & 0xf;
}

static inline int recv_comp_avail(uint64_t* nic)
{
    const uint64_t counts = nic[ICENET_COUNTS];
    return (counts >> 12) & 0xf;
}
/*
 * Post one send request to the NIC.
 *
 * The request word holds the low 48 bits of paddr in bits 0-47 and len
 * shifted up from bit 48; bit 63 is set when `last` is zero. A paddr that
 * is not 8-byte aligned or a len that is not a multiple of 8 is reported
 * with printf but the request is posted regardless.
 */
void ice_post_send(uint64_t* nic, int last, uintptr_t paddr, size_t len)
{
    const int misaligned = ((paddr & 0x7ll) != 0) || ((len % 8) != 0);
    uint64_t request;

    if (misaligned)
        printf("paddr: 0x%lx, len = 0x%lx\n", paddr, len);

    request = (len << 48) | (paddr & 0xffffffffffffL);
    if (!last)
        request |= (1ul << 63);

    nic[ICENET_SEND_REQ] = request;
}
/*
 * Post one receive buffer to the NIC by writing its address to
 * nic[ICENET_RECV_REQ]. An address that is not 8-byte aligned is not
 * rejected (the original panic is disabled).
 */
void ice_post_recv(uint64_t* nic, uintptr_t paddr)
{
    /* was: panic("Unaligned receive buffer: %lx\n", paddr) when (paddr & 0x7) != 0 */
    nic[ICENET_RECV_REQ] = paddr;
}
/*
 * Wait for all posted sends to finish, then empty the send-completion queue.
 *
 * Phase 1 spins until send_req_avail() is no longer below the global
 * sendq_max; phase 2 reads nic[ICENET_SEND_COMP] once per iteration while
 * send_comp_avail() is non-zero.
 * NOTE(review): `nic` is not volatile-qualified and `tmp` is never used, so
 * an optimising compiler may hoist or drop these loads - confirm.
 * NOTE(review): the only visible assignment to sendq_max is commented out
 * (inside top()) - verify it is initialised before this is called.
 */
void ice_drain_sendq(uint64_t* nic)
{
/* Poll until there are no more pending sends */
while(send_req_avail(nic) < sendq_max) {
;
}
/* Drain send_compq */
while (send_comp_avail(nic) > 0) {
//ioread16(nic->iomem + ICENET_SEND_COMP);
uint64_t tmp = nic[ICENET_SEND_COMP];
}
return;
}
/*
 * Block until the receive-completion count is non-zero, then read and
 * return nic[ICENET_RECV_COMP] (one completion entry).
 * NOTE(review): `nic` is not volatile-qualified, so the polling load may be
 * hoisted out of the loop by an optimising compiler - confirm.
 */
size_t ice_recv_one(uint64_t* nic)
{
/* Wait for there to be something in the recv_comp Q */
while(recv_comp_avail(nic) == 0) { ; }
/* Pop exactly one thing off Q */
//return (size_t)ioread16(nic->iomem + ICENET_RECV_COMP);
return (size_t)nic[ICENET_RECV_COMP];
}
// val ethType = UInt(ETH_TYPE_BITS.W)
// val srcmac = UInt(ETH_MAC_BITS.W)
// val dstmac = UInt(ETH_MAC_BITS.W)
// val padding = UInt(ETH_PAD_BITS.W)
// val NET_IF_WIDTH = 64
// val NET_IF_BYTES = NET_IF_WIDTH/8
// val NET_LEN_BITS = 16
//
// val ETH_MAX_BYTES = 1520
// val ETH_HEAD_BYTES = 16
// val ETH_MAC_BITS = 48
// val ETH_TYPE_BITS = 16
// val ETH_PAD_BITS = 16
// HLS top-level function: sends one Ethernet header + one data beat on the
// resp_* streams, then reads one header and one data beat back from the
// req_* streams ("loopback") and returns a value derived from the data beat.
//
// param1, param2, paramtable and nic are declared as interface ports but are
// not read by the active code below (their uses are commented out).
// Returns bits 0-63 of the received data beat plus its bit 64.
int top(int param1, int param2, uint64_t* nic, int paramtable[1600], hls::stream<ap_uint<128> >& req_head, hls::stream<ap_uint<65> >& req_data, hls::stream<ap_uint<128> >& resp_head, hls::stream<ap_uint<65> >& resp_data, ap_uint<64>srcmac, ap_uint<64>dstmac) {
//#pragma HLS dataflow
// Interface binding: scalars, MAC addresses, table and return value on the
// "control" AXI-Lite bundle; nic as an AXI master; the four streams as FIFOs.
#pragma HLS INTERFACE s_axilite port=nic bundle=control
#pragma HLS INTERFACE m_axi port=nic offset=slave bundle=gmem0
#pragma HLS interface ap_fifo port=req_head
#pragma HLS interface ap_fifo port=req_data
#pragma HLS interface ap_fifo port=resp_head
#pragma HLS interface ap_fifo port=resp_data
#pragma HLS interface s_axilite bundle=control port=param1
#pragma HLS interface s_axilite bundle=control port=param2
#pragma HLS interface s_axilite bundle=control port=paramtable
#pragma HLS interface s_axilite bundle=control port=srcmac
#pragma HLS interface s_axilite bundle=control port=dstmac
#pragma HLS interface s_axilite bundle=control port=return
// NOTE(review): RMEM_REQ_ETH_TYPE and req_eth_head are declared but unused.
ap_uint<16> RMEM_REQ_ETH_TYPE = 0x0408L;
ap_uint<16> RMEM_RESP_ETH_TYPE = 0x0508L;
ap_uint<128> resp_eth_head;
ap_uint<128> req_eth_head;
// 128-bit header layout: [15:0] zero, [63:16] dstmac, [111:64] srcmac,
// [127:112] ethertype. The 64-bit MAC arguments are narrowed to 48 bits.
resp_eth_head.range(15,0) = 0;
resp_eth_head.range(63,16) = dstmac;
resp_eth_head.range(111,64) = srcmac;
resp_eth_head.range(127,112) = RMEM_RESP_ETH_TYPE;
// One data beat: payload 77 in bits 63:0, bit 64 set
// (presumably the "last" flag - confirm against the stream consumer).
ap_uint<65> send_data;
send_data(63,0) = 77;
send_data(64,64) = 1;
// Send a request
// NOTE(review): the volatile counter presumably exists to force the four
// stream operations into a fixed order during synthesis - confirm.
volatile int count = 1;
if (count == 1)
resp_head.write(resp_eth_head);
count ++;
if (count == 2)
resp_data.write(send_data);
count ++;
// Loopback
// NOTE(review): req_head carries ap_uint<128> but read_head is 64 bits wide,
// so the upper half of the header is dropped; read_head is not used afterwards.
ap_uint<64> read_head;
ap_uint<65> recv_data;
if (count == 3)
read_head = req_head.read();
count ++;
if (count == 4)
recv_data = req_data.read();
count ++;
//recvq_max = recv_req_avail(nic);
//sendq_max = send_req_avail(nic);
//int sum = paramtable[param1] + paramtable[param2];
//printf("recvq_max %d\n", recvq_max);
//printf("sendq_max %d\n", sendq_max);
return recv_data.range(63,0) + recv_data.range(64,64);
}
/*
 * Entry point: when built with CUSTOM_DRIVER, run the accelerator once with
 * parameters 4 and 12; always print "main" and exit with status 0.
 */
int main()
{
#ifdef CUSTOM_DRIVER
    top_accel(4, 12);
#endif

    printf("main\n");
    return 0;
}

View File

@ -0,0 +1,79 @@
#ifndef COMMON_H
#define COMMON_H
#define _XOPEN_SOURCE 700
#include <fcntl.h> /* open */
#include <stdint.h> /* uint64_t */
#include <stdlib.h> /* size_t */
#include <unistd.h> /* pread, sysconf */
/* One decoded /proc/<pid>/pagemap entry.
 *
 * Format documented at:
 * https://github.com/torvalds/linux/blob/v4.9/Documentation/vm/pagemap.txt
 *
 * Per that document the page frame number occupies bits 0-54 of the raw
 * 64-bit entry, i.e. 55 bits, so the field must be 55 bits wide to hold
 * every PFN the kernel can report.
 **/
typedef struct {
    uint64_t pfn : 55;            /* page frame number (raw bits 0-54) */
    unsigned int soft_dirty : 1;  /* PTE is soft-dirty */
    unsigned int file_page : 1;   /* file-mapped or shared-anonymous page */
    unsigned int swapped : 1;     /* page is swapped out */
    unsigned int present : 1;     /* page is present in RAM */
} PagemapEntry;
/* Parse the pagemap entry for the given virtual address.
 *
 * Reads the 64-bit entry at index (vaddr / page size) from the pagemap file
 * and splits it into its documented fields (Linux
 * Documentation/vm/pagemap.txt): PFN in bits 0-54, soft-dirty in bit 55,
 * file-page in bit 61, swapped in bit 62, present in bit 63.
 *
 * @param[out] entry the parsed entry
 * @param[in] pagemap_fd file descriptor to an open /proc/pid/pagemap file
 * @param[in] vaddr virtual address to get entry for
 * @return 0 for success, 1 for failure (read error or end of file)
 */
int pagemap_get_entry(PagemapEntry *entry, int pagemap_fd, uintptr_t vaddr)
{
    uint64_t data = 0;
    unsigned char *dst = (unsigned char *)&data;
    const uint64_t entry_offset =
        (vaddr / sysconf(_SC_PAGE_SIZE)) * sizeof(data);
    size_t nread = 0;

    /* pread may return fewer bytes than requested: continue where the
     * previous call stopped, both in the file and in the buffer, and ask
     * only for the bytes that are still missing. */
    while (nread < sizeof(data)) {
        ssize_t ret = pread(pagemap_fd, dst + nread, sizeof(data) - nread,
                            entry_offset + nread);
        if (ret <= 0) {
            return 1;
        }
        nread += (size_t)ret;
    }

    entry->pfn = data & (((uint64_t)1 << 55) - 1);
    entry->soft_dirty = (data >> 55) & 1;
    entry->file_page = (data >> 61) & 1;
    entry->swapped = (data >> 62) & 1;
    entry->present = (data >> 63) & 1;
    return 0;
}
/* Convert the given virtual address to physical using /proc/PID/pagemap.
 *
 * The pagemap file is opened, one entry is read, and the file is closed
 * again on every path. The entry's `present` flag is not checked, so the
 * result is only meaningful for pages that are resident.
 *
 * @param[out] paddr physical address
 * @param[in] pid process to convert for
 * @param[in] vaddr virtual address to get entry for
 * @return 0 for success, 1 for failure
 */
int virt_to_phys_user(uintptr_t *paddr, pid_t pid, uintptr_t vaddr)
{
    char pagemap_file[BUFSIZ];
    PagemapEntry entry;
    int pagemap_fd;
    int failed;

    snprintf(pagemap_file, sizeof(pagemap_file), "/proc/%ju/pagemap", (uintmax_t)pid);
    pagemap_fd = open(pagemap_file, O_RDONLY);
    if (pagemap_fd < 0) {
        printf("invalid fd!\n");
        return 1;
    }

    failed = pagemap_get_entry(&entry, pagemap_fd, vaddr);
    /* Close before looking at the result so the descriptor is not leaked
     * when the lookup fails. */
    close(pagemap_fd);
    if (failed) {
        printf("invalid entry!\n");
        return 1;
    }

    /* Frame base plus the offset of vaddr inside its page. */
    *paddr = (entry.pfn * sysconf(_SC_PAGE_SIZE)) + (vaddr % sysconf(_SC_PAGE_SIZE));
    return 0;
}
#endif

View File

@ -0,0 +1,250 @@
/*
* Remember: mmap, like most fops, does not work with debugfs as of 4.9! https://patchwork.kernel.org/patch/9252557/
* Adapted from:
* https://coherentmusings.wordpress.com/2014/06/10/implementing-mmap-for-transferring-data-from-user-space-to-kernel-space/
* */
#include <asm/uaccess.h> /* copy_from_user */
#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h> /* min */
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/mm.h>
#include <linux/vmacache.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/capability.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/shmem_fs.h>
#include <linux/profile.h>
#include <linux/export.h>
#include <linux/mount.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
#include <linux/mmdebug.h>
#include <linux/perf_event.h>
#include <linux/audit.h>
#include <linux/khugepaged.h>
#include <linux/uprobes.h>
#include <linux/rbtree_augmented.h>
#include <linux/notifier.h>
#include <linux/memory.h>
#include <linux/printk.h>
#include <linux/userfaultfd_k.h>
#include <linux/moduleparam.h>
#include <linux/pkeys.h>
#include <linux/oom.h>
#include <linux/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
/* Name of the entry this module creates under /proc (see myinit). */
static const char *filename = "custom_mmap";
/* Single module-wide buffer descriptor: allocated in myinit, freed in
 * myexit, and handed to every opener through filp->private_data. */
static struct mmap_info *info;
/* BUFFER_SIZE: byte count used by open/read/write copies.
 * ORDER: page-allocation order for the buffer, i.e. 2^ORDER pages. */
enum { BUFFER_SIZE = 4 , ORDER=8};
/* Descriptor of the kernel buffer exposed through mmap. */
struct mmap_info {
/* Kernel address of the first page of the buffer. */
char *data;
};
/* VMA close callback: only logs that the mapping went away. */
static void vm_close(struct vm_area_struct *vma)
{
	pr_info("vm_close\n");
}
/*
 * Page-fault handler for mappings of the proc file (first access to a page).
 *
 * The faulting address is mapped linearly onto the module buffer: the page
 * at offset (address - vm_start) inside info->data is looked up, its
 * reference count is raised, it is stored in vmf->page and additionally
 * installed with remap_pfn_range() for one PAGE_SIZE.
 *
 * Returns 0, or the error returned by remap_pfn_range().
 * NOTE(review): the offset is not checked against the buffer size
 * (2^ORDER pages), so a mapping larger than the buffer would reach past it.
 * NOTE(review): both vmf->page and remap_pfn_range() are used for the same
 * page, and 0 is also returned when info->data is NULL - confirm this is
 * what the fault core expects on the target kernel version.
 */
//static int vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
static int vm_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
unsigned long vm_start = vma->vm_start;
unsigned long vm_end = vma->vm_end;
unsigned long vmf_addr = vmf->address;
/* NOTE(review): %llx is used with unsigned long arguments here and below. */
pr_info("vm_start %llx, vm_end %llx, vmf_addr %llx.\n", vm_start, vm_end, vmf_addr);
// Create linear mapping
unsigned long offset = vmf_addr - vm_start;
struct page *page;
/* Shadows the file-scope `info`; taken from the VMA's private data. */
struct mmap_info *info;
pr_info("vm_fault\n");
info = (struct mmap_info *)vma->vm_private_data;
// If the base logical addr of kernel buffer exists
if (info->data) {
page = virt_to_page((info->data) + offset);
pr_info("phy %llx virt %llx\n", page_to_phys(page), (info->data) + offset);
get_page(page);
vmf->page = page;
int ret = remap_pfn_range(vma, vmf_addr, page_to_pfn(page), PAGE_SIZE, vma->vm_page_prot);
if (ret)
return ret;
}
return 0;
}
/*
 * VMA open callback: only logs. It is also called directly from mmap().
 * TODO vs mmap, when can this happen at a different time than mmap?
 */
static void vm_open(struct vm_area_struct *vma)
{
	pr_info("vm_open\n");
}
/* VMA callbacks installed on every mapping of the proc file (see mmap()). */
static struct vm_operations_struct vm_ops = {
	.open  = vm_open,
	.fault = vm_fault,
	.close = vm_close,
};
/*
 * mmap handler for the proc file. No pages are mapped here; the VMA is
 * only prepared so that vm_fault() can populate it on first access:
 * the buffer descriptor is passed along in vm_private_data, the VMA is
 * marked non-expandable and excluded from core dumps, and vm_open() is
 * called once by hand. Always returns 0.
 */
static int mmap(struct file *filp, struct vm_area_struct *vma)
{
	pr_info("mmap\n");

	vma->vm_private_data = filp->private_data;
	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
	vma->vm_ops = &vm_ops;

	vm_open(vma);
	return 0;
}
/*
 * open handler for the proc file.
 *
 * Every opener shares the single module-wide buffer allocated in myinit():
 * the first BUFFER_SIZE bytes of that buffer are overwritten with "asdf"
 * and the descriptor is stored in filp->private_data. Always returns 0.
 * (An earlier per-open allocation scheme was removed in favour of this.)
 */
static int open(struct inode *inode, struct file *filp)
{
	pr_info("open\n");

	memcpy(info->data, "asdf", BUFFER_SIZE);
	filp->private_data = info;

	return 0;
}
/*
 * read handler: copies at most BUFFER_SIZE bytes to user space, always
 * starting at the fixed byte offset 4500 inside the buffer. The file
 * position `off` is deliberately ignored and never advanced.
 * Returns the number of bytes copied, or -EFAULT if the copy faults.
 */
static ssize_t read(struct file *filp, char __user *buf, size_t len, loff_t *off)
{
	struct mmap_info *mapping = filp->private_data;
	unsigned long offset = 4500;
	int count = min(len, (size_t)BUFFER_SIZE);

	pr_info("read offset %ld\n", offset);

	if (copy_to_user(buf, mapping->data + offset, count))
		return -EFAULT;
	return count;
}
/*
 * write handler: copies at most BUFFER_SIZE bytes from user space to the
 * fixed byte offset 4500 inside the buffer. The file position `off` is
 * ignored. Returns `len` (the full requested length, even though at most
 * BUFFER_SIZE bytes are stored), or -EFAULT if the copy faults.
 */
static ssize_t write(struct file *filp, const char __user *buf, size_t len, loff_t *off)
{
	struct mmap_info *mapping = filp->private_data;
	unsigned long offset = 4500;

	pr_info("write offset %ld\n", offset);

	if (copy_from_user(mapping->data + offset, buf, min(len, (size_t)BUFFER_SIZE)))
		return -EFAULT;
	return len;
}
/*
 * release handler: intentionally does nothing. The buffer belongs to the
 * module, not to the open file, and is freed in myexit(). Always returns 0.
 */
static int release(struct inode *inode, struct file *filp)
{
	return 0;
}
/* File operations of the /proc entry registered in myinit(). */
static const struct file_operations fops = {
	.open    = open,
	.release = release,
	.read    = read,
	.write   = write,
	.mmap    = mmap,
};
/*
 * Module init: allocate the descriptor and a 2^ORDER-page buffer, then
 * publish the /proc entry.
 *
 * The entry is created last so that user space can never open it while
 * `info` or `info->data` is still unset, and every failure path releases
 * what was allocated before it.
 *
 * Returns 0 on success, -ENOMEM if any allocation or the proc entry fails.
 */
static int myinit(void)
{
	unsigned long order = ORDER; // TODO add module args

	pr_info("open\n");

	info = kmalloc(sizeof(*info), GFP_KERNEL);
	if (!info)
		return -ENOMEM;

	// data maps to logical address of the buffer
	info->data = (char *)__get_free_pages(GFP_KERNEL, order);
	if (!info->data) {
		pr_info("Fail to allocate free pages!\n");
		goto err_free_info;
	}

	/* Only meaningful once the allocation is known to have succeeded. */
	pr_info("virt_to_phys = 0x%llx\n", (unsigned long long)virt_to_phys((void *)info->data));
	pr_info("kernel logical addr 0x%llx\n", (unsigned long long)(unsigned long)(info->data));

	if (!proc_create(filename, 0, NULL, &fops))
		goto err_free_pages;

	return 0;

err_free_pages:
	free_pages((unsigned long)info->data, order);
err_free_info:
	kfree(info);
	info = NULL;
	return -ENOMEM;
}
/*
 * Module exit: unpublish the /proc entry, then free the buffer and its
 * descriptor. The entry is removed first so that no new open() can reach
 * the buffer while it is being freed.
 */
static void myexit(void)
{
	unsigned long order = ORDER;

	remove_proc_entry(filename, NULL);

	/* free_pages() takes the address as an unsigned long, not a pointer. */
	free_pages((unsigned long)info->data, order);
	printk("Freeing 2 ^ %d pages\n", (int)order);
	kfree(info);
	info = NULL;
}
module_init(myinit)
module_exit(myexit)
MODULE_LICENSE("GPL");

View File

@ -0,0 +1,78 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef __ASM_GENERIC_MMAN_COMMON_H
#define __ASM_GENERIC_MMAN_COMMON_H
/*
Author: Michael S. Tsirkin <mst@mellanox.co.il>, Mellanox Technologies Ltd.
Based on: asm-xxx/mman.h
*/
#define PROT_READ 0x1 /* page can be read */
#define PROT_WRITE 0x2 /* page can be written */
#define PROT_EXEC 0x4 /* page can be executed */
#define PROT_SEM 0x8 /* page may be used for atomic ops */
#define PROT_NONE 0x0 /* page can not be accessed */
#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */
#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */
#define MAP_SHARED 0x01 /* Share changes */
#define MAP_PRIVATE 0x02 /* Changes are private */
#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */
#define MAP_TYPE 0x0f /* Mask for type of mapping */
#define MAP_FIXED 0x10 /* Interpret addr exactly */
#define MAP_ANONYMOUS 0x20 /* don't use a file */
/*
 * NOTE(review): the four flags below do not appear to be part of the upstream
 * header; their original comments were copies of the MAP_ANONYMOUS text.
 * The descriptions are inferred from the names only - confirm against the
 * kernel patch that consumes them. Also verify that the values do not
 * collide with arch/asm-generic MAP_* flags (e.g. 0x100).
 */
#define MAP_ALLOC 0x40 /* presumably: allocate the driver buffer */
#define MAP_FREE 0x80 /* presumably: free the driver buffer */
#define MAP_COPY_FROM_USER 0x100 /* presumably: copy user data into the buffer */
#define MAP_COPY_TO_USER 0x200 /* presumably: copy the buffer back to user space */
#ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED
# define MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be uninitialized */
#else
# define MAP_UNINITIALIZED 0x0 /* Don't support this flag */
#endif
/*
* Flags for mlock
*/
#define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */
#define MS_ASYNC 1 /* sync memory asynchronously */
#define MS_INVALIDATE 2 /* invalidate the caches */
#define MS_SYNC 4 /* synchronous memory sync */
#define MADV_NORMAL 0 /* no further special treatment */
#define MADV_RANDOM 1 /* expect random page references */
#define MADV_SEQUENTIAL 2 /* expect sequential page references */
#define MADV_WILLNEED 3 /* will need these pages */
#define MADV_DONTNEED 4 /* don't need these pages */
/* common parameters: try to keep these consistent across architectures */
#define MADV_FREE 8 /* free pages only if memory pressure */
#define MADV_REMOVE 9 /* remove these pages & resources */
#define MADV_DONTFORK 10 /* don't inherit across fork */
#define MADV_DOFORK 11 /* do inherit across fork */
#define MADV_HWPOISON 100 /* poison a page for testing */
#define MADV_SOFT_OFFLINE 101 /* soft offline page for testing */
#define MADV_MERGEABLE 12 /* KSM may merge identical pages */
#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */
#define MADV_HUGEPAGE 14 /* Worth backing with hugepages */
#define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */
#define MADV_DONTDUMP 16 /* Explicitly exclude from the core dump,
overrides the coredump filter bits */
#define MADV_DODUMP 17 /* Clear the MADV_DONTDUMP flag */
#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */
#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */
/* compatibility flags */
#define MAP_FILE 0
#define PKEY_DISABLE_ACCESS 0x1
#define PKEY_DISABLE_WRITE 0x2
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
PKEY_DISABLE_WRITE)
#endif /* __ASM_GENERIC_MMAN_COMMON_H */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,58 @@
#include <assert.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h> /* uintmax_t */
#include <string.h>
#include <sys/mman.h>
#include <unistd.h> /* sysconf */
#include "common.h" /* virt_to_phys_user */
#define PAGE_SIZE 4096
/*
 * mmap_init - open the /proc/origin_mmap node for reading and writing.
 *
 * Returns the open file descriptor. On failure the error is reported with
 * perror() and the program is stopped via assert(0); if assertions are
 * compiled out (NDEBUG) the negative descriptor is returned to the caller.
 */
int mmap_init(void)
{
    const char *file = "/proc/origin_mmap";
    int fd = open(file, O_RDWR | O_SYNC);

    if (fd < 0) {
        perror("open");
        assert(0);
    }
    return fd;
}
/*
 * get_addr - map one page (PAGE_SIZE bytes) of `fd` as shared, read/write
 * memory.
 *
 * fd: descriptor previously returned by mmap_init().
 *
 * Returns the address of the mapping converted to unsigned long. A failed
 * mmap() is reported with perror() and stops the program via assert(0),
 * matching the error handling of mmap_init() and mmap_delete(), instead of
 * silently handing MAP_FAILED to the caller.
 */
unsigned long get_addr(int fd)
{
    void *addr = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

    if (addr == MAP_FAILED) {
        perror("mmap");
        assert(0);
    }
    /* Explicit cast: an implicit pointer-to-integer conversion is not valid C. */
    return (unsigned long)addr;
}
/*
 * mmap_delete - unmap a one-page mapping created by get_addr() and close the
 * descriptor it was created from.
 *
 * fd:   descriptor to close.
 * addr: mapping address as returned by get_addr().
 *
 * Returns 0 on success and -1 if close() fails. A failing munmap() is reported
 * with perror() and stops the program via assert(0).
 */
int mmap_delete(int fd, unsigned long addr)
{
    if (munmap((void *)addr, PAGE_SIZE)) {
        perror("munmap");
        assert(0);
    }
    if (close(fd)) {
        perror("close");
        return -1;
    }
    /* The function is declared int, so always return a defined value. */
    return 0;
}
/*
 * copy_to_buffer - create a fresh mapping of `fd` with get_addr() and copy
 * `length` bytes from `addr` into it.
 *
 * Returns the start of the new mapping. The mapping is not released here.
 * get_addr() maps a single page, so callers must keep `length` within
 * PAGE_SIZE.
 */
char* copy_to_buffer(char* addr, unsigned length, int fd){
    void *mapped = (void *)get_addr(fd);

    return memcpy(mapped, addr, length);
}

View File

@ -0,0 +1,89 @@
/* Remember: mmap, like most fops, does not work with debugfs as of 4.9! https://patchwork.kernel.org/patch/9252557/
Adapted from:
https://coherentmusings.wordpress.com/2014/06/10/implementing-mmap-for-transferring-data-from-user-space-to-kernel-space/
*/
#include <linux/uaccess.h> /* copy_from_user */
#include <asm/io.h>
#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h> /* min */
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/slab.h>
/* Allocation order of the shared buffer: 2^PAGE_ORDER pages. */
#define PAGE_ORDER 8
/* Size of the shared buffer in bytes. */
#define MEMSZ ((1 << PAGE_ORDER) * PAGE_SIZE)
/* Name of the /proc entry created by myinit(). */
static const char *filename = "test2";
/* Kernel virtual address of the buffer from __get_free_pages(); 0 when not allocated. */
unsigned long kaddr = 0;
/* Same buffer as kaddr, viewed as bytes; assigned in myinit(). */
char *buf;
static int mmap(struct file *filp, struct vm_area_struct *vma)
{
unsigned long pfn;
pfn = virt_to_phys((void*)kaddr) >> PAGE_SHIFT;
if(remap_pfn_range(vma, vma->vm_start, pfn, (vma->vm_end - vma->vm_start),
vma->vm_page_prot))
{
printk("remap failed...");
return -1;
}
vma->vm_flags |= (VM_DONTDUMP|VM_DONTEXPAND);
printk("remap_pfn_rang pfn:[%lu] ok.\n", pfn);
return 0;
}
/*
 * release handler: log the string stored at the start of every page of the
 * shared buffer.
 *
 * The pages are writable from user space, so each print is limited to one
 * page in case no terminating NUL was left behind. Nothing is printed when the
 * buffer was never allocated.
 *
 * Always returns 0.
 */
static int release(struct inode *inode, struct file *filp)
{
    unsigned long off;

    pr_info("release:\n");
    if (!buf)
        return 0;
    for (off = 0; off < MEMSZ; off += PAGE_SIZE) {
        /* The first value is the byte offset of the page inside the buffer. */
        pr_info("Page %d: %.*s\n", (int)off, (int)PAGE_SIZE, buf + off);
    }
    return 0;
}
/* File operations of the /proc entry: only mmap and release are implemented. */
static const struct file_operations fops = {
.mmap = mmap,
.release = release
};
static int myinit(void)
{
int i;
proc_create(filename, 0, NULL, &fops);
/* alloc one page */
kaddr = __get_free_pages(GFP_KERNEL, PAGE_ORDER);
if (!kaddr) {
printk("Allocate memory failure!/n");
} else {
//XXX This is techinically needed, but I'm lazy right now
/* SetPageReserved(virt_to_page(kaddr)); */
buf = (char *)kaddr;
for(i = 0; i < MEMSZ; i += PAGE_SIZE) {
sprintf(buf + i, "%d", i >> PAGE_SHIFT);
}
printk("Allocate memory success!.\n");
}
return 0;
}
static void myexit(void)
{
pr_info("mmap2 module exiting\n");
/* ClearPageReserved(virt_to_page(kaddr)); */
free_pages(kaddr, PAGE_ORDER);
remove_proc_entry(filename, NULL);
return;
}
module_init(myinit)
module_exit(myexit)
MODULE_LICENSE("GPL");

View File

@ -0,0 +1,208 @@
/*
Remember: mmap, like most fops, does not work with debugfs as of 4.9! https://patchwork.kernel.org/patch/9252557/
Adapted from:
https://coherentmusings.wordpress.com/2014/06/10/implementing-mmap-for-transferring-data-from-user-space-to-kernel-space/
*/
#include <asm/uaccess.h> /* copy_from_user */
#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h> /* min */
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/mm.h>
#include <linux/vmacache.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/capability.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/shmem_fs.h>
#include <linux/profile.h>
#include <linux/export.h>
#include <linux/mount.h>
#include <linux/mempolicy.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
#include <linux/mmdebug.h>
#include <linux/perf_event.h>
#include <linux/audit.h>
#include <linux/khugepaged.h>
#include <linux/uprobes.h>
#include <linux/rbtree_augmented.h>
#include <linux/notifier.h>
#include <linux/memory.h>
#include <linux/printk.h>
#include <linux/userfaultfd_k.h>
#include <linux/moduleparam.h>
#include <linux/pkeys.h>
#include <linux/oom.h>
#include <linux/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlb.h>
#include <asm/mmu_context.h>
/* Name of the /proc entry created by myinit(). */
static const char *filename = "origin_mmap";
/* Number of bytes moved by the read/write handlers and seeded by open(). */
enum { BUFFER_SIZE = 4 };
/* Per-open state stored in filp->private_data. */
struct mmap_info {
/* Page allocated in open() and freed in release(). */
char *data;
};
/* vm_ops .close hook, invoked after an unmap; there is nothing to undo per VMA. */
static void vm_close(struct vm_area_struct *vma)
{
}
/* First page access. */
//static int vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
//{
static int vm_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
unsigned long vm_start = vma->vm_start;
unsigned long vm_end = vma->vm_end;
unsigned long vmf_addr = vmf->address;
//pr_info("vm_start %llx, vm_end %llx, vmf_addr %llx.\n", vm_start, vm_end, vmf_addr);
// Create linear mapping
unsigned long offset = vmf_addr - vm_start;
struct page *page;
struct mmap_info *info;
//pr_info("vm_fault\n");
//pr_info("page size %d\n", PAGE_SIZE);
info = (struct mmap_info *)vma->vm_private_data;
if (info->data) {
page = virt_to_page((info->data) + offset);
//pr_info("phy %llx virt %llx\n", page_to_phys(page), (info->data) + offset);
//page = virt_to_page(info->data);
get_page(page);
vmf->page = page;
}
return 0;
}
/*
 * vm_ops .open hook; mmap() below also calls it directly after setting up the
 * VMA. Intentionally empty.
 * TODO: when can this run at a different time than mmap?
 */
static void vm_open(struct vm_area_struct *vma)
{
}
/* VMA callbacks installed by mmap(): pages are supplied lazily by vm_fault. */
static struct vm_operations_struct vm_ops =
{
.close = vm_close,
.fault = vm_fault,
.open = vm_open,
};
/*
 * mmap handler: no pages are mapped up front. The VMA is wired to vm_ops so
 * that vm_fault() supplies pages on first access, and the per-open state is
 * passed along through vm_private_data.
 *
 * Always returns 0.
 */
static int mmap(struct file *filp, struct vm_area_struct *vma)
{
    vma->vm_private_data = filp->private_data;
    vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP ;
    vma->vm_ops = &vm_ops;
    vm_open(vma);
    return 0;
}
static int open(struct inode *inode, struct file *filp)
{
struct mmap_info *info;
//pr_info("open\n");
info = kmalloc(sizeof(struct mmap_info), GFP_KERNEL);
//info->data = (char *)get_zeroed_page(GFP_KERNEL);
info->data = (char *)get_zeroed_page(GFP_KERNEL);
//info->data = (char* )__get_free_pages(GFP_KERNEL, 8);
//pr_info("virt_to_phys = 0x%llx\n", (unsigned long long)virt_to_phys((void *)info->data));
memcpy(info->data, "asdf", BUFFER_SIZE);
filp->private_data = info;
return 0;
}
/*
 * read handler: copy up to BUFFER_SIZE bytes from the start of the page to
 * user space. The file offset is ignored.
 *
 * Returns the number of bytes copied, or -EFAULT if the user buffer cannot be
 * written.
 */
static ssize_t read(struct file *filp, char __user *buf, size_t len, loff_t *off)
{
    struct mmap_info *info = filp->private_data;
    int count = min(len, (size_t)BUFFER_SIZE);

    if (copy_to_user(buf, info->data, count))
        return -EFAULT;
    return count;
}
/*
 * write handler: copy up to BUFFER_SIZE bytes from user space to the start of
 * the page. The file offset is ignored.
 *
 * Returns `len` (the full requested length, even when fewer bytes were
 * stored), or -EFAULT if the user buffer cannot be read.
 */
static ssize_t write(struct file *filp, const char __user *buf, size_t len, loff_t *off)
{
    struct mmap_info *info = filp->private_data;
    size_t count = min(len, (size_t)BUFFER_SIZE);

    if (copy_from_user(info->data, buf, count))
        return -EFAULT;
    return len;
}
/*
 * release handler: free the page and the per-open state allocated in open()
 * and detach them from the file. Always returns 0.
 */
static int release(struct inode *inode, struct file *filp)
{
    struct mmap_info *info = filp->private_data;

    filp->private_data = NULL;
    free_page((unsigned long)info->data);
    kfree(info);
    return 0;
}
/* File operations of the /proc entry registered in myinit(). */
static const struct file_operations fops = {
.mmap = mmap,
.open = open,
.release = release,
.read = read,
.write = write,
};
static int myinit(void)
{
proc_create(filename, 0, NULL, &fops);
return 0;
}
/* Module exit: take down the /proc entry created by myinit(). */
static void myexit(void)
{
    remove_proc_entry(filename, NULL);
}
module_init(myinit)
module_exit(myexit)
MODULE_LICENSE("GPL");

Binary file not shown.

View File

@ -0,0 +1,19 @@
#include "mmap_driver.c"
/*
 * Smoke test for the mmap_driver helpers: open the device twice, map one page
 * from each descriptor, move a few ints between the mappings and a local
 * array, then unmap and close both.
 */
int main(void)
{
    int a[4] = {0, 2, 3, 4};

    int fd = mmap_init();
    int *b = (int *)get_addr(fd);

    b[3] = a[1];
    a[2] = b[2];

    int fd2 = mmap_init();
    int *c = (int *)get_addr(fd2);

    c[4] = b[5];

    /* mmap_delete() takes the address as an integer, so convert explicitly. */
    mmap_delete(fd, (unsigned long)b);
    mmap_delete(fd2, (unsigned long)c);
    return 0;
}

View File

@ -0,0 +1,56 @@
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <stdint.h>
#include <assert.h>
#include "common.h"
#define ORDER 8
#define PAGE_SIZE 4096
#define MEMSZ (PAGE_SIZE * (1 << ORDER))
/*
 * User-space test for /proc/test2: map the whole MEMSZ buffer and, for every
 * page, print its virtual and physical address, read the integer stored at the
 * start of the page and write back its negation.
 *
 * Returns 0 on success and -1 if the file cannot be opened or mapped.
 */
int main(int argc, char *argv[])
{
    uintptr_t paddr;
    char *vaddr;

    /*memory map*/
    int map_fd = open("/proc/test2", O_RDWR|O_SYNC);
    if (map_fd < 0) {
        printf("cannot open file /proc/test2\n");
        return -1;
    }

    vaddr = mmap(NULL, MEMSZ, PROT_READ|PROT_WRITE,
                 MAP_SHARED, map_fd, 0);
    if (vaddr == MAP_FAILED) {
        perror("mmap");
        /* MAP_FAILED is not a string; print it as a pointer value. */
        printf("MAP_FAILED : %p\n", (void *)vaddr);
        close(map_fd);
        return -1;
    }

    for (int i = 0; i < MEMSZ; i += PAGE_SIZE) {
        char *buf = vaddr + i;

        printf("vaddr: %p \n", (void *)buf);

        /* Keep the call outside assert() so it still runs under NDEBUG. */
        int err = virt_to_phys_user(&paddr, getpid(), (uintptr_t)buf);
        assert(!err);
        (void)err;
        printf("paddr = 0x%jx\n", (uintmax_t)paddr);

        int val = atoi(buf);
        printf("val: %d\n", val);
        sprintf(buf, "%d", -val);
    }

    /* Unmap exactly what was mapped above. */
    int ret = munmap(vaddr, MEMSZ);
    if (ret) {
        printf("munmap failed:%d \n",ret);
    }
    close(map_fd);
    return 0;
}

View File

@ -0,0 +1,87 @@
#define _XOPEN_SOURCE 700
#include <assert.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h> /* uintmax_t */
#include <string.h>
#include <sys/mman.h>
#include <unistd.h> /* sysconf */
#include "common.h" /* virt_to_phys_user */
#define MAP_POPULATE 0x8000
enum { BUFFER_SIZE = 4 };
/*
 * User-space test for /proc/custom_mmap: map 128 pages of the file, fill the
 * whole mapping with the byte 7, store the string "qwer" at byte offset 4500
 * (inside the second page, which triggers a separate fault), then unmap and
 * close.
 *
 * The path is fixed; command-line arguments are ignored. Any failing system
 * call is reported with perror() and stops the program via assert(0).
 */
int main(int argc, char **argv)
{
    const char *path = "/proc/custom_mmap";
    long page_size = sysconf(_SC_PAGE_SIZE) * 128; /* total mapping size in bytes */
    unsigned long offset = 4500;
    char *address1;
    int fd;

    (void)argc;
    (void)argv;

    printf("open pathname = %s of size %ld\n", path, page_size);
    fd = open(path, O_RDWR | O_SYNC);
    if (fd < 0) {
        perror("open");
        assert(0);
    }
    printf("fd = %d\n", fd);

    puts("mmap 1");
    address1 = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (address1 == MAP_FAILED) {
        perror("mmap");
        assert(0);
    }
    /* Convert to an integer explicitly: %lx with a pointer argument is undefined. */
    printf("address1 %lx\n", (unsigned long)address1);

    /* Read and modify memory. */
    puts("access 1");
    memset(address1, 7, page_size);

    /* vm_fault */
    printf("address1 + offset %lx\n", (unsigned long)(address1 + offset));
    strcpy(address1 + offset, "qwer");

    /* Cleanup. */
    puts("munmap 1");
    if (munmap(address1, page_size)) {
        perror("munmap");
        assert(0);
    }
    puts("close");
    close(fd);
    return EXIT_SUCCESS;
}

View File

@ -1,190 +0,0 @@
#include "ap_int.h"
#include "hls_stream.h"
#include "shift_flex.h"
#include "dma.h"
#include "para.h"
// shiftLayer_FIXED: streaming pipeline that chains, in this order,
//   _conv2d_flex (weights k0) -> _relu_flex (thresholds th0) -> _max_pool_2x2
//   -> _shift_flex -> _conv2d_flex (weights k1) -> _relu_flex (thresholds th1).
// Input is read from `fmap`, the result is written to `out`; the stages are
// connected by local hls::stream FIFOs declared with depth 16.
//   FM_D, IN_CH  : forwarded to the first stages as dimension / channel count.
//   skip_maxPool : when true, MID_CH == IN_CH and MID_D == FM_D; otherwise
//                  MID_CH is IN_CH doubled and MID_D is FM_D halved.
//   batch        : forwarded unchanged to every stage.
// NOTE(review): MAX_D / MAX_C are presumably upper bounds on dimension and
// channel count used to size buffers inside the helpers - confirm in shift_flex.h.
template<int MAX_D, int MAX_C, int PA, int PE>
void shiftLayer_FIXED(hls::stream<ap_uint<FM_W*PA> > &fmap,
hls::stream<ap_uint<FM_W*PA> > &out,
const ap_uint<W_W*PA*PE> *k0,
const ap_uint<W_W*PA*PE> *k1,
const T_SUM *th0,
const T_SUM *th1,
const int FM_D,
const int IN_CH,
bool skip_maxPool,
int batch){
#pragma HLS INLINE
// Channel count and dimension seen by the stages after the first convolution.
const int MID_CH = skip_maxPool?IN_CH:IN_CH << 1;
const int MID_D = skip_maxPool? FM_D : FM_D>>1;
hls::stream<ap_uint<SUM_W*PE> > s_conv0;
#pragma HLS STREAM variable=s_conv0 depth=16 dim=1
hls::stream<ap_uint<FM_W*PE> > s_relu0;
#pragma HLS STREAM variable=s_relu0 depth=16 dim=1
_conv2d_flex<MAX_C, PA, PE, FM_W, W_W, SUM_W, T_FMAP, T_W, T_SUM>(fmap, s_conv0, k0, FM_D, IN_CH, MID_CH, batch);
_relu_flex<SUM_W, FM_W, PE, T_SUM>(s_conv0, s_relu0, th0, FM_D, MID_CH, batch);
hls::stream<ap_uint<FM_W*PE> > s_pool, s_shift;
#pragma HLS STREAM variable=s_pool depth=16 dim=1
_max_pool_2x2<MAX_D, MAX_C, FM_W, PE>(s_relu0, s_pool, FM_D, MID_CH, skip_maxPool, batch);
_shift_flex<MAX_D, MAX_C, FM_W, PE>(s_pool, s_shift, MID_D, MID_CH, batch);
hls::stream<ap_uint<SUM_W*PA> > s_conv1;
#pragma HLS STREAM variable=s_conv1 depth=16 dim=1
_conv2d_flex<MAX_C, PE, PA, FM_W, W_W, SUM_W, T_FMAP, T_W, T_SUM>(s_shift, s_conv1, k1, MID_D, MID_CH, MID_CH, batch);
_relu_flex<SUM_W, FM_W, PA, T_SUM>(s_conv1, out, th1, MID_D, MID_CH, batch);
}
// shiftLayer_RES: streaming pipeline that chains
//   _max_pool_2x2 -> _conv2d_flex (weights k2) -> _relu_flex (thresholds th0).
// Input is read from `fmap`, the result is written to `out`.
//   skip  : when true, MID_CH == IN_CH and MID_D == FM_D; otherwise MID_CH is
//           IN_CH doubled and MID_D is FM_D halved. It is also forwarded to
//           all three helper calls.
//   batch : forwarded unchanged to every stage.
// NOTE(review): _conv2d_flex and _relu_flex are called here with an extra
// `skip` argument compared with shiftLayer_FIXED - confirm matching overloads
// exist in shift_flex.h.
template<int MAX_D, int MAX_C, int PE>
void shiftLayer_RES(hls::stream<ap_uint<FM_W*PE> > &fmap,
hls::stream<ap_uint<FM_W*PE> > &out,
const ap_uint<W_W*PE*PE> *k2,
const T_SUM *th0,
const int FM_D,
const int IN_CH,
bool skip,
int batch){
#pragma HLS INLINE
const int MID_CH = skip?IN_CH:IN_CH << 1;
const int MID_D = skip? FM_D : FM_D>>1;
hls::stream<ap_uint<FM_W*PE> > s_pool;
#pragma HLS STREAM variable=s_pool depth=16 dim=1
_max_pool_2x2<MAX_D, MAX_C, FM_W, PE>(fmap, s_pool, FM_D, IN_CH, skip, batch);
hls::stream<ap_uint<SUM_W*PE> > s_conv0;
#pragma HLS STREAM variable=s_conv0 depth=16 dim=1
_conv2d_flex<MAX_C, PE, PE, FM_W, W_W, SUM_W, T_FMAP, T_W, T_SUM>(s_pool, s_conv0, k2, MID_D, IN_CH, MID_CH, skip, batch);
_relu_flex<SUM_W, FM_W, PE, T_SUM>(s_conv0, out, th0, MID_D, MID_CH, skip, batch);
}
// wrapper: dataflow region for one layer.
//   1. M2S reads FM_D*FM_D*FM_CH/PA_0*batch words from `fmap` into a stream.
//   2. splitStream divides that stream into `left` and `right`.
//   3. `left` goes through shiftLayer_FIXED (weights k0/k1), `right` through
//      shiftLayer_RES (weights k2); each is given FM_CH>>1 channels and
//      `!pool` as its skip flag.
//   4. mergeStream combines both results and S2M writes
//      MID_D*MID_D*MID_CH/PA_0*batch words to `out`.
// `th_i` selects the row of the threshold tables handed to the layers; all
// three tables are initialised from the same "th.txt" include.
// With `pool` set, MID_CH is FM_CH doubled and MID_D is FM_D halved.
void wrapper(ap_uint<FM_W*PA_0>* fmap, ap_uint<FM_W*PA_0> * out,
ap_uint<W_W * PE_0 * PA_0> *k0,
ap_uint<W_W * PE_0 * PA_0> *k1,
ap_uint<W_W * PA_0 * PA_0> *k2,
int FM_D,
int FM_CH,
int th_i,
bool pool,
int batch){
//#pragma HLS INLINE
#pragma HLS INLINE off
const int MAX_LAYERS = 16;
const int MAX_D = 224;
const int MAX_CH = 1024;
const int MID_CH = pool? FM_CH<<1:FM_CH;
const int MID_D = pool? FM_D>>1 : FM_D;
#pragma HLS DATAFLOW
const T_SUM th0[MAX_LAYERS][(1<<FM_W)-1]={
#include "th.txt"
};
const T_SUM th1[MAX_LAYERS][(1<<FM_W)-1]={
#include "th.txt"
};
const T_SUM th2[MAX_LAYERS][(1<<FM_W)-1]={
#include "th.txt"
};
hls::stream<ap_uint<FM_W*PA_0> > st_layer0;
#pragma HLS STREAM variable=st_layer0 depth=16 dim=1
hls::stream<ap_uint<FM_W*PA_0> > out_layer;
#pragma HLS STREAM variable=out_layer depth=16 dim=1
M2S<ap_uint<FM_W*PA_0>, ap_uint<FM_W*PA_0> >(fmap, st_layer0, FM_D*FM_D*FM_CH/PA_0*batch);
hls::stream<ap_uint<FM_W*PA_0> > out_left, out_right;
#pragma HLS STREAM variable=out_left depth=16 dim=1
#pragma HLS STREAM variable=out_right depth=16 dim=1
hls::stream<ap_uint<FM_W*PA_0> > left, right;
#pragma HLS STREAM variable=left depth=16 dim=1
// NOTE(review): `right` gets a much deeper FIFO than `left`; presumably the
// shorter RES path has to buffer data while the FIXED path catches up - confirm.
#pragma HLS STREAM variable=right depth=4*16*256/32 dim=1
splitStream(st_layer0, left, right, FM_CH/PA_0, FM_D*FM_D*batch);
// shift-layer
shiftLayer_FIXED<MAX_D, MAX_CH, PA_0, PE_0>(left,out_left,
k0, k1, th0[th_i], th1[th_i],
FM_D, FM_CH>>1, !pool,batch);
shiftLayer_RES<MAX_D, MAX_CH, PA_0>(right,out_right,
k2, th2[th_i],
FM_D, FM_CH>>1, !pool,batch);
mergeStream(out_left, out_right, out_layer, MID_CH/PA_0, MID_D*MID_D*batch);
//template<typename T_OUT, typename T_IN>
//void S2M(hls::stream<T_IN> &s_mem, T_OUT *mem, int REP){
//
S2M<ap_uint<FM_W*PA_0>, ap_uint<FM_W*PA_0>>(out_layer, out, MID_D*MID_D*MID_CH/PA_0*batch);
}
// top: exported (extern "C") entry point. It only declares the interfaces and
// forwards every argument unchanged to wrapper().
//   - fmap, out, k0, k1, k2 are m_axi ports, each on its own bundle
//     (gmem, gmem3, gmem0, gmem1, gmem2).
//   - All pointers, all scalar arguments and the return are additionally
//     exposed on the s_axilite bundle "control".
extern "C"
void top(ap_uint<FM_W*PA_0>* fmap, ap_uint<FM_W*PA_0> * out,
ap_uint<W_W * PE_0 * PA_0> *k0,
ap_uint<W_W * PE_0 * PA_0> *k1,
ap_uint<W_W * PA_0 * PA_0> *k2,
int FM_D,
int FM_CH,
int th_i,
bool pool,
int batch
){
#pragma HLS INTERFACE m_axi port=fmap offset=slave bundle=gmem depth=4*32*32*128/32
#pragma HLS INTERFACE s_axilite port=fmap bundle=control
#pragma HLS INTERFACE m_axi port=out offset=slave bundle=gmem3 depth=4*32*32*128/32
#pragma HLS INTERFACE s_axilite port=out bundle=control
#pragma HLS INTERFACE m_axi port=k0 bundle=gmem0 depth=128*128/32/32
#pragma HLS INTERFACE s_axilite port=k0 bundle=control
#pragma HLS INTERFACE m_axi port=k1 bundle=gmem1 depth=128*128/32/32
#pragma HLS INTERFACE s_axilite port=k1 bundle=control
#pragma HLS INTERFACE m_axi port=k2 bundle=gmem2 depth=128*128/32/32
#pragma HLS INTERFACE s_axilite port=k2 bundle=control
#pragma HLS INTERFACE s_axilite port=FM_D bundle=control
#pragma HLS INTERFACE s_axilite port=FM_CH bundle=control
#pragma HLS INTERFACE s_axilite port=th_i bundle=control
#pragma HLS INTERFACE s_axilite port=pool bundle=control
#pragma HLS INTERFACE s_axilite port=batch bundle=control
#pragma HLS INTERFACE s_axilite port=return bundle=control
// Disabled experiment kept for reference: it declared local k0/k1/k2 buffers
// and passed those to wrapper() instead of the m_axi pointers.
// const int MAX_LAYERS = 16;
// const int MAX_D = 224;
// const int MAX_CH = 1024;
// const int MID_CH = pool? FM_CH<<1:FM_CH;
// const int MID_D = pool? FM_D>>1 : FM_D;
//
// ap_uint<W_W * PE_0 * PA_0> k0_buffer[512 * 512 / PE_0 * PA_0];
// ap_uint<W_W * PE_0 * PA_0> k1_buffer[512 * 512 / PE_0 * PA_0];
// ap_uint<W_W * PE_0 * PA_0> k2_buffer[512 * 512 / PE_0 * PA_0];
//
// int C = FM_CH>>1;
// static const int MOCPE = MAX_CH/PE_0;
// static const int MCPA = MAX_CH/PA_0;
// const int CPA = C/PA_0;
// const int OCPE = (MID_CH >> 1)/PE_0;
//
// for(int c=0;c<MCPA && c<CPA;c++){
// for(int n=0;n<MOCPE && n<OCPE;n++){
// ap_uint<W_W * PE_0 * PA_0> k0_buffer = k0[c * OCPE + n];
// }
// }
// for(int c=0;c<MCPA && c<CPA;c++){
// for(int n=0;n<MOCPE && n<OCPE;n++){
// ap_uint<W_W * PE_0 * PA_0> k1_buffer = k1[c * OCPE + n];
// }
// }
//
// const int MOCPE2 = MAX_CH/PA_0;
// const int OCPE2=(MID_CH >> 1)/PA_0;
// for(int c=0;c<MCPA && c<CPA;c++){
// for(int n=0;n<MOCPE2 && n<OCPE2;n++){
// ap_uint<W_W * PE_0 * PA_0> k2_buffer = k2[c * OCPE + n];
// }
// }
// wrapper(fmap,out,k0_buffer,k1_buffer,k2_buffer,FM_D, FM_CH, th_i, pool, batch);
wrapper(fmap,out,k0,k1,k2,FM_D, FM_CH, th_i, pool, batch);
}

View File

@ -1,8 +1,8 @@
#include "/scratch/qijing.huang/firesim_new/hls/sw/bm//mmio.h"
#include "/home/centos/hls-fs/hls/sw/bm//mmio.h"
#define ACCEL_BASE 0x20000
#define AP_DONE_MASK 0b10
#define ACCEL_INT 0x4
void vgg_wrapper() {
void top_wrapper() {
// Disable Interrupt
reg_write32(ACCEL_BASE + ACCEL_INT, 0x0);

View File

@ -0,0 +1,208 @@
#!/usr/bin/perl
use warnings;
use strict;
use Cwd;
use File::Copy;
use List::Util qw(first);
use Tie::IxHash;
# Inputs: file_name, func_name, func_base_addr, prefix(Optional)
#
# func_base_addr is written verbatim into the generated "#define ACCEL_BASE",
# so refuse to run without it instead of emitting an empty macro.
my $num_args = $#ARGV + 1;
if ($num_args < 3) {
    die "Usage: $0 file_name func_name func_base_addr [prefix]\n";
}

my $dir = getcwd;
my $file_name = $ARGV[0];
my $func_name = $ARGV[1];
my $func_base_addr = $ARGV[2];
my $prefix = ($num_args > 3) ? $ARGV[3] : undef;

# RDIR is set by sourceme-f1.sh and anchors every path below.
my $rdir = $ENV{'RDIR'};
if ((not defined($rdir)) or $rdir eq '') {
    print("Please source sourceme-f1.sh!\n");
    # Signal the failure to the calling build step.
    exit(1);
}

my $bm_path = $rdir."/sim/target-rtl/firechip/hls_$file_name"."_$func_name";

# The wrapper function is named after the un-prefixed function; the prefix only
# affects the name used to locate the generated verilog.
my $wrapper_func_name = $func_name."_wrapper";
my $wrapper_header= "bm_wrapper.h";
if ($prefix) {
    $func_name = $prefix.$func_name;
}
my $bm_inc_path = $rdir."/hls/sw/bm/";
#############################PARSE Verilog##############################
# Collect the register offsets listed in the "Address Info" comment block of
# <func>_control_s_axi.v. %var_dict maps each signal name to the list of its
# offsets and keeps the order in which the signals appear in the file.
tie my %var_dict, "Tie::IxHash";
my $verilog_file = "$dir/../verilog/${func_name}_control_s_axi.v";
print "Parsing $verilog_file\n";

if (!open VERILOG, "$verilog_file") {
    # Report the error and carry on with an empty dictionary.
    print $!;
} else {
    my $in_addr_info = 0;
    while (my $line = <VERILOG>) {
        # The "Parameter" banner ends the address table.
        if ($line =~ m/------------------------Parameter----------------------/) {
            $in_addr_info = 0;
        }
        if ($in_addr_info && $line =~ m/(0x\S+) : Data signal of (\S+)/) {
            my ($base_addr, $var) = ($1, $2);
            $var_dict{$var} = [] unless exists $var_dict{$var};
            push @{$var_dict{$var}}, $base_addr;
        }
        # The "Address Info" banner starts the table on the following line.
        if ($line =~ m/------------------------Address Info------------------/) {
            $in_addr_info = 1;
        }
    }
}
#############################GENERATE Software Bare-metal Wrappers##############################
# The generation code stays at file level (not in a sub) so that it keeps
# working on the ordered, tied %var_dict directly.

# Echo what was parsed: one line per signal, offsets separated by tabs.
foreach my $var (keys %var_dict) {
    print $var . ": " . join('', map { $_ . "\t" } @{$var_dict{$var}}) . "\n";
}
# Start of the generated header: the mmio include, the accelerator base
# address and the fixed control-register constants.
my $wrapper = join '',
    "#include \"$bm_inc_path/mmio.h\"\n",
    "#define ACCEL_BASE $func_base_addr\n",
    "#define AP_DONE_MASK 0b10\n",
    "#define ACCEL_INT 0x4\n";
#$wrapper .= '#include "'.$bm_inc_path.'/time.h"'."\n";

# One ACCEL_<signal>_<n> macro per register offset of every signal.
foreach my $var (keys %var_dict) {
    my $offsets = $var_dict{$var};
    for my $idx (0 .. $#{$offsets}) {
        $wrapper .= "#define ACCEL_${var}_$idx $offsets->[$idx]\n";
    }
}
# Build the wrapper signature. A signal with two register offsets is treated as
# 64 bits wide, anything else as 32 bits. "ap_return" is the return value and
# never becomes a parameter.
my $ap_return = exists $var_dict{"ap_return"} ? 1 : 0;
my $ap_return_type = "uint32_t";
if ($ap_return && scalar(@{$var_dict{"ap_return"}}) == 2) {
    $ap_return_type = "uint64_t";
}
$wrapper .= ($ap_return ? $ap_return_type : "void") . " $wrapper_func_name(";

my @arglist = ();
foreach my $var (grep { $_ ne "ap_return" } keys %var_dict) {
    my $var_type = (scalar(@{$var_dict{$var}}) == 2) ? "uint64_t" : "uint32_t";
    push(@arglist, "$var_type $var");
}
my $args = join ', ', @arglist;
$wrapper .= $args . ") {";
# Function body, part 1: mask the accelerator interrupt.
$wrapper.= '
// Disable Interrupt
reg_write32(ACCEL_BASE + ACCEL_INT, 0x0);
';

# Part 2: write every argument into its register(s). The second register of a
# 64-bit argument receives the upper 32 bits.
foreach my $var (grep { $_ ne "ap_return" } keys %var_dict) {
    my $last = $#{$var_dict{$var}};
    for my $idx (0 .. $last) {
        die "Index exceeds limit!\n" if $idx > 1;
        my $shift = ($idx == 1) ? " >> 32" : "";
        $wrapper .= " reg_write32(ACCEL_BASE + ACCEL_${var}_$idx, (uint32_t) ($var$shift));\n";
    }
}

# Part 3: start the accelerator and poll until the done bit is set.
$wrapper .='
// Write to ap_start to start the execution
reg_write32(ACCEL_BASE, 0x1);
// Done?
int done = 0;
while (!done){
done = reg_read32(ACCEL_BASE) & AP_DONE_MASK;
}
';
# If there is a return value, read it back from its register(s).
# Register 0 holds the low 32 bits. Register 1 (present only for a 64-bit
# return) holds the high 32 bits, so it has to be widened and shifted LEFT by
# 32 before being OR-ed in; shifting the 32-bit read right by 32 would lose it.
if ($ap_return){
    my @addr = @{$var_dict{"ap_return"}};
    $wrapper .= "\n$ap_return_type ret_val = 0;\n";
    my $idx = 0;
    foreach my $base_addr (@addr) {
        my $read = "reg_read32(ACCEL_BASE + ACCEL_ap_return"."_$idx)";
        if ($idx == 1){
            $read = "(uint64_t) $read << 32";
        }elsif($idx > 1){
            die "Index exceeds limit!\n";
        }
        $wrapper .=" ret_val = ($read) | ret_val;\n";
        $idx +=1;
    }
    $wrapper .= " return ret_val;\n";
}
$wrapper .="}\n";

# Write the generated header into the current directory; fail loudly if that is
# not possible so callers do not pick up a stale or missing file.
open(my $wrapper_fh, '>', $wrapper_header)
    or die "Cannot open $wrapper_header for writing: $!\n";
print $wrapper_fh $wrapper;
close($wrapper_fh)
    or die "Cannot close $wrapper_header: $!\n";
#}
#generate_bm_wrapper(\%var_dict, $func_base_addr);

View File

@ -0,0 +1,98 @@
// Based on code by Schuyler Eldridge. Copyright (c) Boston University
// https://github.com/seldridge/rocket-rocc-examples/blob/master/src/main/c/rocc.h
#ifndef SRC_MAIN_C_ROCC_H
#define SRC_MAIN_C_ROCC_H
#include <stdint.h>
/* Two-level stringification so that macro arguments are expanded before being
 * turned into a string. */
#define STR1(x) #x
#define STR(x) STR1(x)
/* Extract `size` bits of `a` starting at bit `offset`. */
#define EXTRACT(a, size, offset) (((~(~0 << size) << offset) & a) >> offset)
/* Map a custom-opcode index (0..3) to its 7-bit opcode value. */
#define CUSTOMX_OPCODE(x) CUSTOM_ ## x
#define CUSTOM_0 0b0001011
#define CUSTOM_1 0b0101011
#define CUSTOM_2 0b1011011
#define CUSTOM_3 0b1111011
/*
 * Assemble a 32-bit instruction word as an expression:
 *   bits  6:0  opcode (CUSTOM_<X>)
 *   bits 11:7  rd
 *   bit  12    xs2
 *   bit  13    xs1
 *   bit  14    xd
 *   bits 19:15 rs1
 *   bits 24:20 rs2
 *   bits 31:25 funct (low 7 bits only)
 * The expansion is not parenthesized; in this file it is only used inside
 * STR(...) to build a ".word" directive.
 */
#define CUSTOMX(X, xd, xs1, xs2, rd, rs1, rs2, funct) \
CUSTOMX_OPCODE(X) | \
(rd << (7)) | \
(xs2 << (7+5)) | \
(xs1 << (7+5+1)) | \
(xd << (7+5+2)) | \
(rs1 << (7+5+3)) | \
(rs2 << (7+5+3+5)) | \
(EXTRACT(funct, 7, 0) << (7+5+3+5+5))
// Standard macros that pass rd, rs1, and rs2 via registers.
// The suffix names the operands that are used: D = destination, S = source.
// Destination values are placed in x10, sources in x11 and x12; operands that
// are not used are encoded as register number 0.
#define ROCC_INSTRUCTION_DSS(X, rd, rs1, rs2, funct) \
ROCC_INSTRUCTION_R_R_R(X, rd, rs1, rs2, funct, 10, 11, 12)
#define ROCC_INSTRUCTION_DS(X, rd, rs1, funct) \
ROCC_INSTRUCTION_R_R_I(X, rd, rs1, 0, funct, 10, 11)
#define ROCC_INSTRUCTION_D(X, rd, funct) \
ROCC_INSTRUCTION_R_I_I(X, rd, 0, 0, funct, 10)
#define ROCC_INSTRUCTION_SS(X, rs1, rs2, funct) \
ROCC_INSTRUCTION_I_R_R(X, 0, rs1, rs2, funct, 11, 12)
#define ROCC_INSTRUCTION_S(X, rs1, funct) \
ROCC_INSTRUCTION_I_R_I(X, 0, rs1, 0, funct, 11)
#define ROCC_INSTRUCTION(X, funct) \
ROCC_INSTRUCTION_I_I_I(X, 0, 0, 0, funct)
// rd, rs1, and rs2 are data
// rd_n, rs_1, and rs2_n are the register numbers to use
//
// Naming: ROCC_INSTRUCTION_<rd>_<rs1>_<rs2>, where R means the operand is
// passed in a register (its xd/xs1/xs2 bit is encoded as 1) and I means the
// given number is placed directly in the register field (bit encoded as 0).
// Register operands are pinned with `register ... asm("x<n>")` so that the
// value lives in the register number encoded into the instruction word.
// Each macro expands to a braced block, not an expression.

// Destination and both sources in registers; the result is assigned to rd.
#define ROCC_INSTRUCTION_R_R_R(X, rd, rs1, rs2, funct, rd_n, rs1_n, rs2_n) { \
register uint64_t rd_ asm ("x" # rd_n); \
register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \
register uint64_t rs2_ asm ("x" # rs2_n) = (uint64_t) rs2; \
asm volatile ( \
".word " STR(CUSTOMX(X, 1, 1, 1, rd_n, rs1_n, rs2_n, funct)) "\n\t" \
: "=r" (rd_) \
: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
rd = rd_; \
}
// Destination and first source in registers; rs2 is encoded as given.
#define ROCC_INSTRUCTION_R_R_I(X, rd, rs1, rs2, funct, rd_n, rs1_n) { \
register uint64_t rd_ asm ("x" # rd_n); \
register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \
asm volatile ( \
".word " STR(CUSTOMX(X, 1, 1, 0, rd_n, rs1_n, rs2, funct)) "\n\t" \
: "=r" (rd_) : [_rs1] "r" (rs1_)); \
rd = rd_; \
}
// Destination in a register only; rs1 and rs2 are encoded as given.
#define ROCC_INSTRUCTION_R_I_I(X, rd, rs1, rs2, funct, rd_n) { \
register uint64_t rd_ asm ("x" # rd_n); \
asm volatile ( \
".word " STR(CUSTOMX(X, 1, 0, 0, rd_n, rs1, rs2, funct)) "\n\t" \
: "=r" (rd_)); \
rd = rd_; \
}
// Both sources in registers, no result; rd is encoded as given.
#define ROCC_INSTRUCTION_I_R_R(X, rd, rs1, rs2, funct, rs1_n, rs2_n) { \
register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \
register uint64_t rs2_ asm ("x" # rs2_n) = (uint64_t) rs2; \
asm volatile ( \
".word " STR(CUSTOMX(X, 0, 1, 1, rd, rs1_n, rs2_n, funct)) "\n\t" \
:: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
}
// First source in a register, no result; rd and rs2 are encoded as given.
#define ROCC_INSTRUCTION_I_R_I(X, rd, rs1, rs2, funct, rs1_n) { \
register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \
asm volatile ( \
".word " STR(CUSTOMX(X, 0, 1, 0, rd, rs1_n, rs2, funct)) "\n\t" \
:: [_rs1] "r" (rs1_)); \
}
// No register operands at all; every field is encoded as given.
#define ROCC_INSTRUCTION_I_I_I(X, rd, rs1, rs2, funct) { \
asm volatile ( \
".word " STR(CUSTOMX(X, 0, 0, 0, rd, rs1, rs2, funct)) "\n\t" ); \
}
#endif // SRC_MAIN_C_ROCC_H

View File

@ -0,0 +1,561 @@
#!/usr/bin/perl
use warnings;
use strict;
use Cwd;
use File::Copy;
use List::Util qw(first);
# Inputs: file_name, func_name, func_base_addr, prefix(Optional)
my $dir = getcwd;
my $file_name = $ARGV[0];
my $func_name = $ARGV[1];
my $func_base_addr = $ARGV[2];
my $rdir = $ENV{'RDIR'};
my $prefix = undef;
my $i = undef;
my $num_args = $#ARGV + 1;
if ($num_args > 3) {
    $prefix = $ARGV[3];
}

# RDIR is set by sourceme-f1.sh. Validate it before it is used to build any
# path, and report the problem to the caller through a non-zero exit status.
if ((not defined($rdir)) or $rdir eq '') {
    print("Please source sourceme-f1.sh!\n");
    exit(1);
}

# The benchmark directory is named after the un-prefixed function; the prefix
# only applies to the names used from here on.
my $bm_path = $rdir."/sim/target-rtl/firechip/hls_$file_name"."_$func_name";
if ($prefix) {
    $func_name = $prefix.$func_name;
}
# Emit a minimal build.sbt for the generated project into $bm_path; the
# placeholder "test_c" is replaced by the function name.
my $build_sbt = '
organization := "edu.berkeley.cs"
version := "1.0"
name := "hls_test_c"';
$build_sbt=~ s/test_c/$func_name/g;
my $build_sbt_path= "$bm_path/"."build.sbt";
# A failure here is reported but does not stop the script, so the remaining
# generation steps still run as before.
if (open(my $build_fh, '>', $build_sbt_path)) {
    print $build_fh $build_sbt;
    close($build_fh) or warn "Cannot close $build_sbt_path: $!\n";
} else {
    warn "Cannot open $build_sbt_path for writing: $!\n";
}
my $verilog_file = "$dir/../verilog/$func_name".".v";
my $line = undef;
my @verilog_param = ();
my @param_val = ();
my @verilog_input = ();
my @verilog_input_size = ();
my @verilog_output = ();
my @verilog_output_size = ();
#my $m_axi_data_width = undef;
#my $s_axi_data_width = undef;
my @bus_names=();
my @m_axi_data_widths = ();
my $s_axi_data_width = undef;

# Parse the text that follows an "input"/"output" keyword on a port line.
# Returns (name, width) or an empty list if the text has no terminating ';'.
#   "[N:M] name;"      -> width N-M+1 when N is a plain number
#   "[EXPR - 1:M] name;" -> width is the text captured before " - 1"
#   "name;"            -> width "1"
# A ranged port whose upper bound matches neither form gets width 0.
sub parse_port_decl {
    my ($decl) = @_;
    if ($decl =~ m/\s*\[(.*):(.*)\]\s*(.*)\s*;/) {
        my ($msb, $lsb, $name) = ($1, $2, $3);
        my $size = 0;
        if ($msb =~ m/^\d+$/) {
            $size = $msb - $lsb + 1;
            $size = "".$size;
        } elsif ($msb =~ m/(\S+) - 1/) {
            $size = $1;
        }
        return ($name, $size);
    }
    if ($decl =~ m/\s*(.*)\s*;/) {
        return ($1, "1");
    }
    return ();
}

print "Parsing ".$verilog_file."\n";
# parse the verilog file to get the info we need
if(!open VERILOG, "$verilog_file"){
    print $!;
} else {
    while(<VERILOG>){
        $line = $_;
        # Match AXI4 parameter
        if($line =~ m/parameter\s+(C_\S+) =\s+(.*);/){
            my $param = $1;
            my $val = $2;
            # Every master bus contributes its lower-cased name and data width.
            if($param =~ m/C_M_AXI_(\S+)_DATA_WIDTH/){
                my $bus_name = lc $1;
                push(@bus_names, $bus_name);
                push(@m_axi_data_widths, $val);
            }
            if ($param eq "C_S_AXI_DATA_WIDTH") {
                $s_axi_data_width = $val;
            }
            push (@verilog_param, $param);
            push (@param_val, $val);
        } elsif($line =~ m/^\s*input\s+(.*)/){
            if (my ($input_name, $size) = parse_port_decl($1)) {
                push (@verilog_input, $input_name);
                push (@verilog_input_size, $size);
            }
        } elsif($line =~ m/^\s*output\s+(.*)/){
            if (my ($output_name, $size) = parse_port_decl($1)) {
                push (@verilog_output, $output_name);
                push (@verilog_output_size, $size);
            }
        }
    }
    print("Parameters: ");
    my $param_str = join ' ', @verilog_param;
    print $param_str."\n";
    print("Inputs: ");
    my $in_str = join ' ', @verilog_input;
    print $in_str."\n";
    print("Outputs: ");
    my $out_str = join ' ', @verilog_output;
    print $out_str."\n";
}
# Create the scala output folder next to the verilog folder if it is missing.
my $scala_dir = "$dir/../scala";
if (!-d $scala_dir) {
    mkdir $scala_dir;
}
##############################################################################################################################
# Fall back to one dummy 32-bit master bus and a 32-bit slave bus when the
# verilog declared none.
unless (@m_axi_data_widths) {
    push(@bus_names, "gmem_dummy");
    push(@m_axi_data_widths, 32);
}
$s_axi_data_width = 32 unless defined($s_axi_data_width);

print "Generating BlackBox file ...\n";
foreach my $bus (0 .. $#m_axi_data_widths) {
    print "m_axi_data_width_ $bus_names[$bus]= $m_axi_data_widths[$bus]\n";
}
print "s_axi_data_width = $s_axi_data_width\n";
# Generate <func>_blackbox.scala (should be under scala folder): a Chisel
# BlackBox whose io bundle mirrors the ports parsed from the verilog.
my $bb_scala_path = "$scala_dir/$func_name"."_blackbox.scala";
open(BB, ">$bb_scala_path")
    or die "Cannot open $bb_scala_path for writing: $!\n";
my $blackbox1 = "
package hls_test_c
import Chisel._
import freechips.rocketchip.config.{Parameters, Field}
import freechips.rocketchip.tile._
import freechips.rocketchip.util._
class test_c() extends BlackBox() {
";
$blackbox1 =~ s/test_c/$func_name/g;
# Print parameters ($i is the file-level loop counter declared at the top).
for( $i = 0; $i < @verilog_param; $i = $i + 1 ){
    $blackbox1 .= "val $verilog_param[$i] = $param_val[$i]\n";
}
print BB $blackbox1;
print BB "\tval io = new Bundle {\n";
my $bb_body = "";
# now if the input name does not start with ap, we assume it is an arg
# Flags recording which clock/reset/return ports exist, plus the list of AXI
# ports; they are consumed by the control-file generation further down.
my $ap_return = 0;
my $ap_clk = 0;
my $ap_rst = 0;
my $ap_rst_n = 0;
my @verilog_axi_io = ();
for( $i = 0; $i < @verilog_input; $i = $i + 1 ){
    my $input_name = $verilog_input[$i];
    my $input_size = $verilog_input_size[$i];
    if ($input_name =~ m/^ap_clk$/){
        $ap_clk = 1;
    }
    elsif ($input_name =~ m/^ap_rst$/){
        $ap_rst = 1;
    }
    elsif ($input_name =~ m/^ap_rst_n$/){
        $ap_rst_n = 1;
    }
    elsif($input_name =~ m/^(m_axi|s_axi)\S+$/){
        push (@verilog_axi_io, $input_name);
    }
    print BB "\t\tval $input_name = ";
    # Clock inputs get the Clock type, everything else is a plain bit vector.
    if ($input_name =~ m/ap_clk(.*)/){
        print BB "Clock(INPUT)\n";
    }else{
        print BB "Bits(INPUT, width = $input_size)\n";
    }
}
for( $i = 0; $i < @verilog_output; $i = $i + 1 ){
    my $output_name = $verilog_output[$i];
    my $output_size = $verilog_output_size[$i];
    if ($output_name =~ m/ap_return(.*)/){
        $ap_return = 1;
    }
    elsif($output_name =~ m/^(m_axi|s_axi)\S+$/){
        push (@verilog_axi_io, $output_name);
    }
    print BB "\t\tval $output_name = ";
    print BB "Bits(OUTPUT, width = $output_size)\n";
}
print BB "\t}\n";
print BB "}\n";
close BB;
##############################################################################################################################
# Generate the first part of <func>_accel.scala: the LazyModule with one AXI4
# master node per parsed bus and one AXI4 slave node, followed by the start of
# the module implementation (black box instance, bus bindings, clock/reset).
# The CT handle stays open; later sections append to the same file.
print "Generating Control file ...\n";
my $ct_scala_path = "$scala_dir/$func_name"."_accel.scala";
open(CT, ">$ct_scala_path")
    or die "Cannot open $ct_scala_path for writing: $!\n";
#TODO Fix AXI4 params
my $control1 = '
package hls_test_c
import chisel3._
import chisel3.util._
import freechips.rocketchip.config.{Field, Parameters}
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.tilelink._
import freechips.rocketchip.amba.axi4._
import freechips.rocketchip.util._
import freechips.rocketchip.subsystem._
class HLStest_cAXI (address: BigInt = 0x20000, beatBytes: Int = 8) (implicit p: Parameters) extends LazyModule {
val numInFlight = 8
';
# One master node per AXI master bus found in the verilog.
for( $i = 0; $i < @m_axi_data_widths; $i = $i + 1 ){
$control1 .="
val node_$bus_names[$i] = AXI4MasterNode(Seq(AXI4MasterPortParameters(
masters = Seq(AXI4MasterParameters(
name = \"axil_hub_mem_out_$i\",
id = IdRange(0, numInFlight),
aligned = true,
maxFlight = Some(8)
)),
userBits = 0
)
))";
}
$control1 .='
val slave_node = AXI4SlaveNode(Seq(AXI4SlavePortParameters(
slaves = Seq(AXI4SlaveParameters(
address = List(AddressSet(address,0x4000-1)),
regionType = RegionType.UNCACHED,
supportsWrite = TransferSizes(1, beatBytes),
supportsRead = TransferSizes(1, beatBytes),
interleavedId = Some(0)
)),
beatBytes = beatBytes
)))
lazy val module = new HLStest_cAXIModule(this)
}
class HLStest_cAXIModule(outer: HLStest_cAXI) extends LazyModuleImp(outer) {
//val (out, edge) = outer.node.out(0)
val (slave_in, slave_edge) = outer.slave_node.in(0)
val bId = Reg(UInt(32.W))
val rId = Reg(UInt(32.W))
val bb = Module(new test_c())
';
# Bind the outward edge of every master node.
for( $i = 0; $i < @m_axi_data_widths; $i = $i + 1 ){
$control1 .="
val (out_$bus_names[$i], edge_$bus_names[$i]) = outer.node_$bus_names[$i].out(0)";
}
$control1 .= "\n";
$control1 =~ s/s_axi_data_width/$s_axi_data_width/g;
# Hook up only the clock/reset ports the black box actually has. The flags are
# numeric, so compare them numerically.
if ($ap_clk == 1){
    $control1 .= "\tbb.io.ap_clk := clock\n";
}
if ($ap_rst == 1){
    $control1 .= "\tbb.io.ap_rst := reset\n";
}
if ($ap_rst_n == 1){
    $control1 .= "\tbb.io.ap_rst_n := !reset.toBool() \n";
}
# Replace the template placeholder with the real function name everywhere.
$control1 =~ s/test_c/$func_name/g;
print CT $control1;
#TODO modify accelerator arg!
my $control2 = '
';
# TODO Add support for multiple AXI buses
# AXI Inputs Signals
# Emit one "bb.io.<pin> := <bundle field>" line for every collected AXI pin that
# is an input of the black box. Each rule is [pin-name pattern, bundle side,
# bundle field]; on a match, capture 1 is the bus name and capture 2 the AXI
# channel. Rules are tried in order and only the first match emits a line; pins
# matching no rule emit nothing here.
my @bb_input_rules = (
    [ qr/m_axi_(.*)_(AW|W|AR)READY$/, 'master', 'ready'     ],
    [ qr/m_axi_(.*)_(R|B)VALID$/,     'master', 'valid'     ],
    [ qr/m_axi_(.*)_(R)DATA$/,        'master', 'bits.data' ],
    [ qr/m_axi_(.*)_(R)LAST$/,        'master', 'bits.last' ],
    [ qr/m_axi_(.*)_(R|B)ID$/,        'master', 'bits.id'   ],
    [ qr/m_axi_(.*)_(R|B)RESP$/,      'master', 'bits.resp' ],
    [ qr/s_axi_(.*)_(AW|W|AR)VALID$/, 'slave',  'valid'     ],
    [ qr/s_axi_(.*)_(AW|AR)ADDR$/,    'slave',  'bits.addr' ],
    [ qr/s_axi_(.*)_(W)DATA$/,        'slave',  'bits.data' ],
    [ qr/s_axi_(.*)_(W)STRB$/,        'slave',  'bits.strb' ],
    [ qr/s_axi_(.*)_(R|B)READY$/,     'slave',  'ready'     ],
);
foreach my $axi_pin (@verilog_axi_io) {
    foreach my $rule (@bb_input_rules) {
        my ($pattern, $side, $field) = @{$rule};
        next unless $axi_pin =~ $pattern;
        my ($bus, $channel) = ($1, lc $2);
        # Master pins read from the per-bus out_<bus> bundle; slave pins read
        # from the single slave_in bundle (the slave bus name is not used).
        my $driver = ($side eq 'master')
            ? "out_$bus.$channel.$field"
            : "slave_in.$channel.$field";
        $control2 .= "\tbb.io.$axi_pin := $driver\n";
        last;
    }
}
# Emit one "<bundle field> := bb.io.<pin>" line for every collected AXI pin that
# is an output of the black box. Each rule is [pin-name pattern, bundle side,
# bundle field, line prefix]; on a match, capture 1 is the bus name and capture 2
# the AXI channel. Rules are tried in order and only the first match emits a
# line. The REGION rule carries a "//" prefix so its connection is emitted as a
# Scala comment rather than live code.
my @bb_output_rules = (
    [ qr/m_axi_(.*)_(AW|W|AR)VALID$/, 'master', 'valid',       ''   ],
    [ qr/m_axi_(.*)_(R|B)READY$/,     'master', 'ready',       ''   ],
    [ qr/m_axi_(.*)_(AW|AR)ADDR$/,    'master', 'bits.addr',   ''   ],
    [ qr/m_axi_(.*)_(AW|AR)ID$/,      'master', 'bits.id',     ''   ],
    [ qr/m_axi_(.*)_(AW|AR)LEN$/,     'master', 'bits.len',    ''   ],
    [ qr/m_axi_(.*)_(AW|AR)SIZE$/,    'master', 'bits.size',   ''   ],
    [ qr/m_axi_(.*)_(AW|AR)BURST$/,   'master', 'bits.burst',  ''   ],
    [ qr/m_axi_(.*)_(AW|AR)LOCK$/,    'master', 'bits.lock',   ''   ],
    [ qr/m_axi_(.*)_(AW|AR)CACHE$/,   'master', 'bits.cache',  ''   ],
    [ qr/m_axi_(.*)_(AW|AR)PROT$/,    'master', 'bits.prot',   ''   ],
    [ qr/m_axi_(.*)_(AW|AR)QOS$/,     'master', 'bits.qos',    ''   ],
    [ qr/m_axi_(.*)_(AW|AR)REGION$/,  'master', 'bits.region', '//' ],
    [ qr/m_axi_(.*)_(W)DATA$/,        'master', 'bits.data',   ''   ],
    [ qr/m_axi_(.*)_(W)STRB$/,        'master', 'bits.strb',   ''   ],
    [ qr/m_axi_(.*)_(W)LAST$/,        'master', 'bits.last',   ''   ],
    [ qr/s_axi_(.*)_(AW|W|AR)READY$/, 'slave',  'ready',       ''   ],
    [ qr/s_axi_(.*)_(R|B)VALID$/,     'slave',  'valid',       ''   ],
    [ qr/s_axi_(.*)_(R)DATA$/,        'slave',  'bits.data',   ''   ],
    [ qr/s_axi_(.*)_(R|B)RESP$/,      'slave',  'bits.resp',   ''   ],
);
foreach my $axi_pin (@verilog_axi_io) {
    foreach my $rule (@bb_output_rules) {
        my ($pattern, $side, $field, $line_prefix) = @{$rule};
        next unless $axi_pin =~ $pattern;
        my ($bus, $channel) = ($1, lc $2);
        # Master pins drive the per-bus out_<bus> bundle; slave pins drive the
        # single slave_in bundle (the slave bus name is not used).
        my $target = ($side eq 'master')
            ? "out_$bus.$channel.$field"
            : "slave_in.$channel.$field";
        $control2 .= "\t$line_prefix$target := bb.io.$axi_pin\n";
        last;
    }
}
# Expose the accelerator's return value when an ap_return output was found.
# The generated module instantiates the black box as `bb` and never defines an
# `accel` value, so the previous `accel.io.ap.rtn` reference could not resolve
# in the emitted Scala. The black-box IO bundle declares each output under its
# Verilog port name.
# NOTE(review): this assumes the port is named exactly `ap_return`; the
# detection regex also accepts longer names containing it - confirm.
if ($ap_return == 1){
    $control2 .= "\tval ap_return = bb.io.ap_return\n";
}
# Append the tail of the generated module class. The emitted Scala ties
# slave_in.r.bits.last high, latches the AW/AR transaction ids into the bId/rId
# registers when those channels fire, drives the B/R response ids from those
# registers, and closes the class body.
$control2 .= "
// For AXI4lite, these two signals are always True
slave_in.r.bits.last := true.B
when(slave_in.aw.fire()){
bId := slave_in.aw.bits.id
}
when(slave_in.ar.fire()){
rId := slave_in.ar.bits.id
}
slave_in.r.bits.id := rId
slave_in.b.bits.id := bId
}
";
# Append the subsystem integration traits to the control file text.
# The template uses three placeholders that are substituted later in the
# script: test_c (function name), base_addr (accelerator base address) and
# s_axi_data_width (slave data width in bits; the template shifts it right by 3
# to get bytes).
# TODO Fix the width here
$control2 .='
trait HasPeripheryHLStest_cAXI { this: BaseSubsystem =>
private val address = BigInt(base_addr)
private val axi_m_portName = "HLS-Accelerator-test_c-master"
private val axilite_s_portName = "HLS-Accelerator-test_c-slave"
val accel_s_axi_width = s_axi_data_width
//val hls_test_c_accel = LazyModule(new HLStest_cAXI(address, sbus.beatBytes))
val hls_test_c_accel = LazyModule(new HLStest_cAXI(address, accel_s_axi_width >> 3))
';
# One sbus.fromPort attachment per AXI master bus: the accelerator's AXI4 master
# node goes through an AXI4-to-TileLink adapter chain sized by that bus's data
# width (bits >> 3 = bytes).
for( $i = 0; $i < @m_axi_data_widths; $i = $i + 1 ){
$control2 .="
sbus.fromPort(Some(axi_m_portName)) {
(TLWidthWidget($m_axi_data_widths[$i]>> 3 )
:= AXI4ToTL()
:= AXI4UserYanker()
:= AXI4Fragmenter()
:= AXI4IdIndexer(1))
}:=* hls_test_c_accel.node_$bus_names[$i]
";
}
# Attach the accelerator's slave node to the system bus through a
# TileLink-to-AXI4 adapter chain, then emit the matching module-implementation
# trait.
$control2 .='
hls_test_c_accel.slave_node :=* sbus.toFixedWidthPort(Some(axilite_s_portName)) {
(AXI4Buffer()
:= AXI4UserYanker()
//:= AXI4IdIndexer(params.idBits)
//:= AXI4Deinterleaver(sbus.blockBytes) // Assume there is no iterleaved requests, iterleaveId = Some(0)
:= TLToAXI4()
:= TLBuffer()
//:= TLWidthWidget(accel_s_axi_width >> 3)
// Compared to TLWidthWidget, TLFragmenter saves the id info?
:= TLFragmenter(accel_s_axi_width >> 3, 64, true))
}
}
trait HasPeripheryHLStest_cAXIImp extends LazyModuleImp {
val outer: HasPeripheryHLStest_cAXI
}';
# Resolve the template placeholders (function name, base address, slave data
# width) and write the remainder of the control file.
$control2 =~ s/test_c/$func_name/g;
$control2 =~ s/base_addr/$func_base_addr/g;
$control2 =~ s/s_axi_data_width/$s_axi_data_width/g;
print CT $control2;
# Close explicitly: buffered write errors (e.g. disk full) only surface here,
# and would otherwise leave a truncated Scala file without any diagnostic.
close(CT) or die "Cannot finish writing $scala_dir/$func_name"."_accel.scala: $!\n";

View File

@ -0,0 +1,100 @@
#!/usr/bin/perl
# Runs Vivado HLS on a C/C++ source file and collects the generated Verilog.
#
# Usage: <this script> FILE_NAME FUNC_NAME [PREFIX]
#   FILE_NAME  source base name; FILE_NAME.cpp is used when it exists,
#              otherwise FILE_NAME.c
#   FUNC_NAME  top-level function handed to set_top
#   PREFIX     optional value for "config_rtl -prefix"
#
# Steps: write run_hls.tcl into the current directory, run vivado_hls on it,
# patch the generated Verilog in place, then copy it into ../verilog/
# (whose previous contents are removed first).
use warnings;
use strict;
use Cwd;
use File::Copy;

# Both positional arguments are interpolated into paths and the Tcl script, so
# refuse to continue without them.
die "Usage: $0 <file_name> <func_name> [prefix]\n" if @ARGV < 2;

my $file_name = $ARGV[0];
my $func_name = $ARGV[1];
my $prefix = undef;
my $num_args = $#ARGV + 1;
if ($num_args > 2) {
    $prefix = $ARGV[2];
}
#############################GENERATE HLS##############################
# Generate directive file based on LLVM emitted output
# If the variable is of pointer type then an ap_bus interface is generated
# NOTE: this template is currently unused; the line that would source
# directives.tcl is commented out in the Tcl template below.
my $directive_tcl_insn = 'set_directive_interface -mode ap_bus "test_c_func" test_var
';
my $prefix_tcl = "";
if ($prefix) {
    $prefix_tcl = "config_rtl -prefix ".$prefix."\n";
}
my $hls_pgm = undef;
if (-f $file_name.".cpp"){
    $hls_pgm = $file_name.'.cpp -cflags "-std=c++0x" ';
} else {
    $hls_pgm = $file_name.".c";
}
# should change to add all .c files
my $hls_tcl = 'open_project -reset test_c_prj
set_top test_c_func
add_files hls_pgm
open_solution -reset "solution1"
set_part {xcvu9p-flgb2104-2-i}
config_compile -ignore_long_run_time
create_clock -period 10 -name default
'.$prefix_tcl.'
#source "./test_c_prj/solution1/directives.tcl"
#config_interface -clock_enable
config_interface -m_axi_addr64
csynth_design
#export_design -format ip_catalog
exit';
my $dir = getcwd;
my $tcl_path = "$dir/run_hls.tcl";
# replace the function name and file name
# (test_c_func must be substituted before test_c, which is a prefix of it)
$hls_tcl =~ s/test_c_func/$func_name/g;
$hls_tcl =~ s/test_c/$file_name/g;
$hls_tcl =~ s/hls_pgm/$hls_pgm/g;
# Write and close the script before launching the tool so vivado_hls is
# guaranteed to read the complete file.
open(my $hls_fh, '>', $tcl_path) or die "Cannot open $tcl_path for writing: $!\n";
print {$hls_fh} $hls_tcl;
close($hls_fh) or die "Cannot finish writing $tcl_path: $!\n";
# run vivado hls
system("vivado_hls -f run_hls.tcl") == 0
    or die "vivado_hls failed (wait status $?)\n";
my $vivado_dir = "$dir/$file_name"."_prj/solution1/syn/verilog/";
my $verilog_dir = "$dir/../verilog/";
mkdir $verilog_dir unless (-d $verilog_dir);
unlink glob "$verilog_dir/*";
opendir(DIR, $vivado_dir) or die "Can't opendir $vivado_dir: $! \n";
my @files=readdir(DIR);
closedir(DIR);

# Patch the generated sources in place. Both commands operate on every entry in
# the directory ("*"), so they run once here rather than once per file.
chdir($vivado_dir) or die "Can't chdir to $vivado_dir: $!\n";
my $vivado_dir_escape = $vivado_dir;
$vivado_dir_escape =~ s/\//\\\//g;
# Open and replace one line: point relative "./" paths inside $readmemh(...)
# calls at the absolute Vivado output directory.
my $perl_cmd = "perl -p -i -e 's/\$readmemh\\\(\\\"\\\.\/\$readmemh(\\\"$vivado_dir_escape/g' *";
print "$perl_cmd\n";
system($perl_cmd) == 0 or warn "in-place rewrite failed (wait status $?): $perl_cmd\n";
# Replace 'bx literals with 1'b0.
$perl_cmd = "perl -p -i -e \"s/'bx/1'b0/g\" *";
print "$perl_cmd\n";
system($perl_cmd) == 0 or warn "in-place rewrite failed (wait status $?): $perl_cmd\n";
chdir($dir) or die "Can't chdir back to $dir: $!\n";

# Copy every regular file; the -f test skips "." and ".." and subdirectories.
foreach my $v_file (@files){
    print "$v_file\n";
    if (-f "$vivado_dir/$v_file") {
        copy("$vivado_dir/$v_file", $verilog_dir) or die "File cannot be copied! $v_file $verilog_dir\n";
    }
}
#die $!;

View File

@ -0,0 +1,15 @@
# Vivado HLS batch script: C-synthesizes dialacnet_tl.cpp with `top` as the
# top-level function, then exits.
# Create the project, discarding any existing one of the same name.
open_project -reset dialacnet_tl_prj
set_top top
add_files dialacnet_tl.cpp -cflags "-std=c++0x"
open_solution -reset "solution1"
# Target device.
set_part {xcvu9p-flgb2104-2-i}
config_compile -ignore_long_run_time
# 10 ns clock period.
create_clock -period 10 -name default
# Prefix for generated RTL names.
config_rtl -prefix tl0_
#source "./dialacnet_tl_prj/solution1/directives.tcl"
#config_interface -clock_enable
# Use 64-bit addresses on m_axi interfaces.
config_interface -m_axi_addr64
csynth_design
#export_design -format ip_catalog
exit

View File

@ -0,0 +1,32 @@
****** Vivado(TM) HLS - High-Level Synthesis from C, C++ and SystemC v2017.4.op (64-bit)
**** SW Build 2193837 on Tue Apr 10 18:06:59 MDT 2018
**** IP Build 2189296 on Tue Apr 10 19:39:46 MDT 2018
** Copyright 1986-2017 Xilinx, Inc. All Rights Reserved.
source /opt/Xilinx/Vivado/2017.4.op/scripts/vivado_hls/hls.tcl -notrace
INFO: [HLS 200-10] Running '/opt/Xilinx/Vivado/2017.4.op/bin/unwrapped/lnx64.o/vivado_hls'
INFO: [HLS 200-10] For user 'centos' on host 'ip-192-168-2-23.us-west-2.compute.internal' (Linux_x86_64 version 3.10.0-693.21.1.el7.x86_64) on Wed Dec 05 01:59:13 UTC 2018
INFO: [HLS 200-10] On os "CentOS Linux release 7.4.1708 (Core) "
INFO: [HLS 200-10] In directory '/home/centos/hls-fs/target-design/firechip/hls_dialacnet_tl_top/src/main/c'
INFO: [HLS 200-10] Creating and opening project '/home/centos/hls-fs/target-design/firechip/hls_dialacnet_tl_top/src/main/c/dialacnet_tl_prj'.
INFO: [HLS 200-10] Adding design file 'dialacnet_tl.cpp' to the project
INFO: [HLS 200-10] Creating and opening solution '/home/centos/hls-fs/target-design/firechip/hls_dialacnet_tl_top/src/main/c/dialacnet_tl_prj/solution1'.
INFO: [HLS 200-10] Cleaning up the solution database.
INFO: [HLS 200-10] Setting target device to 'xcvu9p-flgb2104-2-i'
WARNING: [ANALYSIS 214-1] Skip long-run-time warning caused by lots of load/store instructions.
INFO: [SYN 201-201] Setting up clock 'default' with a period of 10ns.
INFO: [HLS 200-10] Analyzing design file 'dialacnet_tl.cpp' ...
WARNING: [HLS 200-40] In file included from dialacnet_tl.cpp:5:
./para.h:4:10: fatal error: '/ecad/tools/xilinx/Vivado/2018.2/include/gmp.h' file not found
#include "/ecad/tools/xilinx/Vivado/2018.2/include/gmp.h"
^
1 error generated.
C preprocessor failed.
while executing
"source [lindex $::argv 1] "
("uplevel" body line 1)
invoked from within
"uplevel \#0 { source [lindex $::argv 1] } "
INFO: [Common 17-206] Exiting vivado_hls at Wed Dec 5 01:59:14 2018...

View File

@ -1,21 +0,0 @@
# Build configuration for the dot_product benchmark.
# TARGET may be overridden from the command line or environment (?=).
VERBOSE= 1
TARGET ?= dot_product
FUNC=add
LDFLAGS=
CFLAGS=
# CUSTOM_INST=1 compiles the sources with -DCUSTOM_INST.
ifeq ($(CUSTOM_INST), 1)
CFLAGS+=-DCUSTOM_INST
endif
# Pick the shared makefile fragment by toolchain:
#   LLVM=1 -> Makefile.llvm.in (ACCEL defaults to 0 unless already set)
#   GCC=1  -> Makefile.gcc.in
#   else   -> Makefile.bm.in
ifeq ($(LLVM), 1)
ACCEL ?=0
include ../../../../Makefile.llvm.in
else
ifeq ($(GCC), 1)
include ../../../../Makefile.gcc.in
else
include ../../../../Makefile.bm.in
endif
endif

View File

@ -1,26 +0,0 @@
# Build configuration for the dot_product_tl benchmark.
# TARGET may be overridden from the command line or environment (?=).
VERBOSE= 1
TARGET ?=dot_product_tl
FUNC=dot
LDFLAGS=
CFLAGS=
# CUSTOM_INST=1 compiles the sources with -DCUSTOM_INST.
ifeq ($(CUSTOM_INST), 1)
CFLAGS+=-DCUSTOM_INST
endif
# CUSTOM_DRIVER=1 compiles the sources with -DCUSTOM_DRIVER.
ifeq ($(CUSTOM_DRIVER), 1)
CFLAGS+=-DCUSTOM_DRIVER
endif
# Pick the shared makefile fragment by toolchain:
#   LLVM=1 -> Makefile.llvm.in (ACCEL defaults to 0 unless already set)
#   GCC=1  -> Makefile.gcc.in
#   else   -> Makefile.bm.in
ifeq ($(LLVM), 1)
ACCEL ?=0
include ../../../../Makefile.llvm.in
else
ifeq ($(GCC), 1)
include ../../../../Makefile.gcc.in
else
include ../../../../Makefile.bm.in
endif
endif

View File

@ -0,0 +1,19 @@
# Build configuration for the dot_product benchmark; the actual rules come from
# ../Makefile, included at the end.
# TARGET may be overridden from the command line or environment (?=).
TARGET ?=dot_product
VERBOSE=1
LDFLAGS=
CFLAGS=
# POSTFIX is empty for the plain build and is set per variant below.
# NOTE(review): presumably ../Makefile appends it to the output name - confirm.
POSTFIX=
# CUSTOM_INST=1: compile with -DCUSTOM_INST, POSTFIX becomes .inst.
ifeq ($(CUSTOM_INST), 1)
POSTFIX=.inst
CFLAGS+=-DCUSTOM_INST
endif
# CUSTOM_DRIVER=1: compile with -DCUSTOM_DRIVER, POSTFIX becomes .driver.
# If both flags are 1, both -D options are set and POSTFIX ends up .driver
# because this assignment comes last.
ifeq ($(CUSTOM_DRIVER), 1)
POSTFIX=.driver
CFLAGS+=-DCUSTOM_DRIVER
endif
include ../Makefile

View File

@ -0,0 +1,13 @@
#include <stdint.h>
#ifdef CUSTOM_INST
#include "rocc.h"
#endif

/**
 * Wrapper that hands two 64-bit operands to the accelerator.
 *
 * When CUSTOM_INST is defined, the operands are passed through the
 * ROCC_INSTRUCTION_DSS macro from rocc.h (accelerator slot XCUSTOM_ACC,
 * last macro argument 0) and the macro's result is returned.
 * When CUSTOM_INST is not defined there is no accelerator call and the
 * function returns 0.
 *
 * @param a first operand handed to the macro
 * @param b second operand handed to the macro
 * @return value written by the macro, or 0 in a build without CUSTOM_INST
 */
uint64_t dot_wrapper(uint64_t a, uint64_t b) {
    /* Initialised so a build without CUSTOM_INST returns a defined value
     * instead of reading an indeterminate local. */
    uint64_t ret_val = 0;
#ifdef CUSTOM_INST
#define XCUSTOM_ACC 0
    ROCC_INSTRUCTION_DSS(XCUSTOM_ACC, ret_val, a, b, 0);
#else
    /* Operands are only consumed by the accelerator path. */
    (void)a;
    (void)b;
#endif
    return ret_val;
}

View File

@ -17,14 +17,14 @@ double dot(double a[NUMBER_OF_INPUT], double b[NUMBER_OF_INPUT]) {
#pragma HLS PIPELINE
// To prevent burst mode
result[1] += a[i+1] *b[i+1];
result[0] += a[i] *b[i];
result[1] += a[i+1] *b[i+1];
result[2] += a[i+2] *b[i+2];
result[3] += a[i+3] *b[i+3];
result[6] += a[i+6] *b[i+6];
result[4] += a[i+4] *b[i+4];
result[5] += a[i+5] *b[i+5];
result[6] += a[i+6] *b[i+6];
result[7] += a[i+7] *b[i+7];
@ -60,11 +60,15 @@ int main () {
}
double c;
uint64_t start = read_cycle();
#ifdef CUSTOM_INST
c = dot_wrapper(a, b);
#else
c = dot(a, b);
#endif
duration(start, read_cycle());
printf("A . B = %x\n", c);
return 0;

Some files were not shown because too many files have changed in this diff Show More