Merge pull request #1 from hqjenny/hardcodedWorkloads
Hardcoded workloads
This commit is contained in:
commit
7c7e0659ce
|
@ -0,0 +1,9 @@
|
|||
*.o
|
||||
*.a
|
||||
*.driver
|
||||
*.riscv
|
||||
*.rv
|
||||
*.ll
|
||||
*.S
|
||||
*.asm
|
||||
.doit.db
|
|
@ -0,0 +1,62 @@
|
|||
# FireMarshal Workloads for Centrifuge
|
||||
|
||||
## Hardcoded Linux Examples
|
||||
The `hardcoded/` directory contains a number of hand-written benchmarks for
|
||||
linux that use various centrifuge-generated accelerators. Most can be built
|
||||
simply by running `make` in their respective directories. However, to actually
|
||||
use the benchmark you will need to add it to a linux-based workload (i.e. kernel +
|
||||
root filesystem). The FireMarshal tool (included with chipyard) helps automate
|
||||
this process.
|
||||
|
||||
## Vadd Example Quickstart
|
||||
If you simply want to try out the example vector add workload, you can use the
|
||||
following commands:
|
||||
|
||||
First, build the workload
|
||||
|
||||
../../tools/firemarshal/marshal build vadd_tl.json vadd_rocc.json
|
||||
|
||||
This command builds two Linux-based workloads that can be booted on a RISC-V
|
||||
SoC. They include a simple test that uses the vector addition accelerators that
|
||||
were generated by centrifuge.
|
||||
|
||||
Next, you will want to install them:
|
||||
|
||||
../../tools/firemarshal/marshal install vadd_tl.json vadd_rocc.json
|
||||
|
||||
This generates a FireSim workload description for the provided benchmarks. You
|
||||
may now use FireSim normally to launch the workloads. They will simply run the
|
||||
benchmark automatically and shut down (you do not need to interact with the
|
||||
running workload at all).
|
||||
|
||||
## FireMarshal Workload Description Details
|
||||
vadd\_tl.json and vadd\_rocc.json are FireMarshal workload descriptions. These
|
||||
include all the information needed to build a working Linux-based benchmark.
|
||||
Here is the workload description for the tilelink-based accelerator:
|
||||
|
||||
{
|
||||
"name" : "vadd_tl",
|
||||
"base" : "cf-buildroot.json",
|
||||
"workdir" : "hardcoded/vadd_tl",
|
||||
"host-init" : "build.sh",
|
||||
"files" : [["vadd_tl", "/root/"]],
|
||||
"command" : "/root/vadd_tl"
|
||||
}
|
||||
|
||||
These options have the following effects:
|
||||
|
||||
- name: A unique name identifying this benchmark.
|
||||
- base: We have provided you with a base workload that includes the necessary
|
||||
changes to Linux and other common setup tasks. FireMarshal inherits this
|
||||
configuration before building your workload.
|
||||
- workdir: All file paths in this configuration file will be relative to this directory
|
||||
- host-init: This script is run automatically before constructing the Linux
|
||||
root filesystem. In this case, `hardcoded/vadd\_tl/build.sh` simply
|
||||
cross-compiles the vector add example benchmark.
|
||||
- files: This instructs FireMarshal to copy the benchmark into the target root filesystem.
|
||||
- command: FireMarshal will configure the workload to automatically run this
|
||||
command whenever it boots up. This allows for hands-free testing.
|
||||
|
||||
You can find more information on FireMarshal workloads in its
|
||||
[documentation](https://firemarshal.readthedocs.io/en/latest/).
|
||||
|
|
@ -1 +1 @@
|
|||
Subproject commit 560165de57bcee5452602d64f5206fbf0f5d8059
|
||||
Subproject commit 8f5d9162eb07e192dcb4d1562668c685dfa660c5
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name" : "cf-buildroot",
|
||||
"base" : "br-base",
|
||||
"base" : "br-base.json",
|
||||
"workdir" : "cf-base",
|
||||
"linux-src" : "linux"
|
||||
}
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
# Shared build rules for the hardcoded Linux benchmarks.
#
# A benchmark directory sets TARGET, POSTFIX, CFLAGS and LDFLAGS in its own
# Makefile and then pulls these rules in with `include ../Makefile`.

ARCH = riscv64

CC :=$(ARCH)-unknown-linux-gnu-g++

CFLAGS += -fpermissive -I../
LDFLAGS += -fpermissive

# Every source and header in the benchmark directory. The benchmarks #include
# sibling .c files directly, so an object has to be rebuilt when any of them
# changes, not only when $(TARGET).c does.
SOURCES := $(wildcard *.c)
HEADERS := $(wildcard *.h)

DEPS := $(SOURCES) $(HEADERS)
src_files := $(TARGET).c
OBJECTS = $(src_files:.c=.o)
DEPS += $(OBJECTS)

# Command-style targets that never produce a file of the same name.
.PHONY: all run clean cleanall

all: $(TARGET)$(POSTFIX)

# The final binary is declared phony, so it is relinked on every invocation.
.PHONY: $(TARGET)$(POSTFIX)

$(TARGET)$(POSTFIX): Makefile $(DEPS)
	$(CC) $(LDFLAGS) $(OBJECTS) -o $@
	$(ARCH)-unknown-linux-gnu-objdump -D $@ > $(TARGET).asm

# Pattern rules instead of `.c.o: $(DEPS)` / `.cpp.o: $(DEPS)`: a suffix rule
# cannot carry prerequisites (GNU make then treats ".c.o" as an ordinary file
# target), so the old recipes were never used and header changes did not
# trigger a recompile. $(DEPS) cannot be reused here because it also contains
# $(OBJECTS), which would make every object depend on itself.
%.o: %.c $(SOURCES) $(HEADERS)
	$(CC) -c $(CFLAGS) $< -o $@

%.o: %.cpp $(SOURCES) $(HEADERS)
	$(CC) -c $(CFLAGS) $< -o $@

run:
	spike pk $(TARGET).rv

# `make print-VAR` echoes the value of VAR; handy for debugging the build.
print-%:
	@echo $* = $($*)

clean:
	rm -f $(OBJECTS) $(TARGET).rv $(TARGET).asm $(TARGET).ll $(TARGET).S

cleanall: clean
	rm -f $(TARGET).inst $(TARGET) $(TARGET).driver
|
|
@ -1,25 +0,0 @@
|
|||
VERBOSE= 1
|
||||
TARGET ?=adpcm
|
||||
|
||||
FUNC=encode
|
||||
LDFLAGS=
|
||||
CFLAGS=
|
||||
|
||||
ifeq ($(CUSTOM_INST), 1)
|
||||
CFLAGS+=-DCUSTOM_INST
|
||||
endif
|
||||
|
||||
ifeq ($(CUSTOM_DRIVER), 1)
|
||||
CFLAGS+=-DCUSTOM_DRIVER
|
||||
endif
|
||||
|
||||
ifeq ($(LLVM), 1)
|
||||
ACCEL ?=0
|
||||
include ../../../../Makefile.llvm.in
|
||||
else
|
||||
ifeq ($(GCC), 1)
|
||||
include ../../../../Makefile.gcc.in
|
||||
else
|
||||
include ../../../../Makefile.bm.in
|
||||
endif
|
||||
endif
|
|
@ -1,25 +0,0 @@
|
|||
VERBOSE= 1
|
||||
TARGET ?=adpcm_tl
|
||||
|
||||
FUNC=encode
|
||||
LDFLAGS=
|
||||
CFLAGS=
|
||||
|
||||
ifeq ($(CUSTOM_INST), 1)
|
||||
CFLAGS+=-DCUSTOM_INST
|
||||
endif
|
||||
|
||||
ifeq ($(CUSTOM_DRIVER), 1)
|
||||
CFLAGS+=-DCUSTOM_DRIVER
|
||||
endif
|
||||
|
||||
ifeq ($(LLVM), 1)
|
||||
ACCEL ?=0
|
||||
include ../../../../Makefile.llvm.in
|
||||
else
|
||||
ifeq ($(GCC), 1)
|
||||
include ../../../../Makefile.gcc.in
|
||||
else
|
||||
include ../../../../Makefile.bm.in
|
||||
endif
|
||||
endif
|
|
@ -0,0 +1,18 @@
|
|||
VERBOSE= 1
|
||||
TARGET ?=adpcm
|
||||
|
||||
LDFLAGS=
|
||||
CFLAGS=-Wno-narrowing
|
||||
|
||||
POSTFIX=
|
||||
ifeq ($(CUSTOM_INST), 1)
|
||||
POSTFIX=.inst
|
||||
CFLAGS+=-DCUSTOM_INST
|
||||
endif
|
||||
|
||||
ifeq ($(CUSTOM_DRIVER), 1)
|
||||
POSTFIX=.driver
|
||||
CFLAGS+=-DCUSTOM_DRIVER
|
||||
endif
|
||||
|
||||
include ../Makefile
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,13 @@
|
|||
|
||||
#ifdef CUSTOM_INST
|
||||
#include "rocc.h"
|
||||
#endif
|
||||
/*
 * decode_wrapper - issue the RoCC custom instruction for the decode step.
 *
 * input_r: operand handed to ROCC_INSTRUCTION_DS together with ret_val
 *          (presumably source register and destination register, going by
 *          the macro name; confirm against rocc.h).
 *
 * Returns the value left in ret_val by the instruction. When CUSTOM_INST is
 * not defined no instruction is issued and 0 is returned; ret_val used to be
 * returned uninitialized in that configuration.
 */
uint64_t decode_wrapper(uint64_t input_r) {
    uint64_t ret_val = 0;

#ifdef CUSTOM_INST
#define XCUSTOM_ACC 2
    ROCC_INSTRUCTION_DS(XCUSTOM_ACC, ret_val, input_r, 0);
#endif
    return ret_val;
}
|
|
@ -0,0 +1,19 @@
|
|||
VERBOSE= 1
|
||||
TARGET ?=adpcm
|
||||
|
||||
FUNC=encode
|
||||
LDFLAGS=
|
||||
CFLAGS=-Wno-narrowing
|
||||
|
||||
POSTFIX=
|
||||
ifeq ($(CUSTOM_INST), 1)
|
||||
POSTFIX=.inst
|
||||
CFLAGS+=-DCUSTOM_INST
|
||||
endif
|
||||
|
||||
ifeq ($(CUSTOM_DRIVER), 1)
|
||||
POSTFIX=.driver
|
||||
CFLAGS+=-DCUSTOM_DRIVER
|
||||
endif
|
||||
|
||||
include ../Makefile
|
|
@ -1156,48 +1156,28 @@ void
|
|||
adpcm_main ()
|
||||
{
|
||||
int i, j;
|
||||
uint64_t begin;
|
||||
|
||||
/* reset, initialize required memory */
|
||||
/* reset, initialize required memory */
|
||||
reset ();
|
||||
|
||||
j = 10;
|
||||
|
||||
#ifdef WRAP_encode
|
||||
uint64_t begin;
|
||||
begin = read_cycle();
|
||||
for (i = 0; i < IN_END; i += 2) {
|
||||
#ifdef CUSTOM_INST
|
||||
for (i = 0; i < IN_END; i += 2)
|
||||
{
|
||||
compressed[i / 2] = encode_wrapper(test_data[i], test_data[i + 1]);
|
||||
}
|
||||
compressed[i / 2] = encode_wrapper(test_data[i], test_data[i + 1]);
|
||||
#else
|
||||
for (i = 0; i < IN_END; i += 2)
|
||||
{
|
||||
compressed[i / 2] = encode(test_data[i], test_data[i + 1]);
|
||||
compressed[i / 2] = encode (test_data[i], test_data[i + 1]);
|
||||
#endif
|
||||
}
|
||||
#endif //CUSTOM_INST
|
||||
duration(begin, read_cycle());
|
||||
#endif //WRAP_encode
|
||||
|
||||
#ifdef WRAP_decode
|
||||
begin = read_cycle();
|
||||
#if CUSTOM_INST
|
||||
for (i = 0; i < IN_END; i += 2)
|
||||
{
|
||||
decode_wrapper(compressed[i / 2]);
|
||||
for (i = 0; i < IN_END; i += 2) {
|
||||
decode (compressed[i / 2]);
|
||||
result[i] = xout1;
|
||||
result[i + 1] = xout2;
|
||||
}
|
||||
#else
|
||||
for (i = 0; i < IN_END; i += 2)
|
||||
{
|
||||
decode(compressed[i / 2]);
|
||||
result[i] = xout1;
|
||||
result[i + 1] = xout2;
|
||||
}
|
||||
#endif //CUSTOM_INST
|
||||
duration(begin, read_cycle());
|
||||
#endif //WRAP_decode
|
||||
}
|
||||
|
||||
int
|
|
@ -0,0 +1,13 @@
|
|||
|
||||
#ifdef CUSTOM_INST
|
||||
#include "rocc.h"
|
||||
#endif
|
||||
/*
 * encode_wrapper - issue the RoCC custom instruction for the encode step.
 *
 * xin1, xin2: operands handed to ROCC_INSTRUCTION_DSS together with ret_val
 *             (presumably two source registers and one destination register,
 *             going by the macro name; confirm against rocc.h).
 *
 * Returns the value left in ret_val by the instruction. When CUSTOM_INST is
 * not defined no instruction is issued and 0 is returned; ret_val used to be
 * returned uninitialized in that configuration.
 */
uint64_t encode_wrapper(uint64_t xin1, uint64_t xin2) {
    uint64_t ret_val = 0;

#ifdef CUSTOM_INST
#define XCUSTOM_ACC 1
    ROCC_INSTRUCTION_DSS(XCUSTOM_ACC, ret_val, xin1, xin2, 0);
#endif
    return ret_val;
}
|
|
@ -0,0 +1,19 @@
|
|||
VERBOSE= 1
|
||||
TARGET ?=adpcm_tl
|
||||
|
||||
FUNC=encode
|
||||
LDFLAGS=
|
||||
CFLAGS=-Wno-narrowing
|
||||
|
||||
POSTFIX=
|
||||
ifeq ($(CUSTOM_INST), 1)
|
||||
POSTFIX=.inst
|
||||
CFLAGS+=-DCUSTOM_INST
|
||||
endif
|
||||
|
||||
ifeq ($(CUSTOM_DRIVER), 1)
|
||||
POSTFIX=.driver
|
||||
CFLAGS+=-DCUSTOM_DRIVER
|
||||
endif
|
||||
|
||||
include ../Makefile
|
Binary file not shown.
|
@ -63,11 +63,47 @@
|
|||
/* */
|
||||
/*************************************************************************/
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#ifdef CUSTOM_DRIVER
|
||||
#include "bm_wrapper.h"
|
||||
#endif
|
||||
#include "time.h"
|
||||
//#include "mmio.h"
|
||||
//#include "time.h"
|
||||
|
||||
#include "../custom_mmap/mmap_driver.c"
|
||||
#include "../os_utils.h"
|
||||
#define ACCEL_CONTROL 0x30000
|
||||
#define AP_DONE_MASK 0b10
|
||||
|
||||
#define ACCEL_INT 0x30004
|
||||
#define ACCEL_A 0x30018
|
||||
#define ACCEL_B 0x30020
|
||||
#define ACCEL_RET 0x30010
|
||||
|
||||
int encode_accel (int a , int b){
|
||||
//printf("start\n");
|
||||
// printf("%d\n", reg_read32(ACCEL_CONTROL) );
|
||||
// Disable interrupt for now
|
||||
//reg_write32(ACCEL_INT, 0x0);
|
||||
access_addr(ACCEL_INT, OUT, 0);
|
||||
// Set up pointer a and pointer b address
|
||||
access_addr(ACCEL_A, OUT, a);
|
||||
access_addr(ACCEL_B, OUT, b);
|
||||
//reg_write32(ACCEL_A, (uint32_t)a);
|
||||
//reg_write32(ACCEL_B, (uint32_t)b);
|
||||
|
||||
// Write to ap_start to start the execution
|
||||
//reg_write32(ACCEL_CONTROL, 0x1);
|
||||
|
||||
access_addr(ACCEL_CONTROL, OUT, 0x1);
|
||||
//printf("Accel Control: %x\n", reg_read32(ACCEL_CONTROL));
|
||||
|
||||
// Done?
|
||||
int done = 0;
|
||||
while (!done){
|
||||
//done = reg_read32(ACCEL_CONTROL) & AP_DONE_MASK;
|
||||
done = access_addr(ACCEL_CONTROL, IN, 0x1) & AP_DONE_MASK;
|
||||
}
|
||||
int c = access_addr(ACCEL_RET, IN, 0x1);
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
int encode (int, int);
|
||||
void decode (int);
|
||||
|
@ -1167,18 +1203,21 @@ adpcm_main ()
|
|||
|
||||
j = 10;
|
||||
|
||||
uint64_t begin, end, dur;
|
||||
begin = read_cycle();
|
||||
int64_t begin, end, dur;
|
||||
begin = read_cycle();
|
||||
|
||||
for (i = 0; i < IN_END; i += 2)
|
||||
{
|
||||
#ifdef CUSTOM_DRIVER
|
||||
compressed[i / 2] = encode_wrapper (test_data[i], test_data[i + 1]);
|
||||
#else
|
||||
compressed[i / 2] = encode (test_data[i], test_data[i + 1]);
|
||||
#endif
|
||||
end = read_cycle();
|
||||
}
|
||||
//for (i = 0; i < 1; i += 2)
|
||||
{
|
||||
#ifdef CUSTOM_DRIVER
|
||||
compressed[i / 2] = encode_accel (test_data[i], test_data[i + 1]);
|
||||
#else
|
||||
compressed[i / 2] = encode (test_data[i], test_data[i + 1]);
|
||||
#endif
|
||||
}
|
||||
end = read_cycle();
|
||||
duration(begin, end);
|
||||
|
||||
for (i = 0; i < IN_END; i += 2)
|
||||
{
|
||||
decode (compressed[i / 2]);
|
|
@ -1,25 +0,0 @@
|
|||
VERBOSE= 1
|
||||
TARGET ?=aes
|
||||
|
||||
FUNC=encrypt
|
||||
LDFLAGS=
|
||||
CFLAGS=
|
||||
|
||||
ifeq ($(CUSTOM_INST), 1)
|
||||
CFLAGS+=-DCUSTOM_INST
|
||||
endif
|
||||
|
||||
ifeq ($(CUSTOM_DRIVER), 1)
|
||||
CFLAGS+=-DCUSTOM_DRIVER
|
||||
endif
|
||||
|
||||
ifeq ($(LLVM), 1)
|
||||
ACCEL ?=0
|
||||
include ../../../../Makefile.llvm.in
|
||||
else
|
||||
ifeq ($(GCC), 1)
|
||||
include ../../../../Makefile.gcc.in
|
||||
else
|
||||
include ../../../../makefiles/Makefile.bm.in
|
||||
endif
|
||||
endif
|
|
@ -1,25 +0,0 @@
|
|||
VERBOSE= 1
|
||||
TARGET ?=aes_tl
|
||||
|
||||
FUNC=encrypt
|
||||
LDFLAGS=
|
||||
CFLAGS=
|
||||
|
||||
ifeq ($(CUSTOM_INST), 1)
|
||||
CFLAGS+=-DCUSTOM_INST
|
||||
endif
|
||||
|
||||
ifeq ($(CUSTOM_DRIVER), 1)
|
||||
CFLAGS+=-DCUSTOM_DRIVER
|
||||
endif
|
||||
|
||||
ifeq ($(LLVM), 1)
|
||||
ACCEL ?=0
|
||||
include ../../../../Makefile.llvm.in
|
||||
else
|
||||
ifeq ($(GCC), 1)
|
||||
include ../../../../Makefile.gcc.in
|
||||
else
|
||||
include ../../../../Makefile.bm.in
|
||||
endif
|
||||
endif
|
|
@ -0,0 +1,19 @@
|
|||
VERBOSE= 1
|
||||
TARGET ?=aes
|
||||
|
||||
FUNC=encrypt
|
||||
LDFLAGS=
|
||||
CFLAGS=
|
||||
|
||||
POSTFIX=
|
||||
ifeq ($(CUSTOM_INST), 1)
|
||||
POSTFIX=.inst
|
||||
CFLAGS+=-DCUSTOM_INST
|
||||
endif
|
||||
|
||||
ifeq ($(CUSTOM_DRIVER), 1)
|
||||
POSTFIX=.driver
|
||||
CFLAGS+=-DCUSTOM_DRIVER
|
||||
endif
|
||||
|
||||
include ../Makefile
|
|
@ -60,20 +60,22 @@
|
|||
*
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include "time.h"
|
||||
|
||||
#ifdef CUSTOM_INST
|
||||
#include "rocc.h"
|
||||
#endif
|
||||
|
||||
#include "../custom_mmap/mmap_driver.c"
|
||||
#include "../os_utils.h"
|
||||
int main_result;
|
||||
|
||||
#ifdef CUSTOM_DRIVER
|
||||
#include "bm_wrapper.h"
|
||||
#endif
|
||||
#include "aes.h"
|
||||
#include "aes_enc.c"
|
||||
#include "aes_dec.c"
|
||||
#include "aes_key.c"
|
||||
#include "aes_func.c"
|
||||
|
||||
|
||||
/* ***************** main **************************** */
|
||||
int
|
||||
aes_main (void)
|
||||
|
@ -118,21 +120,33 @@ aes_main (void)
|
|||
key[14] = 79;
|
||||
key[15] = 60;
|
||||
|
||||
int64_t begin, end, dur;
|
||||
begin = read_cycle();
|
||||
#ifdef CUSTOM_DRIVER
|
||||
encrypt_wrapper(statemt, key, 128128);
|
||||
uint64_t begin, end, dur;
|
||||
encryp (statemt, key);
|
||||
|
||||
begin = read_cycle();
|
||||
volatile int block;
|
||||
#ifdef CUSTOM_INST
|
||||
asm volatile ("fence.i");
|
||||
#define XCUSTOM_ACC 1
|
||||
ROCC_INSTRUCTION_DSS(XCUSTOM_ACC, block , vtop_translate(statemt), vtop_translate(key), 0);
|
||||
asm volatile ("fence.i");
|
||||
#else
|
||||
encrypt (statemt, key, 128128);
|
||||
decryp (statemt, key);
|
||||
#endif
|
||||
end = read_cycle();
|
||||
decrypt (statemt, key, 128128);
|
||||
end = read_cycle();
|
||||
printf ("\ndecrypto message\t");
|
||||
for (int i = 0; i < 16; ++i)
|
||||
{
|
||||
if (statemt[i] < 16)
|
||||
printf ("0");
|
||||
printf ("%x", statemt[i]);
|
||||
}
|
||||
|
||||
|
||||
duration(begin, end);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
|
@ -60,7 +60,7 @@
|
|||
*
|
||||
*/
|
||||
int
|
||||
decrypt (int statemt[32], int key[32])
|
||||
decryp (int statemt[32], int key[32])
|
||||
{
|
||||
#pragma HLS INTERFACE ap_bus depth=10 port=statemt
|
||||
#pragma HLS INTERFACE ap_bus depth=10 port=key
|
||||
|
@ -132,7 +132,7 @@ decrypt (int statemt[32], int key[32])
|
|||
// printf ("0");
|
||||
// printf ("%x", statemt[i]);
|
||||
// }
|
||||
|
||||
//
|
||||
//for (i = 0; i < 16; i++)
|
||||
// main_result += (statemt[i] != out_dec_statemt[i]);
|
||||
|
|
@ -0,0 +1,131 @@
|
|||
/*
|
||||
+--------------------------------------------------------------------------+
|
||||
| CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
|
||||
| ======================================================================== |
|
||||
| |
|
||||
| * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
|
||||
| H. Takada and K. Ishii |
|
||||
| Nagoya University, Japan |
|
||||
| |
|
||||
| * Remark : |
|
||||
| 1. This source code is modified to unify the formats of the benchmark |
|
||||
| programs in CHStone. |
|
||||
| 2. Test vectors are added for CHStone. |
|
||||
| 3. If "main_result" is 0 at the end of the program, the program is |
|
||||
| correctly executed. |
|
||||
| 4. Please follow the copyright of each benchmark program. |
|
||||
+--------------------------------------------------------------------------+
|
||||
*/
|
||||
/* aes_enc.c */
|
||||
/*
|
||||
* Copyright (C) 2005
|
||||
* Akira Iwata & Masayuki Sato
|
||||
* Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* This software is written by Masayuki Sato.
|
||||
* And if you want to contact us, send an email to Kimitake Wakayama
|
||||
* (wakayama@elcom.nitech.ac.jp)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All advertising materials mentioning features or use of this software must
|
||||
* display the following acknowledgment:
|
||||
* "This product includes software developed by Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
|
||||
*
|
||||
* 4. Redistributions of any form whatsoever must retain the following
|
||||
* acknowledgment:
|
||||
* "This product includes software developed by Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
|
||||
* AKIRA IWATA LABORATORY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
|
||||
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
|
||||
* IN NO EVENT SHALL AKIRA IWATA LABORATORY BE LIABLE FOR ANY SPECIAL,
|
||||
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
|
||||
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION
|
||||
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*
|
||||
*/
|
||||
/* ******* encrypto ************ */
|
||||
int
|
||||
encryp (int statemt[32], int key[32])
|
||||
{
|
||||
#pragma HLS INTERFACE ap_bus depth=10 port=statemt
|
||||
#pragma HLS INTERFACE ap_bus depth=10 port=key
|
||||
int type = 128128;
|
||||
int i;
|
||||
|
||||
/*
|
||||
+--------------------------------------------------------------------------+
|
||||
| * Test Vector (added for CHStone) |
|
||||
| out_enc_statemt : expected output data for "encrypt" |
|
||||
+--------------------------------------------------------------------------+
|
||||
*/
|
||||
const int out_enc_statemt[16] =
|
||||
{ 0x39, 0x25, 0x84, 0x1d, 0x2, 0xdc, 0x9, 0xfb, 0xdc, 0x11, 0x85, 0x97,
|
||||
0x19, 0x6a, 0xb, 0x32
|
||||
};
|
||||
|
||||
KeySchedule (type, key);
|
||||
switch (type)
|
||||
{
|
||||
case 128128:
|
||||
round_val = 0;
|
||||
nb = 4;
|
||||
break;
|
||||
case 192128:
|
||||
round_val = 2;
|
||||
nb = 4;
|
||||
break;
|
||||
case 256128:
|
||||
round_val = 4;
|
||||
nb = 4;
|
||||
break;
|
||||
case 128192:
|
||||
case 192192:
|
||||
round_val = 2;
|
||||
nb = 6;
|
||||
break;
|
||||
case 256192:
|
||||
round_val = 4;
|
||||
nb = 6;
|
||||
break;
|
||||
case 128256:
|
||||
case 192256:
|
||||
case 256256:
|
||||
round_val = 4;
|
||||
nb = 8;
|
||||
break;
|
||||
}
|
||||
AddRoundKey (statemt, type, 0);
|
||||
for (i = 1; i <= round_val + 9; ++i)
|
||||
{
|
||||
ByteSub_ShiftRow (statemt, nb);
|
||||
MixColumn_AddRoundKey (statemt, nb, i);
|
||||
}
|
||||
ByteSub_ShiftRow (statemt, nb);
|
||||
AddRoundKey (statemt, type, i);
|
||||
|
||||
printf ("encrypted message \t");
|
||||
for (i = 0; i < nb * 4; ++i)
|
||||
{
|
||||
if (statemt[i] < 16)
|
||||
printf ("0");
|
||||
printf ("%x", statemt[i]);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
VERBOSE= 1
|
||||
TARGET ?=aes
|
||||
|
||||
FUNC=encrypt
|
||||
LDFLAGS=
|
||||
CFLAGS=
|
||||
|
||||
POSTFIX=
|
||||
ifeq ($(CUSTOM_INST), 1)
|
||||
POSTFIX=.inst
|
||||
CFLAGS+=-DCUSTOM_INST
|
||||
endif
|
||||
|
||||
ifeq ($(CUSTOM_DRIVER), 1)
|
||||
POSTFIX=.driver
|
||||
CFLAGS+=-DCUSTOM_DRIVER
|
||||
endif
|
||||
|
||||
include ../Makefile
|
|
@ -60,15 +60,15 @@
|
|||
*
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include<stdint.h>
|
||||
|
||||
#include "../os_utils.h"
|
||||
#include "../custom_mmap/mmap_driver.c"
|
||||
#ifdef CUSTOM_INST
|
||||
#include "bm_wrapper.h"
|
||||
#include "rocc.h"
|
||||
#endif
|
||||
|
||||
int main_result;
|
||||
|
||||
#include "time.h"
|
||||
#include "aes.h"
|
||||
#include "aes_enc.c"
|
||||
#include "aes_dec.c"
|
||||
|
@ -120,35 +120,56 @@ aes_main (void)
|
|||
key[14] = 79;
|
||||
key[15] = 60;
|
||||
|
||||
uint64_t begin;
|
||||
|
||||
#ifdef WRAP_encrypt
|
||||
begin = read_cycle();
|
||||
int64_t begin, end, dur;
|
||||
volatile int block;
|
||||
begin = read_cycle();
|
||||
#ifdef CUSTOM_INST
|
||||
encrypt_wrapper(statemt, key);
|
||||
asm volatile ("fence.i");
|
||||
#define XCUSTOM_ACC 0
|
||||
ROCC_INSTRUCTION_DSS(XCUSTOM_ACC, block, vtop_translate(statemt), vtop_translate(key), 0);
|
||||
asm volatile ("fence.i");
|
||||
#else
|
||||
encrypt(statemt, key);
|
||||
#endif //CUSTOM_INST
|
||||
duration(begin, read_cycle());
|
||||
#endif //WRAP_encrypt
|
||||
encrypt (statemt, key);
|
||||
#endif
|
||||
end = read_cycle();
|
||||
duration(begin, end);
|
||||
|
||||
printf ("\nencrypto message\t");
|
||||
for (int i = 0; i < 16; ++i)
|
||||
{
|
||||
if (statemt[i] < 16)
|
||||
printf ("0");
|
||||
printf ("%x", statemt[i]);
|
||||
}
|
||||
|
||||
|
||||
//#ifdef CUSTOM_INST
|
||||
// #define XCUSTOM_ACC 0
|
||||
// ROCC_INSTRUCTION_SS(XCUSTOM_ACC, statemt, key, 0);
|
||||
//#else
|
||||
// decrypt (statemt, key);
|
||||
//#endif
|
||||
//
|
||||
decrypt (statemt, key);
|
||||
printf ("\ndecrypto message\t");
|
||||
for (int i = 0; i < 16; ++i)
|
||||
{
|
||||
if (statemt[i] < 16)
|
||||
printf ("0");
|
||||
printf ("%x", statemt[i]);
|
||||
}
|
||||
|
||||
|
||||
#ifdef WRAP_decrypt
|
||||
begin = read_cycle();
|
||||
#ifdef CUSTOM_INST
|
||||
decrypt_wrapper(statemt, key);
|
||||
#else //CUSTOM_INST
|
||||
decrypt(statemt, key);
|
||||
#endif //WRAP_decrypt
|
||||
duration(begin, read_cycle());
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main ()
|
||||
int
|
||||
main ()
|
||||
{
|
||||
main_result = 0;
|
||||
aes_main ();
|
||||
printf ("\n%d\n", main_result);
|
||||
return main_result;
|
||||
}
|
||||
main_result = 0;
|
||||
aes_main ();
|
||||
printf ("\n%d\n", main_result);
|
||||
return main_result;
|
||||
}
|
|
@ -0,0 +1,83 @@
|
|||
/*
|
||||
+--------------------------------------------------------------------------+
|
||||
| CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
|
||||
| ======================================================================== |
|
||||
| |
|
||||
| * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
|
||||
| H. Takada and K. Ishii |
|
||||
| Nagoya University, Japan |
|
||||
| |
|
||||
| * Remark : |
|
||||
| 1. This source code is modified to unify the formats of the benchmark |
|
||||
| programs in CHStone. |
|
||||
| 2. Test vectors are added for CHStone. |
|
||||
| 3. If "main_result" is 0 at the end of the program, the program is |
|
||||
| correctly executed. |
|
||||
| 4. Please follow the copyright of each benchmark program. |
|
||||
+--------------------------------------------------------------------------+
|
||||
*/
|
||||
/* aes.h */
|
||||
/*
|
||||
* Copyright (C) 2005
|
||||
* Akira Iwata & Masayuki Sato
|
||||
* Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* This software is written by Masayuki Sato.
|
||||
* And if you want to contact us, send an email to Kimitake Wakayama
|
||||
* (wakayama@elcom.nitech.ac.jp)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All advertising materials mentioning features or use of this software must
|
||||
* display the following acknowledgment:
|
||||
* "This product includes software developed by Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
|
||||
*
|
||||
* 4. Redistributions of any form whatsoever must retain the following
|
||||
* acknowledgment:
|
||||
* "This product includes software developed by Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
|
||||
* AKIRA IWATA LABORATORY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
|
||||
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
|
||||
* IN NO EVENT SHALL AKIRA IWATA LABORATORY BE LIABLE FOR ANY SPECIAL,
|
||||
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
|
||||
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION
|
||||
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
/* ************* data type define ************************* */
|
||||
int type;
|
||||
int nb;
|
||||
int round_val;
|
||||
int key[32];
|
||||
int statemt[32];
|
||||
int word[4][120];
|
||||
|
||||
|
||||
/* key generate */
|
||||
int KeySchedule (int, int *);
|
||||
int SubByte (int);
|
||||
|
||||
/* encrypto decrypto */
|
||||
void ByteSub_ShiftRow (int *, int);
|
||||
void InversShiftRow_ByteSub (int *, int);
|
||||
int MixColumn_AddRoundKey (int *, int, int);
|
||||
int AddRoundKey_InversMixColumn (int *, int, int);
|
||||
int AddRoundKey (int *, int, int);
|
||||
int encrypt (int *, int *);
|
||||
int decrypt (int *, int *);
|
|
@ -0,0 +1,140 @@
|
|||
/*
|
||||
+--------------------------------------------------------------------------+
|
||||
| CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
|
||||
| ======================================================================== |
|
||||
| |
|
||||
| * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
|
||||
| H. Takada and K. Ishii |
|
||||
| Nagoya University, Japan |
|
||||
| |
|
||||
| * Remark : |
|
||||
| 1. This source code is modified to unify the formats of the benchmark |
|
||||
| programs in CHStone. |
|
||||
| 2. Test vectors are added for CHStone. |
|
||||
| 3. If "main_result" is 0 at the end of the program, the program is |
|
||||
| correctly executed. |
|
||||
| 4. Please follow the copyright of each benchmark program. |
|
||||
+--------------------------------------------------------------------------+
|
||||
*/
|
||||
/* aes_dec.c */
|
||||
/*
|
||||
* Copyright (C) 2005
|
||||
* Akira Iwata & Masayuki Sato
|
||||
* Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* This software is written by Masayuki Sato.
|
||||
* And if you want to contact us, send an email to Kimitake Wakayama
|
||||
* (wakayama@elcom.nitech.ac.jp)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All advertising materials mentioning features or use of this software must
|
||||
* display the following acknowledgment:
|
||||
* "This product includes software developed by Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
|
||||
*
|
||||
* 4. Redistributions of any form whatsoever must retain the following
|
||||
* acknowledgment:
|
||||
* "This product includes software developed by Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
|
||||
* AKIRA IWATA LABORATORY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
|
||||
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
|
||||
* IN NO EVENT SHALL AKIRA IWATA LABORATORY BE LIABLE FOR ANY SPECIAL,
|
||||
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
|
||||
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION
|
||||
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*
|
||||
*/
|
||||
int
|
||||
decrypt (int statemt[32], int key[32])
|
||||
{
|
||||
#pragma HLS INTERFACE ap_bus depth=10 port=statemt
|
||||
#pragma HLS INTERFACE ap_bus depth=10 port=key
|
||||
int type = 128128;
|
||||
|
||||
int i;
|
||||
/*
|
||||
+--------------------------------------------------------------------------+
|
||||
| * Test Vector (added for CHStone) |
|
||||
| out_enc_statemt : expected output data for "decrypt" |
|
||||
+--------------------------------------------------------------------------+
|
||||
*/
|
||||
const int out_dec_statemt[16] =
|
||||
{ 0x32, 0x43, 0xf6, 0xa8, 0x88, 0x5a, 0x30, 0x8d, 0x31, 0x31, 0x98, 0xa2,
|
||||
0xe0, 0x37, 0x7, 0x34
|
||||
};
|
||||
KeySchedule (type, key);
|
||||
|
||||
switch (type)
|
||||
{
|
||||
case 128128:
|
||||
round_val = 10;
|
||||
nb = 4;
|
||||
break;
|
||||
case 128192:
|
||||
case 192192:
|
||||
round_val = 12;
|
||||
nb = 6;
|
||||
break;
|
||||
case 192128:
|
||||
round_val = 12;
|
||||
nb = 4;
|
||||
break;
|
||||
case 128256:
|
||||
case 192256:
|
||||
round_val = 14;
|
||||
nb = 8;
|
||||
break;
|
||||
case 256128:
|
||||
round_val = 14;
|
||||
nb = 4;
|
||||
break;
|
||||
case 256192:
|
||||
round_val = 14;
|
||||
nb = 6;
|
||||
break;
|
||||
case 256256:
|
||||
round_val = 14;
|
||||
nb = 8;
|
||||
break;
|
||||
}
|
||||
|
||||
AddRoundKey (statemt, type, round_val);
|
||||
|
||||
InversShiftRow_ByteSub (statemt, nb);
|
||||
|
||||
for (i = round_val - 1; i >= 1; --i)
|
||||
{
|
||||
AddRoundKey_InversMixColumn (statemt, nb, i);
|
||||
InversShiftRow_ByteSub (statemt, nb);
|
||||
}
|
||||
|
||||
AddRoundKey (statemt, type, 0);
|
||||
|
||||
// printf ("\ndecrypto message\t");
|
||||
// for (i = 0; i < ((type % 1000) / 8); ++i)
|
||||
// {
|
||||
// if (statemt[i] < 16)
|
||||
// printf ("0");
|
||||
// printf ("%x", statemt[i]);
|
||||
// }
|
||||
//
|
||||
for (i = 0; i < 16; i++)
|
||||
main_result += (statemt[i] != out_dec_statemt[i]);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -126,6 +126,6 @@ encrypt (int statemt[32], int key[32])
|
|||
// printf ("0");
|
||||
// printf ("%x", statemt[i]);
|
||||
// }
|
||||
//
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,542 @@
|
|||
/*
|
||||
+--------------------------------------------------------------------------+
|
||||
| CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
|
||||
| ======================================================================== |
|
||||
| |
|
||||
| * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
|
||||
| H. Takada and K. Ishii |
|
||||
| Nagoya University, Japan |
|
||||
| |
|
||||
| * Remark : |
|
||||
| 1. This source code is modified to unify the formats of the benchmark |
|
||||
| programs in CHStone. |
|
||||
| 2. Test vectors are added for CHStone. |
|
||||
| 3. If "main_result" is 0 at the end of the program, the program is |
|
||||
| correctly executed. |
|
||||
| 4. Please follow the copyright of each benchmark program. |
|
||||
+--------------------------------------------------------------------------+
|
||||
*/
|
||||
/* aes_function.c */
|
||||
/*
|
||||
* Copyright (C) 2005
|
||||
* Akira Iwata & Masayuki Sato
|
||||
* Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* This software is written by Masayuki Sato.
|
||||
* And if you want to contact us, send an email to Kimitake Wakayama
|
||||
* (wakayama@elcom.nitech.ac.jp)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All advertising materials mentioning features or use of this software must
|
||||
* display the following acknowledgment:
|
||||
* "This product includes software developed by Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
|
||||
*
|
||||
* 4. Redistributions of any form whatsoever must retain the following
|
||||
* acknowledgment:
|
||||
* "This product includes software developed by Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
|
||||
* AKIRA IWATA LABORATORY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
|
||||
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
|
||||
* IN NO EVENT SHALL AKIRA IWATA LABORATORY BE LIABLE FOR ANY SPECIAL,
|
||||
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
|
||||
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION
|
||||
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*
|
||||
*/
|
||||
/*
 * Forward substitution table.  Callers in this file look a value v up as
 * Sbox[v >> 4][v & 0xf], i.e. the first index is the high nibble and the
 * second index is the low nibble.
 */
const int Sbox[16][16] = {
  /* 0x0_ */
  {0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
   0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76},
  /* 0x1_ */
  {0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
   0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0},
  /* 0x2_ */
  {0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
   0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15},
  /* 0x3_ */
  {0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
   0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75},
  /* 0x4_ */
  {0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
   0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84},
  /* 0x5_ */
  {0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
   0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf},
  /* 0x6_ */
  {0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
   0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8},
  /* 0x7_ */
  {0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
   0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2},
  /* 0x8_ */
  {0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
   0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73},
  /* 0x9_ */
  {0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
   0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb},
  /* 0xa_ */
  {0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
   0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79},
  /* 0xb_ */
  {0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
   0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08},
  /* 0xc_ */
  {0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
   0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a},
  /* 0xd_ */
  {0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
   0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e},
  /* 0xe_ */
  {0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
   0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf},
  /* 0xf_ */
  {0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
   0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16}
};
|
||||
/*
 * Inverse substitution table.  Callers in this file look a value v up as
 * invSbox[v >> 4][v & 0xf], i.e. the first index is the high nibble and the
 * second index is the low nibble.
 */
const int invSbox[16][16] = {
  /* 0x0_ */
  {0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
   0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb},
  /* 0x1_ */
  {0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
   0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb},
  /* 0x2_ */
  {0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
   0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e},
  /* 0x3_ */
  {0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
   0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25},
  /* 0x4_ */
  {0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
   0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92},
  /* 0x5_ */
  {0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
   0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84},
  /* 0x6_ */
  {0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
   0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06},
  /* 0x7_ */
  {0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
   0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b},
  /* 0x8_ */
  {0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
   0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73},
  /* 0x9_ */
  {0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
   0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e},
  /* 0xa_ */
  {0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
   0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b},
  /* 0xb_ */
  {0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
   0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4},
  /* 0xc_ */
  {0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
   0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f},
  /* 0xd_ */
  {0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
   0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef},
  /* 0xe_ */
  {0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
   0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61},
  /* 0xf_ */
  {0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
   0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d}
};
|
||||
|
||||
/* ********* ByteSub & ShiftRow ********* */
|
||||
void
|
||||
ByteSub_ShiftRow (int statemt[32], int nb)
|
||||
{
|
||||
int temp;
|
||||
|
||||
switch (nb)
|
||||
{
|
||||
case 4:
|
||||
temp = Sbox[statemt[1] >> 4][statemt[1] & 0xf];
|
||||
statemt[1] = Sbox[statemt[5] >> 4][statemt[5] & 0xf];
|
||||
statemt[5] = Sbox[statemt[9] >> 4][statemt[9] & 0xf];
|
||||
statemt[9] = Sbox[statemt[13] >> 4][statemt[13] & 0xf];
|
||||
statemt[13] = temp;
|
||||
|
||||
temp = Sbox[statemt[2] >> 4][statemt[2] & 0xf];
|
||||
statemt[2] = Sbox[statemt[10] >> 4][statemt[10] & 0xf];
|
||||
statemt[10] = temp;
|
||||
temp = Sbox[statemt[6] >> 4][statemt[6] & 0xf];
|
||||
statemt[6] = Sbox[statemt[14] >> 4][statemt[14] & 0xf];
|
||||
statemt[14] = temp;
|
||||
|
||||
temp = Sbox[statemt[3] >> 4][statemt[3] & 0xf];
|
||||
statemt[3] = Sbox[statemt[15] >> 4][statemt[15] & 0xf];
|
||||
statemt[15] = Sbox[statemt[11] >> 4][statemt[11] & 0xf];
|
||||
statemt[11] = Sbox[statemt[7] >> 4][statemt[7] & 0xf];
|
||||
statemt[7] = temp;
|
||||
|
||||
statemt[0] = Sbox[statemt[0] >> 4][statemt[0] & 0xf];
|
||||
statemt[4] = Sbox[statemt[4] >> 4][statemt[4] & 0xf];
|
||||
statemt[8] = Sbox[statemt[8] >> 4][statemt[8] & 0xf];
|
||||
statemt[12] = Sbox[statemt[12] >> 4][statemt[12] & 0xf];
|
||||
break;
|
||||
case 6:
|
||||
temp = Sbox[statemt[1] >> 4][statemt[1] & 0xf];
|
||||
statemt[1] = Sbox[statemt[5] >> 4][statemt[5] & 0xf];
|
||||
statemt[5] = Sbox[statemt[9] >> 4][statemt[9] & 0xf];
|
||||
statemt[9] = Sbox[statemt[13] >> 4][statemt[13] & 0xf];
|
||||
statemt[13] = Sbox[statemt[17] >> 4][statemt[17] & 0xf];
|
||||
statemt[17] = Sbox[statemt[21] >> 4][statemt[21] & 0xf];
|
||||
statemt[21] = temp;
|
||||
|
||||
temp = Sbox[statemt[2] >> 4][statemt[2] & 0xf];
|
||||
statemt[2] = Sbox[statemt[10] >> 4][statemt[10] & 0xf];
|
||||
statemt[10] = Sbox[statemt[18] >> 4][statemt[18] & 0xf];
|
||||
statemt[18] = temp;
|
||||
temp = Sbox[statemt[6] >> 4][statemt[6] & 0xf];
|
||||
statemt[6] = Sbox[statemt[14] >> 4][statemt[14] & 0xf];
|
||||
statemt[14] = Sbox[statemt[22] >> 4][statemt[22] & 0xf];
|
||||
statemt[22] = temp;
|
||||
|
||||
temp = Sbox[statemt[3] >> 4][statemt[3] & 0xf];
|
||||
statemt[3] = Sbox[statemt[15] >> 4][statemt[15] & 0xf];
|
||||
statemt[15] = temp;
|
||||
temp = Sbox[statemt[7] >> 4][statemt[7] & 0xf];
|
||||
statemt[7] = Sbox[statemt[19] >> 4][statemt[19] & 0xf];
|
||||
statemt[19] = temp;
|
||||
temp = Sbox[statemt[11] >> 4][statemt[11] & 0xf];
|
||||
statemt[11] = Sbox[statemt[23] >> 4][statemt[23] & 0xf];
|
||||
statemt[23] = temp;
|
||||
|
||||
statemt[0] = Sbox[statemt[0] >> 4][statemt[0] & 0xf];
|
||||
statemt[4] = Sbox[statemt[4] >> 4][statemt[4] & 0xf];
|
||||
statemt[8] = Sbox[statemt[8] >> 4][statemt[8] & 0xf];
|
||||
statemt[12] = Sbox[statemt[12] >> 4][statemt[12] & 0xf];
|
||||
statemt[16] = Sbox[statemt[16] >> 4][statemt[16] & 0xf];
|
||||
statemt[20] = Sbox[statemt[20] >> 4][statemt[20] & 0xf];
|
||||
break;
|
||||
case 8:
|
||||
temp = Sbox[statemt[1] >> 4][statemt[1] & 0xf];
|
||||
statemt[1] = Sbox[statemt[5] >> 4][statemt[5] & 0xf];
|
||||
statemt[5] = Sbox[statemt[9] >> 4][statemt[9] & 0xf];
|
||||
statemt[9] = Sbox[statemt[13] >> 4][statemt[13] & 0xf];
|
||||
statemt[13] = Sbox[statemt[17] >> 4][statemt[17] & 0xf];
|
||||
statemt[17] = Sbox[statemt[21] >> 4][statemt[21] & 0xf];
|
||||
statemt[21] = Sbox[statemt[25] >> 4][statemt[25] & 0xf];
|
||||
statemt[25] = Sbox[statemt[29] >> 4][statemt[29] & 0xf];
|
||||
statemt[29] = temp;
|
||||
|
||||
temp = Sbox[statemt[2] >> 4][statemt[2] & 0xf];
|
||||
statemt[2] = Sbox[statemt[14] >> 4][statemt[14] & 0xf];
|
||||
statemt[14] = Sbox[statemt[26] >> 4][statemt[26] & 0xf];
|
||||
statemt[26] = Sbox[statemt[6] >> 4][statemt[6] & 0xf];
|
||||
statemt[6] = Sbox[statemt[18] >> 4][statemt[18] & 0xf];
|
||||
statemt[18] = Sbox[statemt[30] >> 4][statemt[30] & 0xf];
|
||||
statemt[30] = Sbox[statemt[10] >> 4][statemt[10] & 0xf];
|
||||
statemt[10] = Sbox[statemt[22] >> 4][statemt[22] & 0xf];
|
||||
statemt[22] = temp;
|
||||
|
||||
temp = Sbox[statemt[3] >> 4][statemt[3] & 0xf];
|
||||
statemt[3] = Sbox[statemt[19] >> 4][statemt[19] & 0xf];
|
||||
statemt[19] = temp;
|
||||
temp = Sbox[statemt[7] >> 4][statemt[7] & 0xf];
|
||||
statemt[7] = Sbox[statemt[23] >> 4][statemt[23] & 0xf];
|
||||
statemt[23] = temp;
|
||||
temp = Sbox[statemt[11] >> 4][statemt[11] & 0xf];
|
||||
statemt[11] = Sbox[statemt[27] >> 4][statemt[27] & 0xf];
|
||||
statemt[27] = temp;
|
||||
temp = Sbox[statemt[15] >> 4][statemt[15] & 0xf];
|
||||
statemt[15] = Sbox[statemt[31] >> 4][statemt[31] & 0xf];
|
||||
statemt[31] = temp;
|
||||
|
||||
statemt[0] = Sbox[statemt[0] >> 4][statemt[0] & 0xf];
|
||||
statemt[4] = Sbox[statemt[4] >> 4][statemt[4] & 0xf];
|
||||
statemt[8] = Sbox[statemt[8] >> 4][statemt[8] & 0xf];
|
||||
statemt[12] = Sbox[statemt[12] >> 4][statemt[12] & 0xf];
|
||||
statemt[16] = Sbox[statemt[16] >> 4][statemt[16] & 0xf];
|
||||
statemt[20] = Sbox[statemt[20] >> 4][statemt[20] & 0xf];
|
||||
statemt[24] = Sbox[statemt[24] >> 4][statemt[24] & 0xf];
|
||||
statemt[28] = Sbox[statemt[28] >> 4][statemt[28] & 0xf];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
SubByte (int in)
|
||||
{
|
||||
return Sbox[(in / 16)][(in % 16)];
|
||||
}
|
||||
|
||||
/* ********* InversShiftRow & ByteSub ********* */
|
||||
void
|
||||
InversShiftRow_ByteSub (int statemt[32], int nb)
|
||||
{
|
||||
int temp;
|
||||
|
||||
switch (nb)
|
||||
{
|
||||
case 4:
|
||||
temp = invSbox[statemt[13] >> 4][statemt[13] & 0xf];
|
||||
statemt[13] = invSbox[statemt[9] >> 4][statemt[9] & 0xf];
|
||||
statemt[9] = invSbox[statemt[5] >> 4][statemt[5] & 0xf];
|
||||
statemt[5] = invSbox[statemt[1] >> 4][statemt[1] & 0xf];
|
||||
statemt[1] = temp;
|
||||
|
||||
temp = invSbox[statemt[14] >> 4][statemt[14] & 0xf];
|
||||
statemt[14] = invSbox[statemt[6] >> 4][statemt[6] & 0xf];
|
||||
statemt[6] = temp;
|
||||
temp = invSbox[statemt[2] >> 4][statemt[2] & 0xf];
|
||||
statemt[2] = invSbox[statemt[10] >> 4][statemt[10] & 0xf];
|
||||
statemt[10] = temp;
|
||||
|
||||
temp = invSbox[statemt[15] >> 4][statemt[15] & 0xf];
|
||||
statemt[15] = invSbox[statemt[3] >> 4][statemt[3] & 0xf];
|
||||
statemt[3] = invSbox[statemt[7] >> 4][statemt[7] & 0xf];
|
||||
statemt[7] = invSbox[statemt[11] >> 4][statemt[11] & 0xf];
|
||||
statemt[11] = temp;
|
||||
|
||||
statemt[0] = invSbox[statemt[0] >> 4][statemt[0] & 0xf];
|
||||
statemt[4] = invSbox[statemt[4] >> 4][statemt[4] & 0xf];
|
||||
statemt[8] = invSbox[statemt[8] >> 4][statemt[8] & 0xf];
|
||||
statemt[12] = invSbox[statemt[12] >> 4][statemt[12] & 0xf];
|
||||
break;
|
||||
case 6:
|
||||
temp = invSbox[statemt[21] >> 4][statemt[21] & 0xf];
|
||||
statemt[21] = invSbox[statemt[17] >> 4][statemt[17] & 0xf];
|
||||
statemt[17] = invSbox[statemt[13] >> 4][statemt[13] & 0xf];
|
||||
statemt[13] = invSbox[statemt[9] >> 4][statemt[9] & 0xf];
|
||||
statemt[9] = invSbox[statemt[5] >> 4][statemt[5] & 0xf];
|
||||
statemt[5] = invSbox[statemt[1] >> 4][statemt[1] & 0xf];
|
||||
statemt[1] = temp;
|
||||
|
||||
temp = invSbox[statemt[22] >> 4][statemt[22] & 0xf];
|
||||
statemt[22] = invSbox[statemt[14] >> 4][statemt[14] & 0xf];
|
||||
statemt[14] = invSbox[statemt[6] >> 4][statemt[6] & 0xf];
|
||||
statemt[6] = temp;
|
||||
temp = invSbox[statemt[18] >> 4][statemt[18] & 0xf];
|
||||
statemt[18] = invSbox[statemt[10] >> 4][statemt[10] & 0xf];
|
||||
statemt[10] = invSbox[statemt[2] >> 4][statemt[2] & 0xf];
|
||||
statemt[2] = temp;
|
||||
|
||||
temp = invSbox[statemt[15] >> 4][statemt[15] & 0xf];
|
||||
statemt[15] = invSbox[statemt[3] >> 4][statemt[3] & 0xf];
|
||||
statemt[3] = temp;
|
||||
temp = invSbox[statemt[19] >> 4][statemt[19] & 0xf];
|
||||
statemt[19] = invSbox[statemt[7] >> 4][statemt[7] & 0xf];
|
||||
statemt[7] = temp;
|
||||
temp = invSbox[statemt[23] >> 4][statemt[23] & 0xf];
|
||||
statemt[23] = invSbox[statemt[11] >> 4][statemt[11] & 0xf];
|
||||
statemt[11] = temp;
|
||||
|
||||
statemt[0] = invSbox[statemt[0] >> 4][statemt[0] & 0xf];
|
||||
statemt[4] = invSbox[statemt[4] >> 4][statemt[4] & 0xf];
|
||||
statemt[8] = invSbox[statemt[8] >> 4][statemt[8] & 0xf];
|
||||
statemt[12] = invSbox[statemt[12] >> 4][statemt[12] & 0xf];
|
||||
statemt[16] = invSbox[statemt[16] >> 4][statemt[16] & 0xf];
|
||||
statemt[20] = invSbox[statemt[20] >> 4][statemt[20] & 0xf];
|
||||
break;
|
||||
case 8:
|
||||
temp = invSbox[statemt[29] >> 4][statemt[29] & 0xf];
|
||||
statemt[29] = invSbox[statemt[25] >> 4][statemt[25] & 0xf];
|
||||
statemt[25] = invSbox[statemt[21] >> 4][statemt[21] & 0xf];
|
||||
statemt[21] = invSbox[statemt[17] >> 4][statemt[17] & 0xf];
|
||||
statemt[17] = invSbox[statemt[13] >> 4][statemt[13] & 0xf];
|
||||
statemt[13] = invSbox[statemt[9] >> 4][statemt[9] & 0xf];
|
||||
statemt[9] = invSbox[statemt[5] >> 4][statemt[5] & 0xf];
|
||||
statemt[5] = invSbox[statemt[1] >> 4][statemt[1] & 0xf];
|
||||
statemt[1] = temp;
|
||||
|
||||
temp = invSbox[statemt[30] >> 4][statemt[30] & 0xf];
|
||||
statemt[30] = invSbox[statemt[18] >> 4][statemt[18] & 0xf];
|
||||
statemt[18] = invSbox[statemt[6] >> 4][statemt[6] & 0xf];
|
||||
statemt[6] = invSbox[statemt[26] >> 4][statemt[26] & 0xf];
|
||||
statemt[26] = invSbox[statemt[14] >> 4][statemt[14] & 0xf];
|
||||
statemt[14] = invSbox[statemt[2] >> 4][statemt[2] & 0xf];
|
||||
statemt[2] = invSbox[statemt[22] >> 4][statemt[22] & 0xf];
|
||||
statemt[22] = invSbox[statemt[10] >> 4][statemt[10] & 0xf];
|
||||
statemt[10] = temp;
|
||||
|
||||
temp = invSbox[statemt[31] >> 4][statemt[31] & 0xf];
|
||||
statemt[31] = invSbox[statemt[15] >> 4][statemt[15] & 0xf];
|
||||
statemt[15] = temp;
|
||||
temp = invSbox[statemt[27] >> 4][statemt[27] & 0xf];
|
||||
statemt[27] = invSbox[statemt[11] >> 4][statemt[11] & 0xf];
|
||||
statemt[11] = temp;
|
||||
temp = invSbox[statemt[23] >> 4][statemt[23] & 0xf];
|
||||
statemt[23] = invSbox[statemt[7] >> 4][statemt[7] & 0xf];
|
||||
statemt[7] = temp;
|
||||
temp = invSbox[statemt[19] >> 4][statemt[19] & 0xf];
|
||||
statemt[19] = invSbox[statemt[3] >> 4][statemt[3] & 0xf];
|
||||
statemt[3] = temp;
|
||||
|
||||
statemt[0] = invSbox[statemt[0] >> 4][statemt[0] & 0xf];
|
||||
statemt[4] = invSbox[statemt[4] >> 4][statemt[4] & 0xf];
|
||||
statemt[8] = invSbox[statemt[8] >> 4][statemt[8] & 0xf];
|
||||
statemt[12] = invSbox[statemt[12] >> 4][statemt[12] & 0xf];
|
||||
statemt[16] = invSbox[statemt[16] >> 4][statemt[16] & 0xf];
|
||||
statemt[20] = invSbox[statemt[20] >> 4][statemt[20] & 0xf];
|
||||
statemt[24] = invSbox[statemt[24] >> 4][statemt[24] & 0xf];
|
||||
statemt[28] = invSbox[statemt[28] >> 4][statemt[28] & 0xf];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* ******** MixColumn ********** */
|
||||
int
|
||||
MixColumn_AddRoundKey (int statemt[32], int nb, int n)
|
||||
{
|
||||
int ret[8 * 4], j;
|
||||
register int x;
|
||||
|
||||
for (j = 0; j < nb; ++j)
|
||||
{
|
||||
ret[j * 4] = (statemt[j * 4] << 1);
|
||||
if ((ret[j * 4] >> 8) == 1)
|
||||
ret[j * 4] ^= 283;
|
||||
x = statemt[1 + j * 4];
|
||||
x ^= (x << 1);
|
||||
if ((x >> 8) == 1)
|
||||
ret[j * 4] ^= (x ^ 283);
|
||||
else
|
||||
ret[j * 4] ^= x;
|
||||
ret[j * 4] ^=
|
||||
statemt[3 + j * 4] ^ statemt[2 + j * 4] ^ word[0][j + nb * n];
|
||||
|
||||
ret[1 + j * 4] = (statemt[1 + j * 4] << 1);
|
||||
if ((ret[1 + j * 4] >> 8) == 1)
|
||||
ret[1 + j * 4] ^= 283;
|
||||
x = statemt[2 + j * 4];
|
||||
x ^= (x << 1);
|
||||
if ((x >> 8) == 1)
|
||||
ret[1 + j * 4] ^= (x ^ 283);
|
||||
else
|
||||
ret[1 + j * 4] ^= x;
|
||||
ret[1 + j * 4] ^=
|
||||
statemt[3 + j * 4] ^ statemt[j * 4] ^ word[1][j + nb * n];
|
||||
|
||||
ret[2 + j * 4] = (statemt[2 + j * 4] << 1);
|
||||
if ((ret[2 + j * 4] >> 8) == 1)
|
||||
ret[2 + j * 4] ^= 283;
|
||||
x = statemt[3 + j * 4];
|
||||
x ^= (x << 1);
|
||||
if ((x >> 8) == 1)
|
||||
ret[2 + j * 4] ^= (x ^ 283);
|
||||
else
|
||||
ret[2 + j * 4] ^= x;
|
||||
ret[2 + j * 4] ^=
|
||||
statemt[j * 4] ^ statemt[1 + j * 4] ^ word[2][j + nb * n];
|
||||
|
||||
ret[3 + j * 4] = (statemt[3 + j * 4] << 1);
|
||||
if ((ret[3 + j * 4] >> 8) == 1)
|
||||
ret[3 + j * 4] ^= 283;
|
||||
x = statemt[j * 4];
|
||||
x ^= (x << 1);
|
||||
if ((x >> 8) == 1)
|
||||
ret[3 + j * 4] ^= (x ^ 283);
|
||||
else
|
||||
ret[3 + j * 4] ^= x;
|
||||
ret[3 + j * 4] ^=
|
||||
statemt[1 + j * 4] ^ statemt[2 + j * 4] ^ word[3][j + nb * n];
|
||||
}
|
||||
for (j = 0; j < nb; ++j)
|
||||
{
|
||||
statemt[2 + j * 4] = ret[2 + j * 4];
|
||||
statemt[j * 4] = ret[j * 4];
|
||||
statemt[1 + j * 4] = ret[1 + j * 4];
|
||||
statemt[3 + j * 4] = ret[3 + j * 4];
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ******** InversMixColumn ********** */
|
||||
int
|
||||
AddRoundKey_InversMixColumn (int statemt[32], int nb, int n)
|
||||
{
|
||||
int ret[8 * 4], i, j;
|
||||
register int x;
|
||||
|
||||
for (j = 0; j < nb; ++j)
|
||||
{
|
||||
statemt[j * 4] ^= word[0][j + nb * n];
|
||||
statemt[1 + j * 4] ^= word[1][j + nb * n];
|
||||
statemt[2 + j * 4] ^= word[2][j + nb * n];
|
||||
statemt[3 + j * 4] ^= word[3][j + nb * n];
|
||||
}
|
||||
for (j = 0; j < nb; ++j)
|
||||
for (i = 0; i < 4; ++i)
|
||||
{
|
||||
x = (statemt[i + j * 4] << 1);
|
||||
if ((x >> 8) == 1)
|
||||
x ^= 283;
|
||||
x ^= statemt[i + j * 4];
|
||||
x = (x << 1);
|
||||
if ((x >> 8) == 1)
|
||||
x ^= 283;
|
||||
x ^= statemt[i + j * 4];
|
||||
x = (x << 1);
|
||||
if ((x >> 8) == 1)
|
||||
x ^= 283;
|
||||
ret[i + j * 4] = x;
|
||||
|
||||
x = (statemt[(i + 1) % 4 + j * 4] << 1);
|
||||
if ((x >> 8) == 1)
|
||||
x ^= 283;
|
||||
x = (x << 1);
|
||||
if ((x >> 8) == 1)
|
||||
x ^= 283;
|
||||
x ^= statemt[(i + 1) % 4 + j * 4];
|
||||
x = (x << 1);
|
||||
if ((x >> 8) == 1)
|
||||
x ^= 283;
|
||||
x ^= statemt[(i + 1) % 4 + j * 4];
|
||||
ret[i + j * 4] ^= x;
|
||||
|
||||
x = (statemt[(i + 2) % 4 + j * 4] << 1);
|
||||
if ((x >> 8) == 1)
|
||||
x ^= 283;
|
||||
x ^= statemt[(i + 2) % 4 + j * 4];
|
||||
x = (x << 1);
|
||||
if ((x >> 8) == 1)
|
||||
x ^= 283;
|
||||
x = (x << 1);
|
||||
if ((x >> 8) == 1)
|
||||
x ^= 283;
|
||||
x ^= statemt[(i + 2) % 4 + j * 4];
|
||||
ret[i + j * 4] ^= x;
|
||||
|
||||
x = (statemt[(i + 3) % 4 + j * 4] << 1);
|
||||
if ((x >> 8) == 1)
|
||||
x ^= 283;
|
||||
x = (x << 1);
|
||||
if ((x >> 8) == 1)
|
||||
x ^= 283;
|
||||
x = (x << 1);
|
||||
if ((x >> 8) == 1)
|
||||
x ^= 283;
|
||||
x ^= statemt[(i + 3) % 4 + j * 4];
|
||||
ret[i + j * 4] ^= x;
|
||||
}
|
||||
for (i = 0; i < nb; ++i)
|
||||
{
|
||||
statemt[i * 4] = ret[i * 4];
|
||||
statemt[1 + i * 4] = ret[1 + i * 4];
|
||||
statemt[2 + i * 4] = ret[2 + i * 4];
|
||||
statemt[3 + i * 4] = ret[3 + i * 4];
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ******** AddRoundKey ********** */
|
||||
int
|
||||
AddRoundKey (int statemt[32], int type, int n)
|
||||
{
|
||||
int j, nb;
|
||||
|
||||
switch (type)
|
||||
{
|
||||
case 128128:
|
||||
case 192128:
|
||||
case 256128:
|
||||
nb = 4;
|
||||
break;
|
||||
case 128192:
|
||||
case 192192:
|
||||
case 256192:
|
||||
nb = 6;
|
||||
break;
|
||||
case 128256:
|
||||
case 192256:
|
||||
case 256256:
|
||||
nb = 8;
|
||||
break;
|
||||
}
|
||||
for (j = 0; j < nb; ++j)
|
||||
{
|
||||
statemt[j * 4] ^= word[0][j + nb * n];
|
||||
statemt[1 + j * 4] ^= word[1][j + nb * n];
|
||||
statemt[2 + j * 4] ^= word[2][j + nb * n];
|
||||
statemt[3 + j * 4] ^= word[3][j + nb * n];
|
||||
}
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,165 @@
|
|||
/*
|
||||
+--------------------------------------------------------------------------+
|
||||
| CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
|
||||
| ======================================================================== |
|
||||
| |
|
||||
| * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
|
||||
| H. Takada and K. Ishii |
|
||||
| Nagoya University, Japan |
|
||||
| |
|
||||
| * Remark : |
|
||||
| 1. This source code is modified to unify the formats of the benchmark |
|
||||
| programs in CHStone. |
|
||||
| 2. Test vectors are added for CHStone. |
|
||||
| 3. If "main_result" is 0 at the end of the program, the program is |
|
||||
| correctly executed. |
|
||||
| 4. Please follow the copyright of each benchmark program. |
|
||||
+--------------------------------------------------------------------------+
|
||||
*/
|
||||
/* aes_key.c */
|
||||
/*
|
||||
* Copyright (C) 2005
|
||||
* Akira Iwata & Masayuki Sato
|
||||
* Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* This software is written by Masayuki Sato.
|
||||
* And if you want to contact us, send an email to Kimitake Wakayama
|
||||
* (wakayama@elcom.nitech.ac.jp)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All advertising materials mentioning features or use of this software must
|
||||
* display the following acknowledgment:
|
||||
* "This product includes software developed by Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
|
||||
*
|
||||
* 4. Redistributions of any form whatsoever must retain the following
|
||||
* acknowledgment:
|
||||
* "This product includes software developed by Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
|
||||
* AKIRA IWATA LABORATORY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
|
||||
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
|
||||
* IN NO EVENT SHALL AKIRA IWATA LABORATORY BE LIABLE FOR ANY SPECIAL,
|
||||
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
|
||||
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION
|
||||
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
/* **************key generate & key display *******************/
|
||||
/*
 * Round constants used by KeySchedule (entry (j / nk) - 1 is XORed into the
 * first byte of every nk-th expanded key column).  Each entry is the previous
 * one doubled, with an overflow into bit 8 folded back by XORing 0x11b.
 */
const int Rcon0[30] = {
  0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,	/*  0 ..  7 */
  0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f,	/*  8 .. 15 */
  0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4,	/* 16 .. 23 */
  0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91			/* 24 .. 29 */
};
|
||||
|
||||
/* **************** key expand ************************ */
|
||||
int
|
||||
KeySchedule (int type, int key[32])
|
||||
{
|
||||
int nk, nb, round_val;
|
||||
int i, j, temp[4];
|
||||
|
||||
switch (type)
|
||||
{
|
||||
case 128128:
|
||||
nk = 4;
|
||||
nb = 4;
|
||||
round_val = 10;
|
||||
break;
|
||||
case 128192:
|
||||
nk = 4;
|
||||
nb = 6;
|
||||
round_val = 12;
|
||||
break;
|
||||
case 128256:
|
||||
nk = 4;
|
||||
nb = 8;
|
||||
round_val = 14;
|
||||
break;
|
||||
case 192128:
|
||||
nk = 6;
|
||||
nb = 4;
|
||||
round_val = 12;
|
||||
break;
|
||||
case 192192:
|
||||
nk = 6;
|
||||
nb = 6;
|
||||
round_val = 12;
|
||||
break;
|
||||
case 192256:
|
||||
nk = 6;
|
||||
nb = 8;
|
||||
round_val = 14;
|
||||
break;
|
||||
case 256128:
|
||||
nk = 8;
|
||||
nb = 4;
|
||||
round_val = 14;
|
||||
break;
|
||||
case 256192:
|
||||
nk = 8;
|
||||
nb = 6;
|
||||
round_val = 14;
|
||||
break;
|
||||
case 256256:
|
||||
nk = 8;
|
||||
nb = 8;
|
||||
round_val = 14;
|
||||
break;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; ++i)
|
||||
for (j = 0; j < nk; ++j)
|
||||
//for (i = 0; i < 4; ++i)
|
||||
/* 0 word */
|
||||
word[i][j] = key[i + j * 4];
|
||||
|
||||
/* expanded key is generated */
|
||||
for (j = nk; j < nb * (round_val + 1); ++j)
|
||||
{
|
||||
|
||||
/* RotByte */
|
||||
if ((j % nk) == 0)
|
||||
{
|
||||
temp[0] = SubByte (word[1][j - 1]) ^ Rcon0[(j / nk) - 1];
|
||||
temp[1] = SubByte (word[2][j - 1]);
|
||||
temp[2] = SubByte (word[3][j - 1]);
|
||||
temp[3] = SubByte (word[0][j - 1]);
|
||||
}
|
||||
if ((j % nk) != 0)
|
||||
{
|
||||
temp[0] = word[0][j - 1];
|
||||
temp[1] = word[1][j - 1];
|
||||
temp[2] = word[2][j - 1];
|
||||
temp[3] = word[3][j - 1];
|
||||
}
|
||||
if (nk > 6 && j % nk == 4)
|
||||
for (i = 0; i < 4; ++i)
|
||||
temp[i] = SubByte (temp[i]);
|
||||
for (i = 0; i < 4; ++i)
|
||||
word[i][j] = word[i][j - nk] ^ temp[i];
|
||||
}
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,98 @@
|
|||
// Based on code by Schuyler Eldridge. Copyright (c) Boston University
|
||||
// https://github.com/seldridge/rocket-rocc-examples/blob/master/src/main/c/rocc.h
|
||||
|
||||
#ifndef SRC_MAIN_C_ROCC_H
#define SRC_MAIN_C_ROCC_H

#include <stdint.h>

// Two-level stringification: STR(x) expands x before turning it into a string.
#define STR1(x) #x
#define STR(x) STR1(x)

// Value of the `size`-bit field of `a` that starts at bit `offset`.
// Every parameter is parenthesized so that compound arguments such as
// EXTRACT(x | y, 7, 0) are masked as a whole.
#define EXTRACT(a, size, offset) \
  (((~(~0 << (size)) << (offset)) & (a)) >> (offset))

// Major opcodes selected by the first argument (0..3) of the macros below.
#define CUSTOMX_OPCODE(x) CUSTOM_ ## x
#define CUSTOM_0 0b0001011
#define CUSTOM_1 0b0101011
#define CUSTOM_2 0b1011011
#define CUSTOM_3 0b1111011

// Builds the 32-bit instruction word:
//   bits 0-6 opcode, 7-11 rd, 12 xs2, 13 xs1, 14 xd, 15-19 rs1, 20-24 rs2,
//   25-31 funct (only the low 7 bits of funct are kept).
// The ROCC_INSTRUCTION_* macros stringify this expression and hand it to the
// assembler's `.word` directive, so it must stay a plain integer expression.
#define CUSTOMX(X, xd, xs1, xs2, rd, rs1, rs2, funct) \
  (CUSTOMX_OPCODE(X)                         | \
   ((rd)  << (7))                            | \
   ((xs2) << (7+5))                          | \
   ((xs1) << (7+5+1))                        | \
   ((xd)  << (7+5+2))                        | \
   ((rs1) << (7+5+3))                        | \
   ((rs2) << (7+5+3+5))                      | \
   (EXTRACT(funct, 7, 0) << (7+5+3+5+5)))

// Standard macro that passes rd, rs1, and rs2 via registers
// (x10 for rd, x11 for rs1, x12 for rs2).
#define ROCC_INSTRUCTION_DSS(X, rd, rs1, rs2, funct) \
  ROCC_INSTRUCTION_R_R_R(X, rd, rs1, rs2, funct, 10, 11, 12)

// Destination and one source register.
#define ROCC_INSTRUCTION_DS(X, rd, rs1, funct) \
  ROCC_INSTRUCTION_R_R_I(X, rd, rs1, 0, funct, 10, 11)

// Destination register only.
#define ROCC_INSTRUCTION_D(X, rd, funct) \
  ROCC_INSTRUCTION_R_I_I(X, rd, 0, 0, funct, 10)

// Two source registers, no destination.
#define ROCC_INSTRUCTION_SS(X, rs1, rs2, funct) \
  ROCC_INSTRUCTION_I_R_R(X, 0, rs1, rs2, funct, 11, 12)

// One source register, no destination.
#define ROCC_INSTRUCTION_S(X, rs1, funct) \
  ROCC_INSTRUCTION_I_R_I(X, 0, rs1, 0, funct, 11)

// No register operands.
#define ROCC_INSTRUCTION(X, funct) \
  ROCC_INSTRUCTION_I_I_I(X, 0, 0, 0, funct)

// Naming of the implementation macros: _R means the operand is passed in a
// register, _I means the operand is an immediate register-field value; the
// three letters are rd, rs1, rs2 in that order.
// rd, rs1, and rs2 are data
// rd_n, rs1_n, and rs2_n are the register numbers to use
#define ROCC_INSTRUCTION_R_R_R(X, rd, rs1, rs2, funct, rd_n, rs1_n, rs2_n) { \
    register uint64_t rd_  asm ("x" # rd_n);                                 \
    register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) (rs1);             \
    register uint64_t rs2_ asm ("x" # rs2_n) = (uint64_t) (rs2);             \
    asm volatile (                                                           \
        ".word " STR(CUSTOMX(X, 1, 1, 1, rd_n, rs1_n, rs2_n, funct)) "\n\t"  \
        : "=r" (rd_)                                                         \
        : [_rs1] "r" (rs1_), [_rs2] "r" (rs2_));                             \
    rd = rd_;                                                                \
  }

#define ROCC_INSTRUCTION_R_R_I(X, rd, rs1, rs2, funct, rd_n, rs1_n) {     \
    register uint64_t rd_  asm ("x" # rd_n);                              \
    register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) (rs1);          \
    asm volatile (                                                        \
        ".word " STR(CUSTOMX(X, 1, 1, 0, rd_n, rs1_n, rs2, funct)) "\n\t" \
        : "=r" (rd_) : [_rs1] "r" (rs1_));                                \
    rd = rd_;                                                             \
  }

#define ROCC_INSTRUCTION_R_I_I(X, rd, rs1, rs2, funct, rd_n) {           \
    register uint64_t rd_ asm ("x" # rd_n);                              \
    asm volatile (                                                       \
        ".word " STR(CUSTOMX(X, 1, 0, 0, rd_n, rs1, rs2, funct)) "\n\t"  \
        : "=r" (rd_));                                                   \
    rd = rd_;                                                            \
  }

#define ROCC_INSTRUCTION_I_R_R(X, rd, rs1, rs2, funct, rs1_n, rs2_n) {     \
    register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) (rs1);           \
    register uint64_t rs2_ asm ("x" # rs2_n) = (uint64_t) (rs2);           \
    asm volatile (                                                         \
        ".word " STR(CUSTOMX(X, 0, 1, 1, rd, rs1_n, rs2_n, funct)) "\n\t"  \
        :: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_));                          \
  }

#define ROCC_INSTRUCTION_I_R_I(X, rd, rs1, rs2, funct, rs1_n) {          \
    register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) (rs1);         \
    asm volatile (                                                       \
        ".word " STR(CUSTOMX(X, 0, 1, 0, rd, rs1_n, rs2, funct)) "\n\t"  \
        :: [_rs1] "r" (rs1_));                                           \
  }

#define ROCC_INSTRUCTION_I_I_I(X, rd, rs1, rs2, funct) {                 \
    asm volatile (                                                       \
        ".word " STR(CUSTOMX(X, 0, 0, 0, rd, rs1, rs2, funct)) "\n\t" ); \
  }

#endif  // SRC_MAIN_C_ROCC_H
|
|
@ -0,0 +1,19 @@
|
|||
# Per-benchmark settings; the build rules themselves come from ../Makefile,
# which is included at the bottom.
VERBOSE = 1
TARGET ?= aes_tl

# NOTE(review): FUNC is presumably the function handed to the accelerator
# flow by ../Makefile -- confirm there.
FUNC = encrypt
LDFLAGS =
CFLAGS =

# POSTFIX stays empty unless one of the variants below is requested on the
# command line (e.g. `make CUSTOM_INST=1`).
POSTFIX =

# Variant selected with CUSTOM_INST=1.
ifeq ($(CUSTOM_INST),1)
POSTFIX = .inst
CFLAGS += -DCUSTOM_INST
endif

# Variant selected with CUSTOM_DRIVER=1.  When both switches are set this
# block runs last, so POSTFIX ends up as .driver and both -D flags are passed.
ifeq ($(CUSTOM_DRIVER),1)
POSTFIX = .driver
CFLAGS += -DCUSTOM_DRIVER
endif

include ../Makefile
|
|
@ -0,0 +1,138 @@
|
|||
/*
|
||||
+--------------------------------------------------------------------------+
|
||||
| CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
|
||||
| ======================================================================== |
|
||||
| |
|
||||
| * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
|
||||
| H. Takada and K. Ishii |
|
||||
| Nagoya University, Japan |
|
||||
| |
|
||||
| * Remark : |
|
||||
| 1. This source code is modified to unify the formats of the benchmark |
|
||||
| programs in CHStone. |
|
||||
| 2. Test vectors are added for CHStone. |
|
||||
| 3. If "main_result" is 0 at the end of the program, the program is |
|
||||
| correctly executed. |
|
||||
| 4. Please follow the copyright of each benchmark program. |
|
||||
+--------------------------------------------------------------------------+
|
||||
*/
|
||||
/* aes_enc.c */
|
||||
/*
|
||||
* Copyright (C) 2005
|
||||
* Akira Iwata & Masayuki Sato
|
||||
* Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* This software is written by Masayuki Sato.
|
||||
* And if you want to contact us, send an email to Kimitake Wakayama
|
||||
* (wakayama@elcom.nitech.ac.jp)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All advertising materials mentioning features or use of this software must
|
||||
* display the following acknowledgment:
|
||||
* "This product includes software developed by Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
|
||||
*
|
||||
* 4. Redistributions of any form whatsoever must retain the following
|
||||
* acknowledgment:
|
||||
* "This product includes software developed by Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
|
||||
* AKIRA IWATA LABORATORY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
|
||||
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
|
||||
* IN NO EVENT SHALL AKIRA IWATA LABORATORY BE LIABLE FOR ANY SPECIAL,
|
||||
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
|
||||
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION
|
||||
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*
|
||||
*/
|
||||
/* ******* encrypto ************ */
|
||||
int
|
||||
encrypt (int statemt[32], int key[32], int type)
|
||||
{
|
||||
#pragma HLS INTERFACE m_axi port=statemt offset=slave bundle=gmem0 // Direct is for AXI with full 32 bit address space
|
||||
#pragma HLS INTERFACE m_axi port=key offset=slave bundle=gmem0 // Slave is for AXI4Lite, with burst mode disabled
|
||||
|
||||
#pragma HLS INTERFACE s_axilite port=statemt bundle=control
|
||||
#pragma HLS INTERFACE s_axilite port=key bundle=control
|
||||
#pragma HLS INTERFACE s_axilite port=type bundle=control
|
||||
#pragma HLS INTERFACE s_axilite port=return bundle=control
|
||||
|
||||
int i;
|
||||
/*
|
||||
+--------------------------------------------------------------------------+
|
||||
| * Test Vector (added for CHStone) |
|
||||
| out_enc_statemt : expected output data for "encrypt" |
|
||||
+--------------------------------------------------------------------------+
|
||||
*/
|
||||
const int out_enc_statemt[16] =
|
||||
{ 0x39, 0x25, 0x84, 0x1d, 0x2, 0xdc, 0x9, 0xfb, 0xdc, 0x11, 0x85, 0x97,
|
||||
0x19, 0x6a, 0xb, 0x32
|
||||
};
|
||||
|
||||
KeySchedule (type, key);
|
||||
switch (type)
|
||||
{
|
||||
case 128128:
|
||||
round_val = 0;
|
||||
nb = 4;
|
||||
break;
|
||||
case 192128:
|
||||
round_val = 2;
|
||||
nb = 4;
|
||||
break;
|
||||
case 256128:
|
||||
round_val = 4;
|
||||
nb = 4;
|
||||
break;
|
||||
case 128192:
|
||||
case 192192:
|
||||
round_val = 2;
|
||||
nb = 6;
|
||||
break;
|
||||
case 256192:
|
||||
round_val = 4;
|
||||
nb = 6;
|
||||
break;
|
||||
case 128256:
|
||||
case 192256:
|
||||
case 256256:
|
||||
round_val = 4;
|
||||
nb = 8;
|
||||
break;
|
||||
}
|
||||
AddRoundKey (statemt, type, 0);
|
||||
for (i = 1; i <= round_val + 9; ++i)
|
||||
{
|
||||
ByteSub_ShiftRow (statemt, nb);
|
||||
MixColumn_AddRoundKey (statemt, nb, i);
|
||||
}
|
||||
ByteSub_ShiftRow (statemt, nb);
|
||||
AddRoundKey (statemt, type, i);
|
||||
|
||||
printf ("encrypted message \t");
|
||||
for (i = 0; i < nb * 4; ++i)
|
||||
{
|
||||
if (statemt[i] < 16)
|
||||
printf ("0");
|
||||
printf ("%x", statemt[i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
main_result += (statemt[i] != out_enc_statemt[i]);
|
||||
|
||||
return 0;
|
||||
}
|
Binary file not shown.
|
@ -0,0 +1,203 @@
|
|||
/*
|
||||
+--------------------------------------------------------------------------+
|
||||
| CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
|
||||
| ======================================================================== |
|
||||
| |
|
||||
| * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
|
||||
| H. Takada and K. Ishii |
|
||||
| Nagoya University, Japan |
|
||||
| |
|
||||
| * Remark : |
|
||||
| 1. This source code is modified to unify the formats of the benchmark |
|
||||
| programs in CHStone. |
|
||||
| 2. Test vectors are added for CHStone. |
|
||||
| 3. If "main_result" is 0 at the end of the program, the program is |
|
||||
| correctly executed. |
|
||||
| 4. Please follow the copyright of each benchmark program. |
|
||||
+--------------------------------------------------------------------------+
|
||||
*/
|
||||
/* aes.c */
|
||||
/*
|
||||
* Copyright (C) 2005
|
||||
* Akira Iwata & Masayuki Sato
|
||||
* Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* This software is written by Masayuki Sato.
|
||||
* And if you want to contact us, send an email to Kimitake Wakayama
|
||||
* (wakayama@elcom.nitech.ac.jp)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All advertising materials mentioning features or use of this software must
|
||||
* display the following acknowledgment:
|
||||
* "This product includes software developed by Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
|
||||
*
|
||||
* 4. Redistributions of any form whatsoever must retain the following
|
||||
* acknowledgment:
|
||||
* "This product includes software developed by Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
|
||||
* AKIRA IWATA LABORATORY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
|
||||
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
|
||||
* IN NO EVENT SHALL AKIRA IWATA LABORATORY BE LIABLE FOR ANY SPECIAL,
|
||||
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
|
||||
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION
|
||||
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#include <stdio.h>
|
||||
|
||||
#include "../os_utils.h"
|
||||
/*
 * Accelerator register addresses as passed to access_addr().
 * NOTE(review): the per-register meanings below are taken from the names and
 * from how decrypt_accel() uses them; confirm against the generated
 * accelerator's register map.
 */
#define ACCEL_CONTROL 0x40000   /* written with 0x1 to start, polled for done */
#define AP_DONE_MASK 0x2        /* bit 1 of ACCEL_CONTROL; was 0b10, a binary
                                   literal that is non-standard before C23 */

#define ACCEL_INT 0x40004       /* written with 0 before each run */
#define ACCEL_STATEMT 0x40018   /* low word here, high word at +4 */
#define ACCEL_KEY 0x40024       /* low word here, high word at +4 */
#define ACCEL_TYPE 0x40030
#define ACCEL_RET 0x40010       /* not referenced by any live code visible here */

/* Mismatch counter: encrypt()/decrypt() add to it, main() returns it. */
int main_result;
|
||||
|
||||
#include "aes.h"
|
||||
#include "aes_enc.c"
|
||||
#include "aes_dec.c"
|
||||
#include "aes_key.c"
|
||||
#include "aes_func.c"
|
||||
#include "../custom_mmap/mmap_driver.c"
|
||||
|
||||
int decrypt_accel(int* statemt, int* key, int type){
|
||||
|
||||
uint64_t addr;
|
||||
// Disable interrupt for now
|
||||
//reg_write32(ACCEL_INT, 0x0);
|
||||
access_addr(ACCEL_INT, OUT, 0);
|
||||
|
||||
//int fd1 = mmap_init();
|
||||
//int fd2 = mmap_init();
|
||||
//char * addr1 = copy_to_buffer((char*)statemt, 32 * sizeof(int), fd1);
|
||||
|
||||
addr = vtop_translate(statemt);
|
||||
//addr = vtop_translate(addr1);
|
||||
access_addr(ACCEL_STATEMT, OUT, addr);
|
||||
access_addr(ACCEL_STATEMT + 4, OUT, addr >> 32);
|
||||
|
||||
// char * addr2 = copy_to_buffer((char*)key, 32 * sizeof(int), fd2);
|
||||
addr = vtop_translate(key);
|
||||
//addr = vtop_translate(addr2);
|
||||
access_addr(ACCEL_KEY, OUT, addr);
|
||||
access_addr(ACCEL_KEY + 4, OUT, addr >> 32);
|
||||
|
||||
//addr = vtop_translate(key);
|
||||
access_addr(ACCEL_TYPE, OUT, type);
|
||||
|
||||
// Set up pointer a and pointer b address
|
||||
// reg_write32(ACCEL_STATEMT, (uint32_t)statemt);
|
||||
// reg_write32(ACCEL_KEY, (uint32_t)key);
|
||||
// reg_write32(ACCEL_TYPE, (uint32_t)type);
|
||||
|
||||
// Write to ap_start to start the execution
|
||||
access_addr(ACCEL_CONTROL, OUT, 0x1);
|
||||
// reg_write32(ACCEL_CONTROL, 0x1);
|
||||
//printf("Accel Control: %x\n", reg_read32(ACCEL_CONTROL));
|
||||
|
||||
// Done?
|
||||
int done = 0;
|
||||
while (!done){
|
||||
// done = reg_read32(ACCEL_CONTROL) & AP_DONE_MASK;
|
||||
|
||||
done = access_addr(ACCEL_CONTROL, IN, 0x1) & AP_DONE_MASK;
|
||||
}
|
||||
// memcpy(statemt, addr1, 32 * 4 );
|
||||
// memcpy(key, addr2, 32 * 4 );
|
||||
//
|
||||
// mmap_delete(fd1, addr1);
|
||||
// mmap_delete(fd2, addr2);
|
||||
//
|
||||
//int c = reg_read32(ACCEL_RET);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* ***************** main **************************** */
|
||||
int
|
||||
aes_main (void)
|
||||
{
|
||||
/*
|
||||
+--------------------------------------------------------------------------+
|
||||
| * Test Vectors (added for CHStone) |
|
||||
| statemt, key : input data |
|
||||
+--------------------------------------------------------------------------+
|
||||
*/
|
||||
statemt[0] = 50;
|
||||
statemt[1] = 67;
|
||||
statemt[2] = 246;
|
||||
statemt[3] = 168;
|
||||
statemt[4] = 136;
|
||||
statemt[5] = 90;
|
||||
statemt[6] = 48;
|
||||
statemt[7] = 141;
|
||||
statemt[8] = 49;
|
||||
statemt[9] = 49;
|
||||
statemt[10] = 152;
|
||||
statemt[11] = 162;
|
||||
statemt[12] = 224;
|
||||
statemt[13] = 55;
|
||||
statemt[14] = 7;
|
||||
statemt[15] = 52;
|
||||
|
||||
key[0] = 43;
|
||||
key[1] = 126;
|
||||
key[2] = 21;
|
||||
key[3] = 22;
|
||||
key[4] = 40;
|
||||
key[5] = 174;
|
||||
key[6] = 210;
|
||||
key[7] = 166;
|
||||
key[8] = 171;
|
||||
key[9] = 247;
|
||||
key[10] = 21;
|
||||
key[11] = 136;
|
||||
key[12] = 9;
|
||||
key[13] = 207;
|
||||
key[14] = 79;
|
||||
key[15] = 60;
|
||||
|
||||
|
||||
int64_t begin, end, dur;
|
||||
|
||||
encrypt (statemt, key, 128128);
|
||||
|
||||
begin = read_cycle();
|
||||
#ifdef CUSTOM_DRIVER
|
||||
decrypt_accel(statemt, key, 128128);
|
||||
#else
|
||||
decrypt (statemt, key, 128128);
|
||||
#endif
|
||||
end = read_cycle();
|
||||
duration(begin, end);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
main_result = 0;
|
||||
aes_main ();
|
||||
printf ("\n%d\n", main_result);
|
||||
return main_result;
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
#ifndef __TIME_H__
|
||||
#define __TIME_H__
|
||||
|
||||
#include<stdio.h>
|
||||
//#include "encoding.h"
|
||||
|
||||
/*
 * Return the value produced by the RISC-V `rdcycle` instruction.
 * The asm is volatile so that each call issues its own read.
 */
uint64_t read_cycle() {
  uint64_t cycles = 0;
  __asm__ __volatile__ ("rdcycle %0 " : "=r" (cycles));
  return cycles;
}
|
||||
|
||||
/*
 * Print and return the difference between two cycle-counter samples.
 *
 *   start - sample taken before the measured region
 *   end   - sample taken after the measured region
 *
 * The subtraction is unsigned, so end < start yields the wrapped
 * (modulo 2^64) difference rather than a negative number.
 */
uint64_t duration(uint64_t start, uint64_t end){
  uint64_t dur = end - start;
  /* uint64_t has no portable bare printf specifier; "%ld" was a signed
     long conversion.  Cast to unsigned long long and print with %llu. */
  printf("Duration: %llu. \n", (unsigned long long) dur);
  return dur;
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,19 @@
|
|||
# Per-benchmark settings; the build rules themselves come from ../Makefile,
# which is included at the bottom and reads the variables set here.
VERBOSE = 1

# TARGET may be overridden from the environment or the command line.
TARGET ?= aes_tl

FUNC = encrypt

# Start from empty flag sets; the conditionals below append to CFLAGS.
LDFLAGS =
CFLAGS =

# POSTFIX stays empty for the plain build.  When both switches are set to 1,
# both -D flags are added and POSTFIX ends up as ".driver" (assigned last).
POSTFIX =

ifeq ($(CUSTOM_INST), 1)
POSTFIX = .inst
CFLAGS += -DCUSTOM_INST
endif

ifeq ($(CUSTOM_DRIVER), 1)
POSTFIX = .driver
CFLAGS += -DCUSTOM_DRIVER
endif

include ../Makefile
|
|
@ -0,0 +1,83 @@
|
|||
/*
|
||||
+--------------------------------------------------------------------------+
|
||||
| CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
|
||||
| ======================================================================== |
|
||||
| |
|
||||
| * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
|
||||
| H. Takada and K. Ishii |
|
||||
| Nagoya University, Japan |
|
||||
| |
|
||||
| * Remark : |
|
||||
| 1. This source code is modified to unify the formats of the benchmark |
|
||||
| programs in CHStone. |
|
||||
| 2. Test vectors are added for CHStone. |
|
||||
| 3. If "main_result" is 0 at the end of the program, the program is |
|
||||
| correctly executed. |
|
||||
| 4. Please follow the copyright of each benchmark program. |
|
||||
+--------------------------------------------------------------------------+
|
||||
*/
|
||||
/* aes.h */
|
||||
/*
|
||||
* Copyright (C) 2005
|
||||
* Akira Iwata & Masayuki Sato
|
||||
* Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* This software is written by Masayuki Sato.
|
||||
* And if you want to contact us, send an email to Kimitake Wakayama
|
||||
* (wakayama@elcom.nitech.ac.jp)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All advertising materials mentioning features or use of this software must
|
||||
* display the following acknowledgment:
|
||||
* "This product includes software developed by Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
|
||||
*
|
||||
* 4. Redistributions of any form whatsoever must retain the following
|
||||
* acknowledgment:
|
||||
* "This product includes software developed by Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
|
||||
* AKIRA IWATA LABORATORY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
|
||||
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
|
||||
* IN NO EVENT SHALL AKIRA IWATA LABORATORY BE LIABLE FOR ANY SPECIAL,
|
||||
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
|
||||
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION
|
||||
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
/* ************* data type define ************************* */
|
||||
/*
 * File-scope state shared by the AES routines.  nb and round_val are set by
 * the switch on `type` inside encrypt()/decrypt(); key[] and statemt[] hold
 * the test key and the block being processed.
 * NOTE(review): word[][] is presumably the expanded key schedule -- it is
 * filled in outside this excerpt, confirm there.
 */
int type;
int nb;
int round_val;
int key[32];
int statemt[32];
int word[4][120];

/* key generate */
int KeySchedule (int type, int *key);
int SubByte (int in);

/* encrypto decrypto */
void ByteSub_ShiftRow (int *statemt, int nb);
void InversShiftRow_ByteSub (int *statemt, int nb);
int MixColumn_AddRoundKey (int *statemt, int nb, int n);
int AddRoundKey_InversMixColumn (int *statemt, int nb, int n);
int AddRoundKey (int *statemt, int type, int n);
int encrypt (int *statemt, int *key, int type);
int decrypt (int *statemt, int *key, int type);
|
|
@ -0,0 +1,145 @@
|
|||
/*
|
||||
+--------------------------------------------------------------------------+
|
||||
| CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
|
||||
| ======================================================================== |
|
||||
| |
|
||||
| * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
|
||||
| H. Takada and K. Ishii |
|
||||
| Nagoya University, Japan |
|
||||
| |
|
||||
| * Remark : |
|
||||
| 1. This source code is modified to unify the formats of the benchmark |
|
||||
| programs in CHStone. |
|
||||
| 2. Test vectors are added for CHStone. |
|
||||
| 3. If "main_result" is 0 at the end of the program, the program is |
|
||||
| correctly executed. |
|
||||
| 4. Please follow the copyright of each benchmark program. |
|
||||
+--------------------------------------------------------------------------+
|
||||
*/
|
||||
/* aes_dec.c */
|
||||
/*
|
||||
* Copyright (C) 2005
|
||||
* Akira Iwata & Masayuki Sato
|
||||
* Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* This software is written by Masayuki Sato.
|
||||
* And if you want to contact us, send an email to Kimitake Wakayama
|
||||
* (wakayama@elcom.nitech.ac.jp)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All advertising materials mentioning features or use of this software must
|
||||
* display the following acknowledgment:
|
||||
* "This product includes software developed by Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
|
||||
*
|
||||
* 4. Redistributions of any form whatsoever must retain the following
|
||||
* acknowledgment:
|
||||
* "This product includes software developed by Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
|
||||
* AKIRA IWATA LABORATORY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
|
||||
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
|
||||
* IN NO EVENT SHALL AKIRA IWATA LABORATORY BE LIABLE FOR ANY SPECIAL,
|
||||
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
|
||||
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION
|
||||
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*
|
||||
*/
|
||||
int
|
||||
decrypt (int statemt[32], int key[32], int type)
|
||||
{
|
||||
#pragma HLS INTERFACE m_axi port=statemt offset=slave bundle=gmem0 // Direct is for AXI with full 32 bit address space
|
||||
#pragma HLS INTERFACE m_axi port=key offset=slave bundle=gmem0 // Slave is for AXI4Lite, with burst mode disabled
|
||||
|
||||
#pragma HLS INTERFACE s_axilite port=statemt bundle=control
|
||||
#pragma HLS INTERFACE s_axilite port=key bundle=control
|
||||
#pragma HLS INTERFACE s_axilite port=type bundle=control
|
||||
#pragma HLS INTERFACE s_axilite port=return bundle=control
|
||||
|
||||
|
||||
int i;
|
||||
/*
|
||||
+--------------------------------------------------------------------------+
|
||||
| * Test Vector (added for CHStone) |
|
||||
| out_enc_statemt : expected output data for "decrypt" |
|
||||
+--------------------------------------------------------------------------+
|
||||
*/
|
||||
const int out_dec_statemt[16] =
|
||||
{ 0x32, 0x43, 0xf6, 0xa8, 0x88, 0x5a, 0x30, 0x8d, 0x31, 0x31, 0x98, 0xa2,
|
||||
0xe0, 0x37, 0x7, 0x34
|
||||
};
|
||||
KeySchedule (type, key);
|
||||
|
||||
switch (type)
|
||||
{
|
||||
case 128128:
|
||||
round_val = 10;
|
||||
nb = 4;
|
||||
break;
|
||||
case 128192:
|
||||
case 192192:
|
||||
round_val = 12;
|
||||
nb = 6;
|
||||
break;
|
||||
case 192128:
|
||||
round_val = 12;
|
||||
nb = 4;
|
||||
break;
|
||||
case 128256:
|
||||
case 192256:
|
||||
round_val = 14;
|
||||
nb = 8;
|
||||
break;
|
||||
case 256128:
|
||||
round_val = 14;
|
||||
nb = 4;
|
||||
break;
|
||||
case 256192:
|
||||
round_val = 14;
|
||||
nb = 6;
|
||||
break;
|
||||
case 256256:
|
||||
round_val = 14;
|
||||
nb = 8;
|
||||
break;
|
||||
}
|
||||
|
||||
AddRoundKey (statemt, type, round_val);
|
||||
|
||||
InversShiftRow_ByteSub (statemt, nb);
|
||||
|
||||
for (i = round_val - 1; i >= 1; --i)
|
||||
{
|
||||
AddRoundKey_InversMixColumn (statemt, nb, i);
|
||||
InversShiftRow_ByteSub (statemt, nb);
|
||||
}
|
||||
|
||||
AddRoundKey (statemt, type, 0);
|
||||
|
||||
printf ("\ndecrypto message\t");
|
||||
for (i = 0; i < ((type % 1000) / 8); ++i)
|
||||
{
|
||||
if (statemt[i] < 16)
|
||||
printf ("0");
|
||||
printf ("%x", statemt[i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
main_result += (statemt[i] != out_dec_statemt[i]);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -130,7 +130,7 @@ encrypt (int statemt[32], int key[32], int type)
|
|||
// printf ("0");
|
||||
// printf ("%x", statemt[i]);
|
||||
// }
|
||||
|
||||
//
|
||||
for (i = 0; i < 16; i++)
|
||||
main_result += (statemt[i] != out_enc_statemt[i]);
|
||||
|
|
@ -0,0 +1,542 @@
|
|||
/*
|
||||
+--------------------------------------------------------------------------+
|
||||
| CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
|
||||
| ======================================================================== |
|
||||
| |
|
||||
| * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
|
||||
| H. Takada and K. Ishii |
|
||||
| Nagoya University, Japan |
|
||||
| |
|
||||
| * Remark : |
|
||||
| 1. This source code is modified to unify the formats of the benchmark |
|
||||
| programs in CHStone. |
|
||||
| 2. Test vectors are added for CHStone. |
|
||||
| 3. If "main_result" is 0 at the end of the program, the program is |
|
||||
| correctly executed. |
|
||||
| 4. Please follow the copyright of each benchmark program. |
|
||||
+--------------------------------------------------------------------------+
|
||||
*/
|
||||
/* aes_function.c */
|
||||
/*
|
||||
* Copyright (C) 2005
|
||||
* Akira Iwata & Masayuki Sato
|
||||
* Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* This software is written by Masayuki Sato.
|
||||
* And if you want to contact us, send an email to Kimitake Wakayama
|
||||
* (wakayama@elcom.nitech.ac.jp)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All advertising materials mentioning features or use of this software must
|
||||
* display the following acknowledgment:
|
||||
* "This product includes software developed by Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
|
||||
*
|
||||
* 4. Redistributions of any form whatsoever must retain the following
|
||||
* acknowledgment:
|
||||
* "This product includes software developed by Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
|
||||
* AKIRA IWATA LABORATORY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
|
||||
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
|
||||
* IN NO EVENT SHALL AKIRA IWATA LABORATORY BE LIABLE FOR ANY SPECIAL,
|
||||
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
|
||||
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION
|
||||
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*
|
||||
*/
|
||||
/*
 * Byte substitution table, indexed as Sbox[value >> 4][value & 0xf]
 * (high nibble selects the row, low nibble the column).
 */
const int Sbox[16][16] = {
  /* 0x */ {0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76},
  /* 1x */ {0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0},
  /* 2x */ {0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15},
  /* 3x */ {0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75},
  /* 4x */ {0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84},
  /* 5x */ {0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf},
  /* 6x */ {0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8},
  /* 7x */ {0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2},
  /* 8x */ {0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73},
  /* 9x */ {0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb},
  /* ax */ {0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79},
  /* bx */ {0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08},
  /* cx */ {0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a},
  /* dx */ {0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e},
  /* ex */ {0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf},
  /* fx */ {0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16}
};
|
||||
/*
 * Inverse byte substitution table, laid out like Sbox: the high nibble of
 * the value selects the row, the low nibble the column.
 */
const int invSbox[16][16] = {
  /* 0x */ {0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb},
  /* 1x */ {0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb},
  /* 2x */ {0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e},
  /* 3x */ {0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25},
  /* 4x */ {0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92},
  /* 5x */ {0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84},
  /* 6x */ {0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06},
  /* 7x */ {0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b},
  /* 8x */ {0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73},
  /* 9x */ {0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e},
  /* ax */ {0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b},
  /* bx */ {0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4},
  /* cx */ {0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f},
  /* dx */ {0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef},
  /* ex */ {0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61},
  /* fx */ {0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d}
};
|
||||
|
||||
/* ********* ByteSub & ShiftRow ********* */
|
||||
void
|
||||
ByteSub_ShiftRow (int statemt[32], int nb)
|
||||
{
|
||||
int temp;
|
||||
|
||||
switch (nb)
|
||||
{
|
||||
case 4:
|
||||
temp = Sbox[statemt[1] >> 4][statemt[1] & 0xf];
|
||||
statemt[1] = Sbox[statemt[5] >> 4][statemt[5] & 0xf];
|
||||
statemt[5] = Sbox[statemt[9] >> 4][statemt[9] & 0xf];
|
||||
statemt[9] = Sbox[statemt[13] >> 4][statemt[13] & 0xf];
|
||||
statemt[13] = temp;
|
||||
|
||||
temp = Sbox[statemt[2] >> 4][statemt[2] & 0xf];
|
||||
statemt[2] = Sbox[statemt[10] >> 4][statemt[10] & 0xf];
|
||||
statemt[10] = temp;
|
||||
temp = Sbox[statemt[6] >> 4][statemt[6] & 0xf];
|
||||
statemt[6] = Sbox[statemt[14] >> 4][statemt[14] & 0xf];
|
||||
statemt[14] = temp;
|
||||
|
||||
temp = Sbox[statemt[3] >> 4][statemt[3] & 0xf];
|
||||
statemt[3] = Sbox[statemt[15] >> 4][statemt[15] & 0xf];
|
||||
statemt[15] = Sbox[statemt[11] >> 4][statemt[11] & 0xf];
|
||||
statemt[11] = Sbox[statemt[7] >> 4][statemt[7] & 0xf];
|
||||
statemt[7] = temp;
|
||||
|
||||
statemt[0] = Sbox[statemt[0] >> 4][statemt[0] & 0xf];
|
||||
statemt[4] = Sbox[statemt[4] >> 4][statemt[4] & 0xf];
|
||||
statemt[8] = Sbox[statemt[8] >> 4][statemt[8] & 0xf];
|
||||
statemt[12] = Sbox[statemt[12] >> 4][statemt[12] & 0xf];
|
||||
break;
|
||||
case 6:
|
||||
temp = Sbox[statemt[1] >> 4][statemt[1] & 0xf];
|
||||
statemt[1] = Sbox[statemt[5] >> 4][statemt[5] & 0xf];
|
||||
statemt[5] = Sbox[statemt[9] >> 4][statemt[9] & 0xf];
|
||||
statemt[9] = Sbox[statemt[13] >> 4][statemt[13] & 0xf];
|
||||
statemt[13] = Sbox[statemt[17] >> 4][statemt[17] & 0xf];
|
||||
statemt[17] = Sbox[statemt[21] >> 4][statemt[21] & 0xf];
|
||||
statemt[21] = temp;
|
||||
|
||||
temp = Sbox[statemt[2] >> 4][statemt[2] & 0xf];
|
||||
statemt[2] = Sbox[statemt[10] >> 4][statemt[10] & 0xf];
|
||||
statemt[10] = Sbox[statemt[18] >> 4][statemt[18] & 0xf];
|
||||
statemt[18] = temp;
|
||||
temp = Sbox[statemt[6] >> 4][statemt[6] & 0xf];
|
||||
statemt[6] = Sbox[statemt[14] >> 4][statemt[14] & 0xf];
|
||||
statemt[14] = Sbox[statemt[22] >> 4][statemt[22] & 0xf];
|
||||
statemt[22] = temp;
|
||||
|
||||
temp = Sbox[statemt[3] >> 4][statemt[3] & 0xf];
|
||||
statemt[3] = Sbox[statemt[15] >> 4][statemt[15] & 0xf];
|
||||
statemt[15] = temp;
|
||||
temp = Sbox[statemt[7] >> 4][statemt[7] & 0xf];
|
||||
statemt[7] = Sbox[statemt[19] >> 4][statemt[19] & 0xf];
|
||||
statemt[19] = temp;
|
||||
temp = Sbox[statemt[11] >> 4][statemt[11] & 0xf];
|
||||
statemt[11] = Sbox[statemt[23] >> 4][statemt[23] & 0xf];
|
||||
statemt[23] = temp;
|
||||
|
||||
statemt[0] = Sbox[statemt[0] >> 4][statemt[0] & 0xf];
|
||||
statemt[4] = Sbox[statemt[4] >> 4][statemt[4] & 0xf];
|
||||
statemt[8] = Sbox[statemt[8] >> 4][statemt[8] & 0xf];
|
||||
statemt[12] = Sbox[statemt[12] >> 4][statemt[12] & 0xf];
|
||||
statemt[16] = Sbox[statemt[16] >> 4][statemt[16] & 0xf];
|
||||
statemt[20] = Sbox[statemt[20] >> 4][statemt[20] & 0xf];
|
||||
break;
|
||||
case 8:
|
||||
temp = Sbox[statemt[1] >> 4][statemt[1] & 0xf];
|
||||
statemt[1] = Sbox[statemt[5] >> 4][statemt[5] & 0xf];
|
||||
statemt[5] = Sbox[statemt[9] >> 4][statemt[9] & 0xf];
|
||||
statemt[9] = Sbox[statemt[13] >> 4][statemt[13] & 0xf];
|
||||
statemt[13] = Sbox[statemt[17] >> 4][statemt[17] & 0xf];
|
||||
statemt[17] = Sbox[statemt[21] >> 4][statemt[21] & 0xf];
|
||||
statemt[21] = Sbox[statemt[25] >> 4][statemt[25] & 0xf];
|
||||
statemt[25] = Sbox[statemt[29] >> 4][statemt[29] & 0xf];
|
||||
statemt[29] = temp;
|
||||
|
||||
temp = Sbox[statemt[2] >> 4][statemt[2] & 0xf];
|
||||
statemt[2] = Sbox[statemt[14] >> 4][statemt[14] & 0xf];
|
||||
statemt[14] = Sbox[statemt[26] >> 4][statemt[26] & 0xf];
|
||||
statemt[26] = Sbox[statemt[6] >> 4][statemt[6] & 0xf];
|
||||
statemt[6] = Sbox[statemt[18] >> 4][statemt[18] & 0xf];
|
||||
statemt[18] = Sbox[statemt[30] >> 4][statemt[30] & 0xf];
|
||||
statemt[30] = Sbox[statemt[10] >> 4][statemt[10] & 0xf];
|
||||
statemt[10] = Sbox[statemt[22] >> 4][statemt[22] & 0xf];
|
||||
statemt[22] = temp;
|
||||
|
||||
temp = Sbox[statemt[3] >> 4][statemt[3] & 0xf];
|
||||
statemt[3] = Sbox[statemt[19] >> 4][statemt[19] & 0xf];
|
||||
statemt[19] = temp;
|
||||
temp = Sbox[statemt[7] >> 4][statemt[7] & 0xf];
|
||||
statemt[7] = Sbox[statemt[23] >> 4][statemt[23] & 0xf];
|
||||
statemt[23] = temp;
|
||||
temp = Sbox[statemt[11] >> 4][statemt[11] & 0xf];
|
||||
statemt[11] = Sbox[statemt[27] >> 4][statemt[27] & 0xf];
|
||||
statemt[27] = temp;
|
||||
temp = Sbox[statemt[15] >> 4][statemt[15] & 0xf];
|
||||
statemt[15] = Sbox[statemt[31] >> 4][statemt[31] & 0xf];
|
||||
statemt[31] = temp;
|
||||
|
||||
statemt[0] = Sbox[statemt[0] >> 4][statemt[0] & 0xf];
|
||||
statemt[4] = Sbox[statemt[4] >> 4][statemt[4] & 0xf];
|
||||
statemt[8] = Sbox[statemt[8] >> 4][statemt[8] & 0xf];
|
||||
statemt[12] = Sbox[statemt[12] >> 4][statemt[12] & 0xf];
|
||||
statemt[16] = Sbox[statemt[16] >> 4][statemt[16] & 0xf];
|
||||
statemt[20] = Sbox[statemt[20] >> 4][statemt[20] & 0xf];
|
||||
statemt[24] = Sbox[statemt[24] >> 4][statemt[24] & 0xf];
|
||||
statemt[28] = Sbox[statemt[28] >> 4][statemt[28] & 0xf];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
SubByte (int in)
|
||||
{
|
||||
return Sbox[(in / 16)][(in % 16)];
|
||||
}
|
||||
|
||||
/* ********* InversShiftRow & ByteSub ********* */
|
||||
void
|
||||
InversShiftRow_ByteSub (int statemt[32], int nb)
|
||||
{
|
||||
int temp;
|
||||
|
||||
switch (nb)
|
||||
{
|
||||
case 4:
|
||||
temp = invSbox[statemt[13] >> 4][statemt[13] & 0xf];
|
||||
statemt[13] = invSbox[statemt[9] >> 4][statemt[9] & 0xf];
|
||||
statemt[9] = invSbox[statemt[5] >> 4][statemt[5] & 0xf];
|
||||
statemt[5] = invSbox[statemt[1] >> 4][statemt[1] & 0xf];
|
||||
statemt[1] = temp;
|
||||
|
||||
temp = invSbox[statemt[14] >> 4][statemt[14] & 0xf];
|
||||
statemt[14] = invSbox[statemt[6] >> 4][statemt[6] & 0xf];
|
||||
statemt[6] = temp;
|
||||
temp = invSbox[statemt[2] >> 4][statemt[2] & 0xf];
|
||||
statemt[2] = invSbox[statemt[10] >> 4][statemt[10] & 0xf];
|
||||
statemt[10] = temp;
|
||||
|
||||
temp = invSbox[statemt[15] >> 4][statemt[15] & 0xf];
|
||||
statemt[15] = invSbox[statemt[3] >> 4][statemt[3] & 0xf];
|
||||
statemt[3] = invSbox[statemt[7] >> 4][statemt[7] & 0xf];
|
||||
statemt[7] = invSbox[statemt[11] >> 4][statemt[11] & 0xf];
|
||||
statemt[11] = temp;
|
||||
|
||||
statemt[0] = invSbox[statemt[0] >> 4][statemt[0] & 0xf];
|
||||
statemt[4] = invSbox[statemt[4] >> 4][statemt[4] & 0xf];
|
||||
statemt[8] = invSbox[statemt[8] >> 4][statemt[8] & 0xf];
|
||||
statemt[12] = invSbox[statemt[12] >> 4][statemt[12] & 0xf];
|
||||
break;
|
||||
case 6:
|
||||
temp = invSbox[statemt[21] >> 4][statemt[21] & 0xf];
|
||||
statemt[21] = invSbox[statemt[17] >> 4][statemt[17] & 0xf];
|
||||
statemt[17] = invSbox[statemt[13] >> 4][statemt[13] & 0xf];
|
||||
statemt[13] = invSbox[statemt[9] >> 4][statemt[9] & 0xf];
|
||||
statemt[9] = invSbox[statemt[5] >> 4][statemt[5] & 0xf];
|
||||
statemt[5] = invSbox[statemt[1] >> 4][statemt[1] & 0xf];
|
||||
statemt[1] = temp;
|
||||
|
||||
temp = invSbox[statemt[22] >> 4][statemt[22] & 0xf];
|
||||
statemt[22] = invSbox[statemt[14] >> 4][statemt[14] & 0xf];
|
||||
statemt[14] = invSbox[statemt[6] >> 4][statemt[6] & 0xf];
|
||||
statemt[6] = temp;
|
||||
temp = invSbox[statemt[18] >> 4][statemt[18] & 0xf];
|
||||
statemt[18] = invSbox[statemt[10] >> 4][statemt[10] & 0xf];
|
||||
statemt[10] = invSbox[statemt[2] >> 4][statemt[2] & 0xf];
|
||||
statemt[2] = temp;
|
||||
|
||||
temp = invSbox[statemt[15] >> 4][statemt[15] & 0xf];
|
||||
statemt[15] = invSbox[statemt[3] >> 4][statemt[3] & 0xf];
|
||||
statemt[3] = temp;
|
||||
temp = invSbox[statemt[19] >> 4][statemt[19] & 0xf];
|
||||
statemt[19] = invSbox[statemt[7] >> 4][statemt[7] & 0xf];
|
||||
statemt[7] = temp;
|
||||
temp = invSbox[statemt[23] >> 4][statemt[23] & 0xf];
|
||||
statemt[23] = invSbox[statemt[11] >> 4][statemt[11] & 0xf];
|
||||
statemt[11] = temp;
|
||||
|
||||
statemt[0] = invSbox[statemt[0] >> 4][statemt[0] & 0xf];
|
||||
statemt[4] = invSbox[statemt[4] >> 4][statemt[4] & 0xf];
|
||||
statemt[8] = invSbox[statemt[8] >> 4][statemt[8] & 0xf];
|
||||
statemt[12] = invSbox[statemt[12] >> 4][statemt[12] & 0xf];
|
||||
statemt[16] = invSbox[statemt[16] >> 4][statemt[16] & 0xf];
|
||||
statemt[20] = invSbox[statemt[20] >> 4][statemt[20] & 0xf];
|
||||
break;
|
||||
case 8:
|
||||
temp = invSbox[statemt[29] >> 4][statemt[29] & 0xf];
|
||||
statemt[29] = invSbox[statemt[25] >> 4][statemt[25] & 0xf];
|
||||
statemt[25] = invSbox[statemt[21] >> 4][statemt[21] & 0xf];
|
||||
statemt[21] = invSbox[statemt[17] >> 4][statemt[17] & 0xf];
|
||||
statemt[17] = invSbox[statemt[13] >> 4][statemt[13] & 0xf];
|
||||
statemt[13] = invSbox[statemt[9] >> 4][statemt[9] & 0xf];
|
||||
statemt[9] = invSbox[statemt[5] >> 4][statemt[5] & 0xf];
|
||||
statemt[5] = invSbox[statemt[1] >> 4][statemt[1] & 0xf];
|
||||
statemt[1] = temp;
|
||||
|
||||
temp = invSbox[statemt[30] >> 4][statemt[30] & 0xf];
|
||||
statemt[30] = invSbox[statemt[18] >> 4][statemt[18] & 0xf];
|
||||
statemt[18] = invSbox[statemt[6] >> 4][statemt[6] & 0xf];
|
||||
statemt[6] = invSbox[statemt[26] >> 4][statemt[26] & 0xf];
|
||||
statemt[26] = invSbox[statemt[14] >> 4][statemt[14] & 0xf];
|
||||
statemt[14] = invSbox[statemt[2] >> 4][statemt[2] & 0xf];
|
||||
statemt[2] = invSbox[statemt[22] >> 4][statemt[22] & 0xf];
|
||||
statemt[22] = invSbox[statemt[10] >> 4][statemt[10] & 0xf];
|
||||
statemt[10] = temp;
|
||||
|
||||
temp = invSbox[statemt[31] >> 4][statemt[31] & 0xf];
|
||||
statemt[31] = invSbox[statemt[15] >> 4][statemt[15] & 0xf];
|
||||
statemt[15] = temp;
|
||||
temp = invSbox[statemt[27] >> 4][statemt[27] & 0xf];
|
||||
statemt[27] = invSbox[statemt[11] >> 4][statemt[11] & 0xf];
|
||||
statemt[11] = temp;
|
||||
temp = invSbox[statemt[23] >> 4][statemt[23] & 0xf];
|
||||
statemt[23] = invSbox[statemt[7] >> 4][statemt[7] & 0xf];
|
||||
statemt[7] = temp;
|
||||
temp = invSbox[statemt[19] >> 4][statemt[19] & 0xf];
|
||||
statemt[19] = invSbox[statemt[3] >> 4][statemt[3] & 0xf];
|
||||
statemt[3] = temp;
|
||||
|
||||
statemt[0] = invSbox[statemt[0] >> 4][statemt[0] & 0xf];
|
||||
statemt[4] = invSbox[statemt[4] >> 4][statemt[4] & 0xf];
|
||||
statemt[8] = invSbox[statemt[8] >> 4][statemt[8] & 0xf];
|
||||
statemt[12] = invSbox[statemt[12] >> 4][statemt[12] & 0xf];
|
||||
statemt[16] = invSbox[statemt[16] >> 4][statemt[16] & 0xf];
|
||||
statemt[20] = invSbox[statemt[20] >> 4][statemt[20] & 0xf];
|
||||
statemt[24] = invSbox[statemt[24] >> 4][statemt[24] & 0xf];
|
||||
statemt[28] = invSbox[statemt[28] >> 4][statemt[28] & 0xf];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* ******** MixColumn ********** */
|
||||
int
|
||||
MixColumn_AddRoundKey (int statemt[32], int nb, int n)
|
||||
{
|
||||
int ret[8 * 4], j;
|
||||
register int x;
|
||||
|
||||
for (j = 0; j < nb; ++j)
|
||||
{
|
||||
ret[j * 4] = (statemt[j * 4] << 1);
|
||||
if ((ret[j * 4] >> 8) == 1)
|
||||
ret[j * 4] ^= 283;
|
||||
x = statemt[1 + j * 4];
|
||||
x ^= (x << 1);
|
||||
if ((x >> 8) == 1)
|
||||
ret[j * 4] ^= (x ^ 283);
|
||||
else
|
||||
ret[j * 4] ^= x;
|
||||
ret[j * 4] ^=
|
||||
statemt[2 + j * 4] ^ statemt[3 + j * 4] ^ word[0][j + nb * n];
|
||||
|
||||
ret[1 + j * 4] = (statemt[1 + j * 4] << 1);
|
||||
if ((ret[1 + j * 4] >> 8) == 1)
|
||||
ret[1 + j * 4] ^= 283;
|
||||
x = statemt[2 + j * 4];
|
||||
x ^= (x << 1);
|
||||
if ((x >> 8) == 1)
|
||||
ret[1 + j * 4] ^= (x ^ 283);
|
||||
else
|
||||
ret[1 + j * 4] ^= x;
|
||||
ret[1 + j * 4] ^=
|
||||
statemt[3 + j * 4] ^ statemt[j * 4] ^ word[1][j + nb * n];
|
||||
|
||||
ret[2 + j * 4] = (statemt[2 + j * 4] << 1);
|
||||
if ((ret[2 + j * 4] >> 8) == 1)
|
||||
ret[2 + j * 4] ^= 283;
|
||||
x = statemt[3 + j * 4];
|
||||
x ^= (x << 1);
|
||||
if ((x >> 8) == 1)
|
||||
ret[2 + j * 4] ^= (x ^ 283);
|
||||
else
|
||||
ret[2 + j * 4] ^= x;
|
||||
ret[2 + j * 4] ^=
|
||||
statemt[j * 4] ^ statemt[1 + j * 4] ^ word[2][j + nb * n];
|
||||
|
||||
ret[3 + j * 4] = (statemt[3 + j * 4] << 1);
|
||||
if ((ret[3 + j * 4] >> 8) == 1)
|
||||
ret[3 + j * 4] ^= 283;
|
||||
x = statemt[j * 4];
|
||||
x ^= (x << 1);
|
||||
if ((x >> 8) == 1)
|
||||
ret[3 + j * 4] ^= (x ^ 283);
|
||||
else
|
||||
ret[3 + j * 4] ^= x;
|
||||
ret[3 + j * 4] ^=
|
||||
statemt[1 + j * 4] ^ statemt[2 + j * 4] ^ word[3][j + nb * n];
|
||||
}
|
||||
for (j = 0; j < nb; ++j)
|
||||
{
|
||||
statemt[j * 4] = ret[j * 4];
|
||||
statemt[1 + j * 4] = ret[1 + j * 4];
|
||||
statemt[2 + j * 4] = ret[2 + j * 4];
|
||||
statemt[3 + j * 4] = ret[3 + j * 4];
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ******** InversMixColumn ********** */
|
||||
int
|
||||
AddRoundKey_InversMixColumn (int statemt[32], int nb, int n)
|
||||
{
|
||||
int ret[8 * 4], i, j;
|
||||
register int x;
|
||||
|
||||
for (j = 0; j < nb; ++j)
|
||||
{
|
||||
statemt[j * 4] ^= word[0][j + nb * n];
|
||||
statemt[1 + j * 4] ^= word[1][j + nb * n];
|
||||
statemt[2 + j * 4] ^= word[2][j + nb * n];
|
||||
statemt[3 + j * 4] ^= word[3][j + nb * n];
|
||||
}
|
||||
for (j = 0; j < nb; ++j)
|
||||
for (i = 0; i < 4; ++i)
|
||||
{
|
||||
x = (statemt[i + j * 4] << 1);
|
||||
if ((x >> 8) == 1)
|
||||
x ^= 283;
|
||||
x ^= statemt[i + j * 4];
|
||||
x = (x << 1);
|
||||
if ((x >> 8) == 1)
|
||||
x ^= 283;
|
||||
x ^= statemt[i + j * 4];
|
||||
x = (x << 1);
|
||||
if ((x >> 8) == 1)
|
||||
x ^= 283;
|
||||
ret[i + j * 4] = x;
|
||||
|
||||
x = (statemt[(i + 1) % 4 + j * 4] << 1);
|
||||
if ((x >> 8) == 1)
|
||||
x ^= 283;
|
||||
x = (x << 1);
|
||||
if ((x >> 8) == 1)
|
||||
x ^= 283;
|
||||
x ^= statemt[(i + 1) % 4 + j * 4];
|
||||
x = (x << 1);
|
||||
if ((x >> 8) == 1)
|
||||
x ^= 283;
|
||||
x ^= statemt[(i + 1) % 4 + j * 4];
|
||||
ret[i + j * 4] ^= x;
|
||||
|
||||
x = (statemt[(i + 2) % 4 + j * 4] << 1);
|
||||
if ((x >> 8) == 1)
|
||||
x ^= 283;
|
||||
x ^= statemt[(i + 2) % 4 + j * 4];
|
||||
x = (x << 1);
|
||||
if ((x >> 8) == 1)
|
||||
x ^= 283;
|
||||
x = (x << 1);
|
||||
if ((x >> 8) == 1)
|
||||
x ^= 283;
|
||||
x ^= statemt[(i + 2) % 4 + j * 4];
|
||||
ret[i + j * 4] ^= x;
|
||||
|
||||
x = (statemt[(i + 3) % 4 + j * 4] << 1);
|
||||
if ((x >> 8) == 1)
|
||||
x ^= 283;
|
||||
x = (x << 1);
|
||||
if ((x >> 8) == 1)
|
||||
x ^= 283;
|
||||
x = (x << 1);
|
||||
if ((x >> 8) == 1)
|
||||
x ^= 283;
|
||||
x ^= statemt[(i + 3) % 4 + j * 4];
|
||||
ret[i + j * 4] ^= x;
|
||||
}
|
||||
for (i = 0; i < nb; ++i)
|
||||
{
|
||||
statemt[i * 4] = ret[i * 4];
|
||||
statemt[1 + i * 4] = ret[1 + i * 4];
|
||||
statemt[2 + i * 4] = ret[2 + i * 4];
|
||||
statemt[3 + i * 4] = ret[3 + i * 4];
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ******** AddRoundKey ********** */
|
||||
int
|
||||
AddRoundKey (int statemt[32], int type, int n)
|
||||
{
|
||||
int j, nb;
|
||||
|
||||
switch (type)
|
||||
{
|
||||
case 128128:
|
||||
case 192128:
|
||||
case 256128:
|
||||
nb = 4;
|
||||
break;
|
||||
case 128192:
|
||||
case 192192:
|
||||
case 256192:
|
||||
nb = 6;
|
||||
break;
|
||||
case 128256:
|
||||
case 192256:
|
||||
case 256256:
|
||||
nb = 8;
|
||||
break;
|
||||
}
|
||||
for (j = 0; j < nb; ++j)
|
||||
{
|
||||
statemt[j * 4] ^= word[0][j + nb * n];
|
||||
statemt[1 + j * 4] ^= word[1][j + nb * n];
|
||||
statemt[2 + j * 4] ^= word[2][j + nb * n];
|
||||
statemt[3 + j * 4] ^= word[3][j + nb * n];
|
||||
}
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,163 @@
|
|||
/*
|
||||
+--------------------------------------------------------------------------+
|
||||
| CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
|
||||
| ======================================================================== |
|
||||
| |
|
||||
| * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
|
||||
| H. Takada and K. Ishii |
|
||||
| Nagoya University, Japan |
|
||||
| |
|
||||
| * Remark : |
|
||||
| 1. This source code is modified to unify the formats of the benchmark |
|
||||
| programs in CHStone. |
|
||||
| 2. Test vectors are added for CHStone. |
|
||||
| 3. If "main_result" is 0 at the end of the program, the program is |
|
||||
| correctly executed. |
|
||||
| 4. Please follow the copyright of each benchmark program. |
|
||||
+--------------------------------------------------------------------------+
|
||||
*/
|
||||
/* aes_key.c */
|
||||
/*
|
||||
* Copyright (C) 2005
|
||||
* Akira Iwata & Masayuki Sato
|
||||
* Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* This software is written by Masayuki Sato.
|
||||
* And if you want to contact us, send an email to Kimitake Wakayama
|
||||
* (wakayama@elcom.nitech.ac.jp)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All advertising materials mentioning features or use of this software must
|
||||
* display the following acknowledgment:
|
||||
* "This product includes software developed by Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
|
||||
*
|
||||
* 4. Redistributions of any form whatsoever must retain the following
|
||||
* acknowledgment:
|
||||
* "This product includes software developed by Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
|
||||
* AKIRA IWATA LABORATORY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
|
||||
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
|
||||
* IN NO EVENT SHALL AKIRA IWATA LABORATORY BE LIABLE FOR ANY SPECIAL,
|
||||
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
|
||||
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION
|
||||
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
/* **************key generate & key display *******************/
|
||||
/*
 * Round constants consumed by KeySchedule: entry (j / nk) - 1 is XORed into
 * the first byte of every nk-th expanded key column.
 */
const int Rcon0[30] = {
  0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36,
  0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6,
  0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91
};
|
||||
|
||||
/* **************** key expand ************************ */
|
||||
int
|
||||
KeySchedule (int type, int key[32])
|
||||
{
|
||||
int nk, nb, round_val;
|
||||
int i, j, temp[4];
|
||||
|
||||
switch (type)
|
||||
{
|
||||
case 128128:
|
||||
nk = 4;
|
||||
nb = 4;
|
||||
round_val = 10;
|
||||
break;
|
||||
case 128192:
|
||||
nk = 4;
|
||||
nb = 6;
|
||||
round_val = 12;
|
||||
break;
|
||||
case 128256:
|
||||
nk = 4;
|
||||
nb = 8;
|
||||
round_val = 14;
|
||||
break;
|
||||
case 192128:
|
||||
nk = 6;
|
||||
nb = 4;
|
||||
round_val = 12;
|
||||
break;
|
||||
case 192192:
|
||||
nk = 6;
|
||||
nb = 6;
|
||||
round_val = 12;
|
||||
break;
|
||||
case 192256:
|
||||
nk = 6;
|
||||
nb = 8;
|
||||
round_val = 14;
|
||||
break;
|
||||
case 256128:
|
||||
nk = 8;
|
||||
nb = 4;
|
||||
round_val = 14;
|
||||
break;
|
||||
case 256192:
|
||||
nk = 8;
|
||||
nb = 6;
|
||||
round_val = 14;
|
||||
break;
|
||||
case 256256:
|
||||
nk = 8;
|
||||
nb = 8;
|
||||
round_val = 14;
|
||||
break;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
for (j = 0; j < nk; ++j)
|
||||
for (i = 0; i < 4; ++i)
|
||||
/* 0 word */
|
||||
word[i][j] = key[i + j * 4];
|
||||
|
||||
/* expanded key is generated */
|
||||
for (j = nk; j < nb * (round_val + 1); ++j)
|
||||
{
|
||||
|
||||
/* RotByte */
|
||||
if ((j % nk) == 0)
|
||||
{
|
||||
temp[0] = SubByte (word[1][j - 1]) ^ Rcon0[(j / nk) - 1];
|
||||
temp[1] = SubByte (word[2][j - 1]);
|
||||
temp[2] = SubByte (word[3][j - 1]);
|
||||
temp[3] = SubByte (word[0][j - 1]);
|
||||
}
|
||||
if ((j % nk) != 0)
|
||||
{
|
||||
temp[0] = word[0][j - 1];
|
||||
temp[1] = word[1][j - 1];
|
||||
temp[2] = word[2][j - 1];
|
||||
temp[3] = word[3][j - 1];
|
||||
}
|
||||
if (nk > 6 && j % nk == 4)
|
||||
for (i = 0; i < 4; ++i)
|
||||
temp[i] = SubByte (temp[i]);
|
||||
for (i = 0; i < 4; ++i)
|
||||
word[i][j] = word[i][j - nk] ^ temp[i];
|
||||
}
|
||||
return 0;
|
||||
}
|
Binary file not shown.
|
@ -0,0 +1,207 @@
|
|||
/*
|
||||
+--------------------------------------------------------------------------+
|
||||
| CHStone : a suite of benchmark programs for C-based High-Level Synthesis |
|
||||
| ======================================================================== |
|
||||
| |
|
||||
| * Collected and Modified : Y. Hara, H. Tomiyama, S. Honda, |
|
||||
| H. Takada and K. Ishii |
|
||||
| Nagoya University, Japan |
|
||||
| |
|
||||
| * Remark : |
|
||||
| 1. This source code is modified to unify the formats of the benchmark |
|
||||
| programs in CHStone. |
|
||||
| 2. Test vectors are added for CHStone. |
|
||||
| 3. If "main_result" is 0 at the end of the program, the program is |
|
||||
| correctly executed. |
|
||||
| 4. Please follow the copyright of each benchmark program. |
|
||||
+--------------------------------------------------------------------------+
|
||||
*/
|
||||
/* aes.c */
|
||||
/*
|
||||
* Copyright (C) 2005
|
||||
* Akira Iwata & Masayuki Sato
|
||||
* Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* This software is written by Masayuki Sato.
|
||||
* And if you want to contact us, send an email to Kimitake Wakayama
|
||||
* (wakayama@elcom.nitech.ac.jp)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without modification,
|
||||
* are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All advertising materials mentioning features or use of this software must
|
||||
* display the following acknowledgment:
|
||||
* "This product includes software developed by Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
|
||||
*
|
||||
* 4. Redistributions of any form whatsoever must retain the following
|
||||
* acknowledgment:
|
||||
* "This product includes software developed by Akira Iwata Laboratory,
|
||||
* Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
|
||||
* AKIRA IWATA LABORATORY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
|
||||
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
|
||||
* IN NO EVENT SHALL AKIRA IWATA LABORATORY BE LIABLE FOR ANY SPECIAL,
|
||||
* INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
|
||||
* FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION
|
||||
* WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#include <stdio.h>
|
||||
|
||||
//#include "mmio.h"
|
||||
#include "time.h"
|
||||
#include "../os_utils.h"
|
||||
#define ACCEL_CONTROL 0x30000
|
||||
#define AP_DONE_MASK 0b10
|
||||
|
||||
#define ACCEL_INT 0x30004
|
||||
#define ACCEL_STATEMT 0x30018
|
||||
#define ACCEL_KEY 0x30024
|
||||
#define ACCEL_TYPE 0x30030
|
||||
#define ACCEL_RET 0x30010
|
||||
|
||||
int main_result;
|
||||
|
||||
#include "aes.h"
|
||||
#include "aes_enc.c"
|
||||
#include "aes_dec.c"
|
||||
#include "aes_key.c"
|
||||
#include "aes_func.c"
|
||||
#include "../custom_mmap/mmap_driver.c"
|
||||
|
||||
int encrypt_accel(int* statemt, int* key, int type){
|
||||
|
||||
uint64_t addr;
|
||||
// Disable interrupt for now
|
||||
//reg_write32(ACCEL_INT, 0x0);
|
||||
access_addr(ACCEL_INT, OUT, 0);
|
||||
|
||||
int fd1 = mmap_init();
|
||||
int fd2 = mmap_init();
|
||||
char * addr1 = copy_to_buffer((char*)statemt, 32 * sizeof(int), fd1);
|
||||
|
||||
addr = vtop_translate(statemt);
|
||||
addr = vtop_translate(addr1);
|
||||
access_addr(ACCEL_STATEMT, OUT, addr);
|
||||
access_addr(ACCEL_STATEMT + 4, OUT, addr >> 32);
|
||||
|
||||
char * addr2 = copy_to_buffer((char*)key, 32 * sizeof(int), fd2);
|
||||
//addr = vtop_translate(key);
|
||||
addr = vtop_translate(addr2);
|
||||
access_addr(ACCEL_KEY, OUT, addr);
|
||||
access_addr(ACCEL_KEY + 4, OUT, addr >> 32);
|
||||
|
||||
//addr = vtop_translate(key);
|
||||
access_addr(ACCEL_TYPE, OUT, type);
|
||||
|
||||
// Set up pointer a and pointer b address
|
||||
// reg_write32(ACCEL_STATEMT, (uint32_t)statemt);
|
||||
// reg_write32(ACCEL_KEY, (uint32_t)key);
|
||||
// reg_write32(ACCEL_TYPE, (uint32_t)type);
|
||||
|
||||
// Write to ap_start to start the execution
|
||||
access_addr(ACCEL_CONTROL, OUT, 0x1);
|
||||
// reg_write32(ACCEL_CONTROL, 0x1);
|
||||
//printf("Accel Control: %x\n", reg_read32(ACCEL_CONTROL));
|
||||
|
||||
// Done?
|
||||
int done = 0;
|
||||
while (!done){
|
||||
// done = reg_read32(ACCEL_CONTROL) & AP_DONE_MASK;
|
||||
|
||||
done = access_addr(ACCEL_CONTROL, IN, 0x1) & AP_DONE_MASK;
|
||||
}
|
||||
memcpy(statemt, addr1, 32 * 4 );
|
||||
memcpy(key, addr2, 32 * 4 );
|
||||
|
||||
mmap_delete(fd1, addr1);
|
||||
mmap_delete(fd2, addr2);
|
||||
|
||||
//int c = reg_read32(ACCEL_RET);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* ***************** main **************************** */
|
||||
int
|
||||
aes_main (void)
|
||||
{
|
||||
/*
|
||||
+--------------------------------------------------------------------------+
|
||||
| * Test Vectors (added for CHStone) |
|
||||
| statemt, key : input data |
|
||||
+--------------------------------------------------------------------------+
|
||||
*/
|
||||
statemt[0] = 50;
|
||||
statemt[1] = 67;
|
||||
statemt[2] = 246;
|
||||
statemt[3] = 168;
|
||||
statemt[4] = 136;
|
||||
statemt[5] = 90;
|
||||
statemt[6] = 48;
|
||||
statemt[7] = 141;
|
||||
statemt[8] = 49;
|
||||
statemt[9] = 49;
|
||||
statemt[10] = 152;
|
||||
statemt[11] = 162;
|
||||
statemt[12] = 224;
|
||||
statemt[13] = 55;
|
||||
statemt[14] = 7;
|
||||
statemt[15] = 52;
|
||||
|
||||
key[0] = 43;
|
||||
key[1] = 126;
|
||||
key[2] = 21;
|
||||
key[3] = 22;
|
||||
key[4] = 40;
|
||||
key[5] = 174;
|
||||
key[6] = 210;
|
||||
key[7] = 166;
|
||||
key[8] = 171;
|
||||
key[9] = 247;
|
||||
key[10] = 21;
|
||||
key[11] = 136;
|
||||
key[12] = 9;
|
||||
key[13] = 207;
|
||||
key[14] = 79;
|
||||
key[15] = 60;
|
||||
|
||||
int64_t begin, end, dur;
|
||||
begin = read_cycle();
|
||||
#ifdef CUSTOM_DRIVER
|
||||
encrypt_accel(statemt, key, 128128);
|
||||
#else
|
||||
encrypt (statemt, key, 128128);
|
||||
#endif
|
||||
end = read_cycle();
|
||||
decrypt (statemt, key, 128128);
|
||||
|
||||
duration(begin, end);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
open_dev_mem();
|
||||
main_result = 0;
|
||||
aes_main ();
|
||||
printf ("\n%d\n", main_result);
|
||||
close_dev_mem();
|
||||
return main_result;
|
||||
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
# Per-benchmark settings consumed by the shared ../Makefile included at the end.
# TARGET is only a default (?=), so it can be overridden from the command line
# or environment.
# NOTE(review): "XXX" looks like a placeholder default - confirm the intended name.
TARGET ?=XXX
|
||||
|
||||
VERBOSE=1
|
||||
|
||||
# Start from empty flags; the conditionals below append to CFLAGS.
LDFLAGS=
|
||||
CFLAGS=
|
||||
|
||||
# POSTFIX stays empty unless one of the variants below is selected.
POSTFIX=
|
||||
# CUSTOM_INST=1: set POSTFIX to .inst and compile with -DCUSTOM_INST.
ifeq ($(CUSTOM_INST), 1)
|
||||
POSTFIX=.inst
|
||||
CFLAGS+=-DCUSTOM_INST
|
||||
endif
|
||||
|
||||
# CUSTOM_DRIVER=1: set POSTFIX to .driver and compile with -DCUSTOM_DRIVER.
# If both variables are 1, this later assignment decides POSTFIX.
ifeq ($(CUSTOM_DRIVER), 1)
|
||||
POSTFIX=.driver
|
||||
CFLAGS+=-DCUSTOM_DRIVER
|
||||
endif
|
||||
|
||||
# Build rules come from the parent directory's Makefile.
include ../Makefile
|
|
@ -1,25 +0,0 @@
|
|||
# Per-benchmark settings followed by the selection of one shared rule file.
VERBOSE= 1
|
||||
# TARGET is only a default (?=) and can be overridden by the caller.
TARGET ?=bitonic
|
||||
|
||||
FUNC=sort
|
||||
# Start from empty flags; the conditionals below append to CFLAGS.
LDFLAGS=
|
||||
CFLAGS=
|
||||
CPP ?= 0
|
||||
# CUSTOM_INST=1 compiles with -DCUSTOM_INST.
ifeq ($(CUSTOM_INST), 1)
|
||||
CFLAGS+=-DCUSTOM_INST
|
||||
endif
|
||||
|
||||
# CUSTOM_DRIVER=1 compiles with -DCUSTOM_DRIVER.
ifeq ($(CUSTOM_DRIVER), 1)
|
||||
CFLAGS+=-DCUSTOM_DRIVER
|
||||
endif
|
||||
|
||||
# Rule file selection: LLVM=1 takes Makefile.llvm.in (ACCEL defaults to 0),
# otherwise GCC=1 takes Makefile.gcc.in, otherwise Makefile.bm.in.
ifeq ($(LLVM), 1)
|
||||
ACCEL ?=0
|
||||
include ../../../../Makefile.llvm.in
|
||||
else
|
||||
ifeq ($(GCC), 1)
|
||||
include ../../../../Makefile.gcc.in
|
||||
else
|
||||
include ../../../../Makefile.bm.in
|
||||
endif
|
||||
endif
|
|
@ -1,25 +0,0 @@
|
|||
# Per-benchmark settings followed by the selection of one shared rule file.
VERBOSE= 1
|
||||
# TARGET is only a default (?=) and can be overridden by the caller.
TARGET ?=bitonic_tl
|
||||
|
||||
FUNC=sort
|
||||
# Start from empty flags; the conditionals below append to CFLAGS.
LDFLAGS=
|
||||
CFLAGS=
|
||||
CPP ?= 0
|
||||
# CUSTOM_INST=1 compiles with -DCUSTOM_INST.
ifeq ($(CUSTOM_INST), 1)
|
||||
CFLAGS+=-DCUSTOM_INST
|
||||
endif
|
||||
|
||||
# CUSTOM_DRIVER=1 compiles with -DCUSTOM_DRIVER.
ifeq ($(CUSTOM_DRIVER), 1)
|
||||
CFLAGS+=-DCUSTOM_DRIVER
|
||||
endif
|
||||
|
||||
# Rule file selection: LLVM=1 takes Makefile.llvm.in (ACCEL defaults to 0),
# otherwise GCC=1 takes Makefile.gcc.in, otherwise Makefile.bm.in.
ifeq ($(LLVM), 1)
|
||||
ACCEL ?=0
|
||||
include ../../../../Makefile.llvm.in
|
||||
else
|
||||
ifeq ($(GCC), 1)
|
||||
include ../../../../Makefile.gcc.in
|
||||
else
|
||||
include ../../../../Makefile.bm.in
|
||||
endif
|
||||
endif
|
|
@ -1,81 +0,0 @@
|
|||
//#pragma once
|
||||
/*----------------------------------------------------------------------------
|
||||
*
|
||||
* Author: Liang Ma (liang-ma@polito.it)
|
||||
*
|
||||
*----------------------------------------------------------------------------
|
||||
*/
|
||||
#ifdef CUSTOM_DRIVER
|
||||
#include "bm_wrapper.h"
|
||||
#include "rocc.h"
|
||||
#endif
|
||||
#include "time.h"
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
//#define GLOBAL
|
||||
|
||||
// Total exp of actual size
|
||||
#define EXP 3
|
||||
|
||||
// Total exp of arr buffer size
|
||||
#define LIMIT 2
|
||||
//#include "bitonic_accel.cpp"
|
||||
#include "bitonic.h"
|
||||
|
||||
#ifdef CUSTOM_DRIVER
|
||||
/*
 * Issue a RoCC instruction on custom opcode slot XCUSTOM_ACC with src as the
 * source operand and return the value it writes to the destination.  The
 * input/output pair is also printed as "VA ... to PA ...".
 *
 * Both values are uint64_t, so they are printed with an unsigned conversion
 * and an explicit cast; the previous "%ld" did not match the argument type.
 */
uint64_t vtop_translate(uint64_t src){
#define XCUSTOM_ACC 3
    uint64_t ret;

    ROCC_INSTRUCTION_DS(XCUSTOM_ACC, ret, src, 0);
    printf ("Translate Addr VA %llu to PA %llu\t",
            (unsigned long long) src, (unsigned long long) ret);
    return ret;
}
|
||||
#endif
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
// input and output parameters
|
||||
int SIZE = ARRAY_SIZE;
|
||||
typedef int TYPE;
|
||||
TYPE h_a[SIZE];
|
||||
|
||||
int dir = 1;
|
||||
|
||||
int i = 0;
|
||||
for(i = 0;i<SIZE;i++)
|
||||
{
|
||||
//h_a[i]=rand()%(100*ARRAY_SIZE);
|
||||
h_a[i] = i;
|
||||
}
|
||||
|
||||
uint64_t begin, end, dur;
|
||||
begin = read_cycle();
|
||||
#ifdef CUSTOM_DRIVER
|
||||
sort_wrapper(h_a, dir);
|
||||
#else
|
||||
sort(h_a, dir);
|
||||
#endif
|
||||
end = read_cycle();
|
||||
duration(begin, end);
|
||||
|
||||
int err = 0;
|
||||
for (i = 0 ; i < SIZE; i++)
|
||||
{
|
||||
printf("array[%d]=%d\n", i, h_a[i]);
|
||||
if(i+1 == SIZE)
|
||||
break;
|
||||
if(( h_a[i] > h_a[i + 1])!=dir){
|
||||
err++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
printf("There is/are %d error(s).\n", err);
|
||||
if(err!=0)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
# Per-benchmark settings consumed by the shared ../Makefile included at the end.
VERBOSE= 1
|
||||
# TARGET is only a default (?=) and can be overridden by the caller.
# NOTE(review): TARGET defaults to "bitonic" while FUNC is "encrypt" - one of
# the two looks copied from another benchmark; confirm which is intended.
TARGET ?=bitonic
|
||||
|
||||
FUNC=encrypt
|
||||
# Start from empty flags; the conditionals below append to CFLAGS.
LDFLAGS=
|
||||
CFLAGS=
|
||||
|
||||
# POSTFIX stays empty unless one of the variants below is selected.
POSTFIX=
|
||||
# CUSTOM_INST=1: set POSTFIX to .inst and compile with -DCUSTOM_INST.
ifeq ($(CUSTOM_INST), 1)
|
||||
POSTFIX=.inst
|
||||
CFLAGS+=-DCUSTOM_INST
|
||||
endif
|
||||
|
||||
# CUSTOM_DRIVER=1: set POSTFIX to .driver and compile with -DCUSTOM_DRIVER.
# If both variables are 1, this later assignment decides POSTFIX.
ifeq ($(CUSTOM_DRIVER), 1)
|
||||
POSTFIX=.driver
|
||||
CFLAGS+=-DCUSTOM_DRIVER
|
||||
endif
|
||||
|
||||
# Build rules come from the parent directory's Makefile.
include ../Makefile
|
|
@ -1,26 +1,22 @@
|
|||
//#pragma once
|
||||
#include "rocc.h"
|
||||
/*----------------------------------------------------------------------------
|
||||
*
|
||||
* Author: Liang Ma (liang-ma@polito.it)
|
||||
*
|
||||
*----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "time.h"
|
||||
#ifdef CUSTOM_INST
|
||||
#include "bm_wrapper.h"
|
||||
#endif
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
//#define GLOBAL
|
||||
#include "../os_utils.h"
|
||||
|
||||
// Total exp of actual size
|
||||
#define EXP 3
|
||||
#define EXP 9
|
||||
|
||||
// Total exp of arr buffer size
|
||||
#define LIMIT 2
|
||||
#define LIMIT 4
|
||||
//#include "bitonic_accel.cpp"
|
||||
#include "bitonic.h"
|
||||
|
||||
|
@ -39,28 +35,31 @@ int main(int argc, char** argv)
|
|||
//h_a[i]=rand()%(100*ARRAY_SIZE);
|
||||
h_a[i] = i;
|
||||
}
|
||||
|
||||
uint64_t begin, end;
|
||||
begin = read_cycle();
|
||||
uint64_t begin, end, dur;
|
||||
volatile int block;
|
||||
begin = read_cycle();
|
||||
#ifdef CUSTOM_INST
|
||||
sort_wrapper((uint64_t)h_a, (uint64_t)dir);
|
||||
#define XCUSTOM_ACC 1
|
||||
ROCC_INSTRUCTION_DSS(XCUSTOM_ACC, block, vtop_translate(h_a), vtop_translate(dir), 0);
|
||||
//ROCC_INSTRUCTION_DSS(XCUSTOM_ACC, block, h_a, dir, 0);
|
||||
#else
|
||||
sort(h_a, dir);
|
||||
#endif
|
||||
end = read_cycle();
|
||||
duration(begin, end);
|
||||
end = read_cycle();
|
||||
duration(begin, end);
|
||||
|
||||
|
||||
int err = 0;
|
||||
for (i = 0 ; i < SIZE; i++)
|
||||
{
|
||||
printf("array[%d]=%d\n", i, h_a[i]);
|
||||
if(i+1 == SIZE)
|
||||
break;
|
||||
if(( h_a[i] > h_a[i + 1])!=dir){
|
||||
err++;
|
||||
}
|
||||
|
||||
}
|
||||
// for (i = 0 ; i < SIZE; i++)
|
||||
// {
|
||||
// printf("array[%d]=%d\n", i, h_a[i]);
|
||||
// if(i+1 == SIZE)
|
||||
// break;
|
||||
// if(( h_a[i] > h_a[i + 1])!=dir){
|
||||
// err++;
|
||||
// }
|
||||
//
|
||||
// }
|
||||
|
||||
printf("There is/are %d error(s).\n", err);
|
||||
if(err!=0)
|
|
@ -71,7 +71,7 @@ void mergeLocal(int* array, int bits, int id, int dir){
|
|||
memcpy(array + id * LOCAL_SIZE, localArray, LOCAL_SIZE * sizeof(int));
|
||||
}
|
||||
|
||||
int sort(int* array, int dir){
|
||||
void sort(int* array, int dir){
|
||||
#pragma HLS INTERFACE ap_bus depth=10 port=array
|
||||
|
||||
#pragma HLS INLINE
|
||||
|
@ -120,7 +120,6 @@ int sort(int* array, int dir){
|
|||
mergeLocal(bits, id, dir);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
|
@ -0,0 +1,98 @@
|
|||
// Based on code by Schuyler Eldridge. Copyright (c) Boston University
|
||||
// https://github.com/seldridge/rocket-rocc-examples/blob/master/src/main/c/rocc.h
|
||||
|
||||
#ifndef SRC_MAIN_C_ROCC_H
|
||||
#define SRC_MAIN_C_ROCC_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#define STR1(x) #x
|
||||
#define STR(x) STR1(x)
|
||||
#define EXTRACT(a, size, offset) (((~(~0 << size) << offset) & a) >> offset)
|
||||
|
||||
#define CUSTOMX_OPCODE(x) CUSTOM_ ## x
|
||||
#define CUSTOM_0 0b0001011
|
||||
#define CUSTOM_1 0b0101011
|
||||
#define CUSTOM_2 0b1011011
|
||||
#define CUSTOM_3 0b1111011
|
||||
|
||||
#define CUSTOMX(X, xd, xs1, xs2, rd, rs1, rs2, funct) \
|
||||
CUSTOMX_OPCODE(X) | \
|
||||
(rd << (7)) | \
|
||||
(xs2 << (7+5)) | \
|
||||
(xs1 << (7+5+1)) | \
|
||||
(xd << (7+5+2)) | \
|
||||
(rs1 << (7+5+3)) | \
|
||||
(rs2 << (7+5+3+5)) | \
|
||||
(EXTRACT(funct, 7, 0) << (7+5+3+5+5))
|
||||
|
||||
// Standard macro that passes rd, rs1, and rs2 via registers
|
||||
#define ROCC_INSTRUCTION_DSS(X, rd, rs1, rs2, funct) \
|
||||
ROCC_INSTRUCTION_R_R_R(X, rd, rs1, rs2, funct, 10, 11, 12)
|
||||
|
||||
#define ROCC_INSTRUCTION_DS(X, rd, rs1, funct) \
|
||||
ROCC_INSTRUCTION_R_R_I(X, rd, rs1, 0, funct, 10, 11)
|
||||
|
||||
#define ROCC_INSTRUCTION_D(X, rd, funct) \
|
||||
ROCC_INSTRUCTION_R_I_I(X, rd, 0, 0, funct, 10)
|
||||
|
||||
#define ROCC_INSTRUCTION_SS(X, rs1, rs2, funct) \
|
||||
ROCC_INSTRUCTION_I_R_R(X, 0, rs1, rs2, funct, 11, 12)
|
||||
|
||||
#define ROCC_INSTRUCTION_S(X, rs1, funct) \
|
||||
ROCC_INSTRUCTION_I_R_I(X, 0, rs1, 0, funct, 11)
|
||||
|
||||
#define ROCC_INSTRUCTION(X, funct) \
|
||||
ROCC_INSTRUCTION_I_I_I(X, 0, 0, 0, funct)
|
||||
|
||||
// rd, rs1, and rs2 are data
|
||||
// rd_n, rs1_n, and rs2_n are the register numbers to use
|
||||
#define ROCC_INSTRUCTION_R_R_R(X, rd, rs1, rs2, funct, rd_n, rs1_n, rs2_n) { \
|
||||
register uint64_t rd_ asm ("x" # rd_n); \
|
||||
register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \
|
||||
register uint64_t rs2_ asm ("x" # rs2_n) = (uint64_t) rs2; \
|
||||
asm volatile ( \
|
||||
".word " STR(CUSTOMX(X, 1, 1, 1, rd_n, rs1_n, rs2_n, funct)) "\n\t" \
|
||||
: "=r" (rd_) \
|
||||
: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
|
||||
rd = rd_; \
|
||||
}
|
||||
|
||||
#define ROCC_INSTRUCTION_R_R_I(X, rd, rs1, rs2, funct, rd_n, rs1_n) { \
|
||||
register uint64_t rd_ asm ("x" # rd_n); \
|
||||
register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \
|
||||
asm volatile ( \
|
||||
".word " STR(CUSTOMX(X, 1, 1, 0, rd_n, rs1_n, rs2, funct)) "\n\t" \
|
||||
: "=r" (rd_) : [_rs1] "r" (rs1_)); \
|
||||
rd = rd_; \
|
||||
}
|
||||
|
||||
#define ROCC_INSTRUCTION_R_I_I(X, rd, rs1, rs2, funct, rd_n) { \
|
||||
register uint64_t rd_ asm ("x" # rd_n); \
|
||||
asm volatile ( \
|
||||
".word " STR(CUSTOMX(X, 1, 0, 0, rd_n, rs1, rs2, funct)) "\n\t" \
|
||||
: "=r" (rd_)); \
|
||||
rd = rd_; \
|
||||
}
|
||||
|
||||
#define ROCC_INSTRUCTION_I_R_R(X, rd, rs1, rs2, funct, rs1_n, rs2_n) { \
|
||||
register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \
|
||||
register uint64_t rs2_ asm ("x" # rs2_n) = (uint64_t) rs2; \
|
||||
asm volatile ( \
|
||||
".word " STR(CUSTOMX(X, 0, 1, 1, rd, rs1_n, rs2_n, funct)) "\n\t" \
|
||||
:: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
|
||||
}
|
||||
|
||||
#define ROCC_INSTRUCTION_I_R_I(X, rd, rs1, rs2, funct, rs1_n) { \
|
||||
register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \
|
||||
asm volatile ( \
|
||||
".word " STR(CUSTOMX(X, 0, 1, 0, rd, rs1_n, rs2, funct)) "\n\t" \
|
||||
:: [_rs1] "r" (rs1_)); \
|
||||
}
|
||||
|
||||
#define ROCC_INSTRUCTION_I_I_I(X, rd, rs1, rs2, funct) { \
|
||||
asm volatile ( \
|
||||
".word " STR(CUSTOMX(X, 0, 0, 0, rd, rs1, rs2, funct)) "\n\t" ); \
|
||||
}
|
||||
|
||||
#endif  // SRC_MAIN_C_ROCC_H
|
|
@ -0,0 +1,19 @@
|
|||
VERBOSE= 1
|
||||
TARGET ?=bitonic_tl
|
||||
|
||||
FUNC=sort
|
||||
LDFLAGS=
|
||||
CFLAGS=
|
||||
|
||||
POSTFIX=
|
||||
ifeq ($(CUSTOM_INST), 1)
|
||||
POSTFIX=.inst
|
||||
CFLAGS+=-DCUSTOM_INST
|
||||
endif
|
||||
|
||||
ifeq ($(CUSTOM_DRIVER), 1)
|
||||
POSTFIX=.driver
|
||||
CFLAGS+=-DCUSTOM_DRIVER
|
||||
endif
|
||||
|
||||
include ../Makefile
|
|
@ -0,0 +1,106 @@
|
|||
//#pragma once
|
||||
|
||||
#include "../os_utils.h"
|
||||
#define ACCEL_CONTROL 0x30000
|
||||
#define AP_DONE_MASK 0b10
|
||||
|
||||
#define ACCEL_INT 0x30004
|
||||
#define ACCEL_SRC 0x30010
|
||||
#define ACCEL_DIR 0x3001c
|
||||
/*----------------------------------------------------------------------------
|
||||
*
|
||||
* Author: Liang Ma (liang-ma@polito.it)
|
||||
*
|
||||
*----------------------------------------------------------------------------
|
||||
*/
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
//#define GLOBAL
|
||||
|
||||
// Total exp of actual size
|
||||
#define EXP 3
|
||||
|
||||
// Total exp of arr buffer size
|
||||
#define LIMIT 2
|
||||
//#include "bitonic_accel.cpp"
|
||||
#include "bitonic.h"
|
||||
|
||||
#define ARRAY_SIZE 8
|
||||
int sort_accel (int* src, int dir){
|
||||
|
||||
//uint64_t src_pa = vtop_translate((uint64_t)src);
|
||||
uint64_t src_pa = (uint64_t)src;
|
||||
|
||||
// Disable interrupt for now
|
||||
//reg_write32(ACCEL_INT, 0x0);
|
||||
|
||||
access_addr(ACCEL_INT, OUT, 0);
|
||||
|
||||
uint64_t addr;
|
||||
addr = vtop_translate(src);
|
||||
access_addr(ACCEL_SRC, OUT, addr);
|
||||
access_addr(ACCEL_SRC + 4, OUT, addr >> 32);
|
||||
|
||||
access_addr(ACCEL_DIR, OUT, dir);
|
||||
|
||||
// Set up pointer a and pointer b address
|
||||
// reg_write32(ACCEL_SRC, (uint64_t)src_pa);
|
||||
// reg_write32(ACCEL_DIR, (uint32_t)dir);
|
||||
|
||||
// Write to ap_start to start the execution
|
||||
access_addr(ACCEL_CONTROL, OUT, 0x1);
|
||||
//reg_write32(ACCEL_CONTROL, 0x1);
|
||||
//printf("Accel Control: %x\n", reg_read32(ACCEL_CONTROL));
|
||||
|
||||
// Done?
|
||||
int done = 0;
|
||||
while (!done){
|
||||
// done = reg_read32(ACCEL_CONTROL) & AP_DONE_MASK;
|
||||
done = access_addr(ACCEL_CONTROL, IN, 0x1) & AP_DONE_MASK;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
// input and output parameters
|
||||
int SIZE = ARRAY_SIZE;
|
||||
typedef int TYPE;
|
||||
TYPE h_a[SIZE];
|
||||
|
||||
int dir = 1;
|
||||
|
||||
int i = 0;
|
||||
for(i = 0;i<SIZE;i++)
|
||||
{
|
||||
//h_a[i]=rand()%(100*ARRAY_SIZE);
|
||||
h_a[i] = i;
|
||||
}
|
||||
|
||||
uint64_t begin, end, dur;
|
||||
begin = read_cycle();
|
||||
#ifdef CUSTOM_DRIVER
|
||||
sort_accel(h_a, dir);
|
||||
#else
|
||||
sort(h_a, dir);
|
||||
#endif
|
||||
end = read_cycle();
|
||||
duration(begin, end);
|
||||
int err;
|
||||
for (i = 0 ; i < SIZE; i++)
|
||||
{
|
||||
printf("array[%d]=%d\n", i, h_a[i]);
|
||||
if(i+1 == SIZE)
|
||||
break;
|
||||
if(( h_a[i] > h_a[i + 1])!=dir){
|
||||
err++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
printf("There is/are %d error(s).\n", err);
|
||||
if(err!=0)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
|
@ -1,25 +0,0 @@
|
|||
VERBOSE= 1
|
||||
TARGET ?=bram_tl
|
||||
|
||||
FUNC=top
|
||||
LDFLAGS=
|
||||
CFLAGS=
|
||||
|
||||
ifeq ($(CUSTOM_INST), 1)
|
||||
CFLAGS+=-DCUSTOM_INST
|
||||
endif
|
||||
|
||||
ifeq ($(CUSTOM_DRIVER), 1)
|
||||
CFLAGS+=-DCUSTOM_DRIVER
|
||||
endif
|
||||
|
||||
ifeq ($(LLVM), 1)
|
||||
ACCEL ?=0
|
||||
include ../../../../Makefile.llvm.in
|
||||
else
|
||||
ifeq ($(GCC), 1)
|
||||
include ../../../../Makefile.gcc.in
|
||||
else
|
||||
include ../../../../Makefile.bm.in
|
||||
endif
|
||||
endif
|
|
@ -1,155 +0,0 @@
|
|||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#define ACCEL_CONTROL 0x20000
|
||||
#define AP_DONE_MASK 0b10
|
||||
|
||||
#define ACCEL_INT 0x20004
|
||||
#define ACCEL_RETURN 0x20010
|
||||
#define ACCEL_PARAM1 0x20018
|
||||
#define ACCEL_PARAM2 0x20020
|
||||
#define ACCEL_NIC 0x2002c
|
||||
#define ACCEL_BRAM 0x22000
|
||||
|
||||
#define ICENET_NAME "icenet"
|
||||
#define ICENET_IO_BASE 0x10016000
|
||||
#define ICENET_SEND_REQ 0
|
||||
#define ICENET_RECV_REQ 8
|
||||
#define ICENET_SEND_COMP 16
|
||||
#define ICENET_RECV_COMP 18
|
||||
#define ICENET_COUNTS 20
|
||||
#define ICENET_MACADDR 24
|
||||
#define ICENET_IO_SIZE 32
|
||||
|
||||
#define CIRC_BUF_LEN 16
|
||||
#define ALIGN_BYTES 8
|
||||
#define ALIGN_MASK 0x7
|
||||
#define ALIGN_SHIFT 3
|
||||
|
||||
size_t sendq_max;
|
||||
size_t recvq_max;
|
||||
uint64_t mac;
|
||||
#ifdef CUSTOM_DRIVER
|
||||
#include "/scratch/qijing.huang/firesim_new/hls/sw/bm//mmio.h"
|
||||
int top_accel(int param1, int param2) {
|
||||
// Disable interrupt for now
|
||||
reg_write32(ACCEL_INT, 0x0);
|
||||
|
||||
// Set up pointer a and pointer b address
|
||||
reg_write32(ACCEL_PARAM1, (uint32_t)param1);
|
||||
reg_write32(ACCEL_PARAM2, (uint32_t)param2);
|
||||
//reg_write32(ACCEL_BRAM + param1, (uint32_t) 17);
|
||||
//reg_write32(ACCEL_BRAM + param2, (uint32_t) 18);
|
||||
|
||||
// reg_write32(ACCEL_BRAM, 0x1);
|
||||
// reg_write32(ACCEL_BRAM+(param1 << 2), 0x7);
|
||||
// reg_write32(ACCEL_BRAM+(param2 << 2), 0x8);
|
||||
|
||||
uint64_t srcmac = reg_read64(ICENET_IO_BASE+ICENET_MACADDR);
|
||||
printf("srcmac %d\n", srcmac);
|
||||
reg_write32(ACCEL_CONTROL+0x4000, srcmac);
|
||||
reg_write32(ACCEL_CONTROL+0x4004, srcmac >> 32);
|
||||
reg_write32(ACCEL_CONTROL+0x400c, srcmac);
|
||||
reg_write32(ACCEL_CONTROL+0x4010, srcmac >> 32);
|
||||
|
||||
// Write to ap_start to start the execution
|
||||
reg_write32(ACCEL_CONTROL, 0x1);
|
||||
//printf("Accel Control: %x\n", reg_read32(ACCEL_CONTROL));
|
||||
|
||||
// Done?
|
||||
int done = 0;
|
||||
while (!done){
|
||||
done = reg_read32(ACCEL_CONTROL) & AP_DONE_MASK;
|
||||
}
|
||||
|
||||
//int bram = reg_read32(ACCEL_BRAM);
|
||||
//printf("bram = %d\n", bram);
|
||||
//int ret = reg_read32(ACCEL_BRAM+param1);
|
||||
int ret = reg_read32(ACCEL_RETURN);
|
||||
printf("ret = %d\n", ret);
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline int send_req_avail(uint64_t* nic)
|
||||
{
|
||||
return nic[ICENET_COUNTS] & 0xf;
|
||||
}
|
||||
|
||||
static inline int recv_req_avail(uint64_t* nic)
|
||||
{
|
||||
return (nic[ICENET_COUNTS] >> 4) & 0xf;
|
||||
}
|
||||
|
||||
static inline int send_comp_avail(uint64_t* nic)
|
||||
{
|
||||
return (nic[ICENET_COUNTS] >> 8) & 0xf;
|
||||
}
|
||||
|
||||
static inline int recv_comp_avail(uint64_t* nic)
|
||||
{
|
||||
return (nic[ICENET_COUNTS] >> 12) & 0xf;
|
||||
}
|
||||
|
||||
|
||||
void ice_post_send(uint64_t* nic, int last, uintptr_t paddr, size_t len)
|
||||
{
|
||||
uint64_t command = 0;
|
||||
|
||||
if( ((paddr & 0x7ll) != 0) ||
|
||||
((len % 8) != 0)) {
|
||||
printf("paddr: 0x%lx, len = 0x%lx\n", paddr, len);
|
||||
}
|
||||
command = (len << 48) | (paddr & 0xffffffffffffL);
|
||||
command |= last ? 0 : (1ul << 63);
|
||||
|
||||
/* iowrite64(command, nic->iomem + ICENET_SEND_REQ); */
|
||||
//writeq(command, nic->iomem + ICENET_SEND_REQ);
|
||||
nic[ICENET_SEND_REQ] = command;
|
||||
}
|
||||
|
||||
void ice_post_recv(uint64_t* nic, uintptr_t paddr)
|
||||
{
|
||||
if((paddr & 0x7) != 0) {
|
||||
//panic("Unaligned receive buffer: %lx\n", paddr);
|
||||
;
|
||||
}
|
||||
/* iowrite64(paddr, nic->iomem + ICENET_RECV_REQ); */
|
||||
//writeq(paddr, nic->iomem + ICENET_RECV_REQ);
|
||||
nic[ICENET_RECV_REQ] = paddr;
|
||||
}
|
||||
|
||||
void ice_drain_sendq(uint64_t* nic)
|
||||
{
|
||||
/* Poll until there are no more pending sends */
|
||||
while(send_req_avail(nic) < sendq_max) {
|
||||
;
|
||||
}
|
||||
|
||||
/* Drain send_compq */
|
||||
while (send_comp_avail(nic) > 0) {
|
||||
//ioread16(nic->iomem + ICENET_SEND_COMP);
|
||||
uint64_t tmp = nic[ICENET_SEND_COMP];
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
size_t ice_recv_one(uint64_t* nic)
|
||||
{
|
||||
/* Wait for there to be something in the recv_comp Q */
|
||||
while(recv_comp_avail(nic) == 0) { ; }
|
||||
|
||||
/* Pop exactly one thing off Q */
|
||||
//return (size_t)ioread16(nic->iomem + ICENET_RECV_COMP);
|
||||
return (size_t)nic[ICENET_RECV_COMP];
|
||||
}
|
||||
|
||||
int main(){
|
||||
|
||||
#ifdef CUSTOM_DRIVER
|
||||
top_accel(4,12);
|
||||
#endif
|
||||
printf("main\n");
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -1,223 +0,0 @@
|
|||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include "ap_int.h"
|
||||
#include "hls_stream.h"
|
||||
#define ACCEL_CONTROL 0x20000
|
||||
#define AP_DONE_MASK 0b10
|
||||
|
||||
#define ACCEL_INT 0x20004
|
||||
#define ACCEL_RETURN 0x20010
|
||||
#define ACCEL_PARAM1 0x20018
|
||||
#define ACCEL_PARAM2 0x20020
|
||||
#define ACCEL_NIC 0x2002c
|
||||
#define ACCEL_BRAM 0x22000
|
||||
|
||||
#define ICENET_NAME "icenet"
|
||||
#define ICENET_IO_BASE 0x10016000
|
||||
#define ICENET_SEND_REQ 0
|
||||
#define ICENET_RECV_REQ 8
|
||||
#define ICENET_SEND_COMP 16
|
||||
#define ICENET_RECV_COMP 18
|
||||
#define ICENET_COUNTS 20
|
||||
#define ICENET_MACADDR 24
|
||||
#define ICENET_IO_SIZE 32
|
||||
|
||||
#define CIRC_BUF_LEN 16
|
||||
#define ALIGN_BYTES 8
|
||||
#define ALIGN_MASK 0x7
|
||||
#define ALIGN_SHIFT 3
|
||||
|
||||
size_t sendq_max;
|
||||
size_t recvq_max;
|
||||
uint64_t mac;
|
||||
#ifdef CUSTOM_DRIVER
|
||||
#include "/scratch/qijing.huang/firesim_new/hls/sw/bm//mmio.h"
|
||||
|
||||
int top_accel(int param1, int param2) {
|
||||
// Disable interrupt for now
|
||||
reg_write32(ACCEL_INT, 0x0);
|
||||
|
||||
// Set up pointer a and pointer b address
|
||||
reg_write32(ACCEL_PARAM1, (uint32_t)param1);
|
||||
reg_write32(ACCEL_PARAM2, (uint32_t)param2);
|
||||
//reg_write32(ACCEL_BRAM + param1, (uint32_t) 17);
|
||||
//reg_write32(ACCEL_BRAM + param2, (uint32_t) 18);
|
||||
|
||||
reg_write32(ACCEL_BRAM, 0x1);
|
||||
reg_write32(ACCEL_BRAM+(param1 << 2), 0x7);
|
||||
reg_write32(ACCEL_BRAM+(param2 << 2), 0x8);
|
||||
|
||||
// Write to ap_start to start the execution
|
||||
reg_write32(ACCEL_CONTROL, 0x1);
|
||||
//printf("Accel Control: %x\n", reg_read32(ACCEL_CONTROL));
|
||||
|
||||
// Done?
|
||||
int done = 0;
|
||||
while (!done){
|
||||
done = reg_read32(ACCEL_CONTROL) & AP_DONE_MASK;
|
||||
}
|
||||
|
||||
|
||||
int bram = reg_read32(ACCEL_BRAM);
|
||||
printf("bram = %d\n", bram);
|
||||
//int ret = reg_read32(ACCEL_BRAM+param1);
|
||||
int ret = reg_read32(ACCEL_RETURN);
|
||||
printf("ret = %d\n", ret);
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline int send_req_avail(uint64_t* nic)
|
||||
{
|
||||
return nic[ICENET_COUNTS] & 0xf;
|
||||
}
|
||||
|
||||
static inline int recv_req_avail(uint64_t* nic)
|
||||
{
|
||||
return (nic[ICENET_COUNTS] >> 4) & 0xf;
|
||||
}
|
||||
|
||||
static inline int send_comp_avail(uint64_t* nic)
|
||||
{
|
||||
return (nic[ICENET_COUNTS] >> 8) & 0xf;
|
||||
}
|
||||
|
||||
static inline int recv_comp_avail(uint64_t* nic)
|
||||
{
|
||||
return (nic[ICENET_COUNTS] >> 12) & 0xf;
|
||||
}
|
||||
|
||||
|
||||
void ice_post_send(uint64_t* nic, int last, uintptr_t paddr, size_t len)
|
||||
{
|
||||
uint64_t command = 0;
|
||||
|
||||
if( ((paddr & 0x7ll) != 0) ||
|
||||
((len % 8) != 0)) {
|
||||
printf("paddr: 0x%lx, len = 0x%lx\n", paddr, len);
|
||||
}
|
||||
command = (len << 48) | (paddr & 0xffffffffffffL);
|
||||
command |= last ? 0 : (1ul << 63);
|
||||
|
||||
/* iowrite64(command, nic->iomem + ICENET_SEND_REQ); */
|
||||
//writeq(command, nic->iomem + ICENET_SEND_REQ);
|
||||
nic[ICENET_SEND_REQ] = command;
|
||||
}
|
||||
|
||||
void ice_post_recv(uint64_t* nic, uintptr_t paddr)
|
||||
{
|
||||
if((paddr & 0x7) != 0) {
|
||||
//panic("Unaligned receive buffer: %lx\n", paddr);
|
||||
;
|
||||
}
|
||||
/* iowrite64(paddr, nic->iomem + ICENET_RECV_REQ); */
|
||||
//writeq(paddr, nic->iomem + ICENET_RECV_REQ);
|
||||
nic[ICENET_RECV_REQ] = paddr;
|
||||
}
|
||||
|
||||
void ice_drain_sendq(uint64_t* nic)
|
||||
{
|
||||
/* Poll until there are no more pending sends */
|
||||
while(send_req_avail(nic) < sendq_max) {
|
||||
;
|
||||
}
|
||||
|
||||
/* Drain send_compq */
|
||||
while (send_comp_avail(nic) > 0) {
|
||||
//ioread16(nic->iomem + ICENET_SEND_COMP);
|
||||
uint64_t tmp = nic[ICENET_SEND_COMP];
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
size_t ice_recv_one(uint64_t* nic)
|
||||
{
|
||||
/* Wait for there to be something in the recv_comp Q */
|
||||
while(recv_comp_avail(nic) == 0) { ; }
|
||||
|
||||
/* Pop exactly one thing off Q */
|
||||
//return (size_t)ioread16(nic->iomem + ICENET_RECV_COMP);
|
||||
return (size_t)nic[ICENET_RECV_COMP];
|
||||
}
|
||||
|
||||
// val ethType = UInt(ETH_TYPE_BITS.W)
|
||||
// val srcmac = UInt(ETH_MAC_BITS.W)
|
||||
// val dstmac = UInt(ETH_MAC_BITS.W)
|
||||
// val padding = UInt(ETH_PAD_BITS.W)
|
||||
// val NET_IF_WIDTH = 64
|
||||
// val NET_IF_BYTES = NET_IF_WIDTH/8
|
||||
// val NET_LEN_BITS = 16
|
||||
//
|
||||
// val ETH_MAX_BYTES = 1520
|
||||
// val ETH_HEAD_BYTES = 16
|
||||
// val ETH_MAC_BITS = 48
|
||||
// val ETH_TYPE_BITS = 16
|
||||
// val ETH_PAD_BITS = 16
|
||||
|
||||
int top(int param1, int param2, uint64_t* nic, int paramtable[1600], hls::stream<ap_uint<128> >& req_head, hls::stream<ap_uint<65> >& req_data, hls::stream<ap_uint<128> >& resp_head, hls::stream<ap_uint<65> >& resp_data, ap_uint<64>srcmac, ap_uint<64>dstmac) {
|
||||
//#pragma HLS dataflow
|
||||
#pragma HLS INTERFACE s_axilite port=nic bundle=control
|
||||
#pragma HLS INTERFACE m_axi port=nic offset=slave bundle=gmem0
|
||||
#pragma HLS interface ap_fifo port=req_head
|
||||
#pragma HLS interface ap_fifo port=req_data
|
||||
#pragma HLS interface ap_fifo port=resp_head
|
||||
#pragma HLS interface ap_fifo port=resp_data
|
||||
|
||||
#pragma HLS interface s_axilite bundle=control port=param1
|
||||
#pragma HLS interface s_axilite bundle=control port=param2
|
||||
#pragma HLS interface s_axilite bundle=control port=paramtable
|
||||
#pragma HLS interface s_axilite bundle=control port=srcmac
|
||||
#pragma HLS interface s_axilite bundle=control port=dstmac
|
||||
#pragma HLS interface s_axilite bundle=control port=return
|
||||
|
||||
ap_uint<16> RMEM_REQ_ETH_TYPE = 0x0408L;
|
||||
ap_uint<16> RMEM_RESP_ETH_TYPE = 0x0508L;
|
||||
|
||||
ap_uint<128> resp_eth_head;
|
||||
ap_uint<128> req_eth_head;
|
||||
|
||||
resp_eth_head.range(15,0) = 0;
|
||||
resp_eth_head.range(63,16) = dstmac;
|
||||
resp_eth_head.range(111,64) = srcmac;
|
||||
resp_eth_head.range(127,112) = RMEM_RESP_ETH_TYPE;
|
||||
ap_uint<65> send_data;
|
||||
send_data(63,0) = 77;
|
||||
send_data(64,64) = 1;
|
||||
|
||||
// Send a request
|
||||
volatile int count = 1;
|
||||
if (count == 1)
|
||||
resp_head.write(resp_eth_head);
|
||||
count ++;
|
||||
if (count == 2)
|
||||
resp_data.write(send_data);
|
||||
count ++;
|
||||
|
||||
// Loopback
|
||||
|
||||
ap_uint<64> read_head;
|
||||
ap_uint<65> recv_data;
|
||||
if (count == 3)
|
||||
read_head = req_head.read();
|
||||
count ++;
|
||||
if (count == 4)
|
||||
recv_data = req_data.read();
|
||||
count ++;
|
||||
//recvq_max = recv_req_avail(nic);
|
||||
//sendq_max = send_req_avail(nic);
|
||||
//int sum = paramtable[param1] + paramtable[param2];
|
||||
//printf("recvq_max %d\n", recvq_max);
|
||||
//printf("sendq_max %d\n", sendq_max);
|
||||
return recv_data.range(63,0) + recv_data.range(64,64);
|
||||
}
|
||||
|
||||
int main(){
|
||||
|
||||
#ifdef CUSTOM_DRIVER
|
||||
top_accel(4,12);
|
||||
#endif
|
||||
printf("main\n");
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,79 @@
|
|||
#ifndef COMMON_H
|
||||
#define COMMON_H
|
||||
|
||||
#define _XOPEN_SOURCE 700
|
||||
#include <fcntl.h> /* open */
|
||||
#include <stdint.h> /* uint64_t */
|
||||
#include <stdlib.h> /* size_t */
|
||||
#include <unistd.h> /* pread, sysconf */
|
||||
|
||||
/* Format documented at:
|
||||
* https://github.com/torvalds/linux/blob/v4.9/Documentation/vm/pagemap.txt
|
||||
**/
|
||||
/* One decoded 64-bit /proc/PID/pagemap entry.
 *
 * Bit layout per Documentation/vm/pagemap.txt:
 *   bits 0-54  page frame number (when present)
 *   bit  55    pte is soft-dirty
 *   bit  61    page is file-page or shared-anon
 *   bit  62    page swapped
 *   bit  63    page present
 */
typedef struct {
    uint64_t pfn : 55;
    unsigned int soft_dirty : 1;
    unsigned int file_page : 1;
    unsigned int swapped : 1;
    unsigned int present : 1;
} PagemapEntry;

/* Parse the pagemap entry for the given virtual address.
 *
 * @param[out] entry      the parsed entry
 * @param[in]  pagemap_fd file descriptor to an open /proc/pid/pagemap file
 * @param[in]  vaddr      virtual address to get entry for
 * @return 0 for success, 1 for failure (read error or end of file)
 */
int pagemap_get_entry(PagemapEntry *entry, int pagemap_fd, uintptr_t vaddr)
{
    size_t nread;
    ssize_t ret;
    uint64_t data;
    unsigned char *dst = (unsigned char *)&data;

    nread = 0;
    while (nread < sizeof(data)) {
        /* On a short read, continue after the bytes already received instead
         * of overwriting the start of the buffer. */
        ret = pread(pagemap_fd, dst + nread, sizeof(data) - nread,
                (vaddr / sysconf(_SC_PAGE_SIZE)) * sizeof(data) + nread);
        /* Check before accumulating so a -1 never wraps the unsigned counter. */
        if (ret <= 0) {
            return 1;
        }
        nread += (size_t)ret;
    }
    entry->pfn = data & (((uint64_t)1 << 55) - 1);
    entry->soft_dirty = (data >> 55) & 1;
    entry->file_page = (data >> 61) & 1;
    entry->swapped = (data >> 62) & 1;
    entry->present = (data >> 63) & 1;
    return 0;
}
|
||||
|
||||
/* Convert the given virtual address to physical using /proc/PID/pagemap.
|
||||
*
|
||||
* @param[out] paddr physical address
|
||||
* @param[in] pid process to convert for
|
||||
* @param[in] vaddr virtual address to get entry for
|
||||
* @return 0 for success, 1 for failure
|
||||
*/
|
||||
int virt_to_phys_user(uintptr_t *paddr, pid_t pid, uintptr_t vaddr)
|
||||
{
|
||||
char pagemap_file[BUFSIZ];
|
||||
int pagemap_fd;
|
||||
|
||||
snprintf(pagemap_file, sizeof(pagemap_file), "/proc/%ju/pagemap", (uintmax_t)pid);
|
||||
pagemap_fd = open(pagemap_file, O_RDONLY);
|
||||
if (pagemap_fd < 0) {
|
||||
printf("invalid fd!\n");
|
||||
return 1;
|
||||
}
|
||||
PagemapEntry entry;
|
||||
if (pagemap_get_entry(&entry, pagemap_fd, vaddr)) {
|
||||
printf("invalid entry!\n");
|
||||
return 1;
|
||||
}
|
||||
close(pagemap_fd);
|
||||
*paddr = (entry.pfn * sysconf(_SC_PAGE_SIZE)) + (vaddr % sysconf(_SC_PAGE_SIZE));
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,250 @@
|
|||
/*
|
||||
* Remember: mmap, like most fops, does not work with debugfs as of 4.9! https://patchwork.kernel.org/patch/9252557/
|
||||
* Adapted from:
|
||||
* https://coherentmusings.wordpress.com/2014/06/10/implementing-mmap-for-transferring-data-from-user-space-to-kernel-space/
|
||||
* */
|
||||
|
||||
#include <asm/uaccess.h> /* copy_from_user */
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h> /* min */
|
||||
#include <linux/mm.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/vmacache.h>
|
||||
#include <linux/shm.h>
|
||||
#include <linux/mman.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/capability.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/personality.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/shmem_fs.h>
|
||||
#include <linux/profile.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/mount.h>
|
||||
#include <linux/mempolicy.h>
|
||||
#include <linux/rmap.h>
|
||||
#include <linux/mmu_notifier.h>
|
||||
#include <linux/mmdebug.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/audit.h>
|
||||
#include <linux/khugepaged.h>
|
||||
#include <linux/uprobes.h>
|
||||
#include <linux/rbtree_augmented.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/memory.h>
|
||||
#include <linux/printk.h>
|
||||
#include <linux/userfaultfd_k.h>
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/pkeys.h>
|
||||
#include <linux/oom.h>
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/tlb.h>
|
||||
#include <asm/mmu_context.h>
|
||||
|
||||
static const char *filename = "custom_mmap";
|
||||
|
||||
static struct mmap_info *info;
|
||||
|
||||
enum { BUFFER_SIZE = 4 , ORDER=8};
|
||||
|
||||
struct mmap_info {
|
||||
char *data;
|
||||
};
|
||||
|
||||
/* After unmap. */
|
||||
static void vm_close(struct vm_area_struct *vma)
|
||||
{
|
||||
pr_info("vm_close\n");
|
||||
}
|
||||
|
||||
/* First page access. */
|
||||
//static int vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
static int vm_fault(struct vm_fault *vmf)
|
||||
{
|
||||
struct vm_area_struct *vma = vmf->vma;
|
||||
unsigned long vm_start = vma->vm_start;
|
||||
unsigned long vm_end = vma->vm_end;
|
||||
unsigned long vmf_addr = vmf->address;
|
||||
pr_info("vm_start %llx, vm_end %llx, vmf_addr %llx.\n", vm_start, vm_end, vmf_addr);
|
||||
// Create linear mapping
|
||||
unsigned long offset = vmf_addr - vm_start;
|
||||
|
||||
struct page *page;
|
||||
struct mmap_info *info;
|
||||
|
||||
pr_info("vm_fault\n");
|
||||
info = (struct mmap_info *)vma->vm_private_data;
|
||||
// If the base logical addr of kernel buffer exists
|
||||
if (info->data) {
|
||||
page = virt_to_page((info->data) + offset);
|
||||
|
||||
pr_info("phy %llx virt %llx\n", page_to_phys(page), (info->data) + offset);
|
||||
get_page(page);
|
||||
vmf->page = page;
|
||||
int ret = remap_pfn_range(vma, vmf_addr, page_to_pfn(page), PAGE_SIZE, vma->vm_page_prot);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Aftr mmap. TODO vs mmap, when can this happen at a different time than mmap? */
|
||||
static void vm_open(struct vm_area_struct *vma)
|
||||
{
|
||||
pr_info("vm_open\n");
|
||||
}
|
||||
|
||||
static struct vm_operations_struct vm_ops =
|
||||
{
|
||||
.close = vm_close,
|
||||
.fault = vm_fault,
|
||||
.open = vm_open,
|
||||
};
|
||||
|
||||
static int mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
{
|
||||
pr_info("mmap\n");
|
||||
vma->vm_ops = &vm_ops;
|
||||
vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
|
||||
vma->vm_private_data = filp->private_data;
|
||||
vm_open(vma);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
// struct mmap_info *info;
|
||||
|
||||
pr_info("open\n");
|
||||
// info = kmalloc(sizeof(struct mmap_info), GFP_KERNEL);
|
||||
// pr_info("virt_to_phys = 0x%llx\n", (unsigned long long)virt_to_phys((void *)info));
|
||||
// //info->data = (char *)get_zeroed_page(GFP_KERNEL);
|
||||
|
||||
// unsigned long order = ORDER; // TODO add module args
|
||||
// // data maps to logical address of the buffer
|
||||
// info->data = (char* )__get_free_pages(GFP_KERNEL, order);
|
||||
// if (!info->data) {
|
||||
// pr_info("Fail to allocate free pages!\n");
|
||||
// /* insufficient memory: you must handle this error! */
|
||||
// return -ENOMEM;
|
||||
// }
|
||||
// pr_info("kernel logical addr 0x%llx\n", (unsigned long)(info->data));
|
||||
|
||||
// // By opening the file, a new vma struct is created
|
||||
memcpy(info->data, "asdf", BUFFER_SIZE);
|
||||
filp->private_data = info;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * read() handler: copies up to BUFFER_SIZE bytes from a fixed offset inside
 * the kernel buffer to user space.
 *
 * The file offset `off` is deliberately ignored and never advanced.
 * NOTE(review): because of that, repeated reads never return 0 (EOF).
 *
 * Returns the number of bytes copied, or -EFAULT if the user buffer is bad.
 */
static ssize_t read(struct file *filp, char __user *buf, size_t len, loff_t *off)
{
	struct mmap_info *info = filp->private_data;
	// off is the file offset dont use it!
	unsigned long offset = 4500;
	size_t count = min(len, (size_t)BUFFER_SIZE);

	pr_info("read offset %ld\n", (long)offset);

	/* Pass a real pointer into the buffer; the original handed
	 * copy_to_user() an integer where it expects a source pointer. */
	if (copy_to_user(buf, info->data + offset, count))
		return -EFAULT;
	return count;
}
|
||||
|
||||
/*
 * write() handler: copies up to BUFFER_SIZE bytes from user space to a fixed
 * offset inside the kernel buffer. The file offset `off` is ignored.
 *
 * Returns `len` on success, -EFAULT if the user buffer is bad.
 * NOTE(review): `len` is returned even when fewer bytes (at most
 * BUFFER_SIZE) were stored; kept for compatibility with existing callers.
 */
static ssize_t write(struct file *filp, const char __user *buf, size_t len, loff_t *off)
{
	struct mmap_info *info = filp->private_data;
	unsigned long offset = 4500;
	size_t count = min(len, (size_t)BUFFER_SIZE);

	pr_info("write offset %ld\n", (long)offset);

	/* Pass a real pointer into the buffer; the original handed
	 * copy_from_user() an integer where it expects a destination pointer. */
	if (copy_from_user(info->data + offset, buf, count))
		return -EFAULT;
	return len;
}
|
||||
|
||||
static int release(struct inode *inode, struct file *filp)
|
||||
{
|
||||
// struct mmap_info *info;
|
||||
|
||||
// pr_info("release\n");
|
||||
// info = filp->private_data;
|
||||
// //free_page((unsigned long)info->data);
|
||||
// unsigned long order = ORDER;
|
||||
// free_pages(info->data, order);
|
||||
// printk("Freeing 2 ^ %d pages\n", order);
|
||||
// kfree(info);
|
||||
// filp->private_data = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct file_operations fops = {
|
||||
.mmap = mmap,
|
||||
.open = open,
|
||||
.release = release,
|
||||
.read = read,
|
||||
.write = write,
|
||||
};
|
||||
|
||||
static int myinit(void)
|
||||
{
|
||||
proc_create(filename, 0, NULL, &fops);
|
||||
|
||||
pr_info("open\n");
|
||||
info = kmalloc(sizeof(struct mmap_info), GFP_KERNEL);
|
||||
//info->data = (char *)get_zeroed_page(GFP_KERNEL);
|
||||
|
||||
unsigned long order = ORDER; // TODO add module args
|
||||
// data maps to logical address of the buffer
|
||||
info->data = (char* )__get_free_pages(GFP_KERNEL, order);
|
||||
|
||||
pr_info("virt_to_phys = 0x%llx\n", (unsigned long long)virt_to_phys((void *)info->data));
|
||||
if (!info->data) {
|
||||
pr_info("Fail to allocate free pages!\n");
|
||||
/* insufficient memory: you must handle this error! */
|
||||
return -ENOMEM;
|
||||
}
|
||||
pr_info("kernel logical addr 0x%llx\n", (unsigned long)(info->data));
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void myexit(void)
|
||||
{
|
||||
//info = filp->private_data;
|
||||
//free_page((unsigned long)info->data);
|
||||
unsigned long order = ORDER;
|
||||
free_pages(info->data, order);
|
||||
printk("Freeing 2 ^ %d pages\n", order);
|
||||
kfree(info);
|
||||
|
||||
remove_proc_entry(filename, NULL);
|
||||
}
|
||||
|
||||
module_init(myinit)
|
||||
module_exit(myexit)
|
||||
MODULE_LICENSE("GPL");
|
||||
|
|
@ -0,0 +1,78 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
#ifndef __ASM_GENERIC_MMAN_COMMON_H
|
||||
#define __ASM_GENERIC_MMAN_COMMON_H
|
||||
|
||||
/*
|
||||
Author: Michael S. Tsirkin <mst@mellanox.co.il>, Mellanox Technologies Ltd.
|
||||
Based on: asm-xxx/mman.h
|
||||
*/
|
||||
|
||||
#define PROT_READ 0x1 /* page can be read */
|
||||
#define PROT_WRITE 0x2 /* page can be written */
|
||||
#define PROT_EXEC 0x4 /* page can be executed */
|
||||
#define PROT_SEM 0x8 /* page may be used for atomic ops */
|
||||
#define PROT_NONE 0x0 /* page can not be accessed */
|
||||
#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */
|
||||
#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */
|
||||
|
||||
#define MAP_SHARED 0x01 /* Share changes */
|
||||
#define MAP_PRIVATE 0x02 /* Changes are private */
|
||||
#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */
|
||||
#define MAP_TYPE 0x0f /* Mask for type of mapping */
|
||||
#define MAP_FIXED 0x10 /* Interpret addr exactly */
|
||||
#define MAP_ANONYMOUS 0x20 /* don't use a file */
|
||||
#define MAP_ALLOC 0x40 /* don't use a file */
|
||||
#define MAP_FREE 0x80 /* don't use a file */
|
||||
#define MAP_COPY_FROM_USER 0x100 /* don't use a file */
|
||||
#define MAP_COPY_TO_USER 0x200 /* don't use a file */
|
||||
#ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED
|
||||
# define MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be uninitialized */
|
||||
#else
|
||||
# define MAP_UNINITIALIZED 0x0 /* Don't support this flag */
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Flags for mlock
|
||||
*/
|
||||
#define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */
|
||||
|
||||
#define MS_ASYNC 1 /* sync memory asynchronously */
|
||||
#define MS_INVALIDATE 2 /* invalidate the caches */
|
||||
#define MS_SYNC 4 /* synchronous memory sync */
|
||||
|
||||
#define MADV_NORMAL 0 /* no further special treatment */
|
||||
#define MADV_RANDOM 1 /* expect random page references */
|
||||
#define MADV_SEQUENTIAL 2 /* expect sequential page references */
|
||||
#define MADV_WILLNEED 3 /* will need these pages */
|
||||
#define MADV_DONTNEED 4 /* don't need these pages */
|
||||
|
||||
/* common parameters: try to keep these consistent across architectures */
|
||||
#define MADV_FREE 8 /* free pages only if memory pressure */
|
||||
#define MADV_REMOVE 9 /* remove these pages & resources */
|
||||
#define MADV_DONTFORK 10 /* don't inherit across fork */
|
||||
#define MADV_DOFORK 11 /* do inherit across fork */
|
||||
#define MADV_HWPOISON 100 /* poison a page for testing */
|
||||
#define MADV_SOFT_OFFLINE 101 /* soft offline page for testing */
|
||||
|
||||
#define MADV_MERGEABLE 12 /* KSM may merge identical pages */
|
||||
#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */
|
||||
|
||||
#define MADV_HUGEPAGE 14 /* Worth backing with hugepages */
|
||||
#define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */
|
||||
|
||||
#define MADV_DONTDUMP 16 /* Explicitly exclude from the core dump,
|
||||
overrides the coredump filter bits */
|
||||
#define MADV_DODUMP 17 /* Clear the MADV_DONTDUMP flag */
|
||||
|
||||
#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */
|
||||
#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */
|
||||
|
||||
/* compatibility flags */
|
||||
#define MAP_FILE 0
|
||||
|
||||
#define PKEY_DISABLE_ACCESS 0x1
|
||||
#define PKEY_DISABLE_WRITE 0x2
|
||||
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
|
||||
PKEY_DISABLE_WRITE)
|
||||
|
||||
#endif /* __ASM_GENERIC_MMAN_COMMON_H */
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,58 @@
|
|||
#include <assert.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h> /* uintmax_t */
|
||||
#include <string.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h> /* sysconf */
|
||||
|
||||
#include "common.h" /* virt_to_phys_user */
|
||||
#define PAGE_SIZE 4096
|
||||
|
||||
/*
 * Open the driver's proc file for read/write access.
 *
 * Returns the file descriptor. On failure the error is reported with
 * perror() and the program aborts through assert(0).
 */
int mmap_init()
{
    const char *path = "/proc/origin_mmap";
    int fd = open(path, O_RDWR | O_SYNC);

    if (fd < 0) {
        perror("open");
        assert(0);
    }
    return fd;
}
|
||||
|
||||
unsigned long get_addr(int fd){
|
||||
unsigned long page_size =PAGE_SIZE;
|
||||
//printf("page_size %d\n", page_size);
|
||||
char *addr;
|
||||
|
||||
//puts("mmap");
|
||||
addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
|
||||
return addr;
|
||||
}
|
||||
|
||||
int mmap_delete(int fd, unsigned long addr) {
|
||||
unsigned long page_size =PAGE_SIZE;
|
||||
if (munmap(addr, page_size)) {
|
||||
perror("munmap");
|
||||
assert(0);
|
||||
}
|
||||
close(fd);
|
||||
}
|
||||
|
||||
/*
 * Map the driver buffer behind `fd` and copy `length` bytes from `addr`
 * into it. Returns the address of the mapped buffer.
 */
char* copy_to_buffer(char* addr, unsigned length, int fd){
    char *mapped = (void *)get_addr(fd);

    /* memcpy() returns its destination, i.e. the mapped buffer. */
    return memcpy(mapped, addr, length);
}
|
||||
|
||||
|
|
@ -0,0 +1,89 @@
|
|||
/* Remember: mmap, like most fops, does not work with debugfs as of 4.9! https://patchwork.kernel.org/patch/9252557/
|
||||
|
||||
Adapted from:
|
||||
https://coherentmusings.wordpress.com/2014/06/10/implementing-mmap-for-transferring-data-from-user-space-to-kernel-space/
|
||||
*/
|
||||
|
||||
#include <linux/uaccess.h> /* copy_from_user */
|
||||
#include <asm/io.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h> /* min */
|
||||
#include <linux/mm.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#define PAGE_ORDER 8
|
||||
#define MEMSZ ((1 << PAGE_ORDER) * PAGE_SIZE)
|
||||
|
||||
static const char *filename = "test2";
|
||||
unsigned long kaddr = 0;
|
||||
char *buf;
|
||||
|
||||
static int mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
{
|
||||
unsigned long pfn;
|
||||
pfn = virt_to_phys((void*)kaddr) >> PAGE_SHIFT;
|
||||
|
||||
if(remap_pfn_range(vma, vma->vm_start, pfn, (vma->vm_end - vma->vm_start),
|
||||
vma->vm_page_prot))
|
||||
{
|
||||
printk("remap failed...");
|
||||
return -1;
|
||||
}
|
||||
vma->vm_flags |= (VM_DONTDUMP|VM_DONTEXPAND);
|
||||
printk("remap_pfn_rang pfn:[%lu] ok.\n", pfn);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int release(struct inode *inode, struct file *filp)
|
||||
{
|
||||
int i = 0;
|
||||
pr_info("release:\n");
|
||||
for(i = 0; i < MEMSZ; i += PAGE_SIZE) {
|
||||
pr_info("Page %d: %s\n", i, buf+i);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct file_operations fops = {
|
||||
.mmap = mmap,
|
||||
.release = release
|
||||
};
|
||||
|
||||
static int myinit(void)
|
||||
{
|
||||
int i;
|
||||
proc_create(filename, 0, NULL, &fops);
|
||||
/* alloc one page */
|
||||
kaddr = __get_free_pages(GFP_KERNEL, PAGE_ORDER);
|
||||
if (!kaddr) {
|
||||
printk("Allocate memory failure!/n");
|
||||
} else {
|
||||
//XXX This is techinically needed, but I'm lazy right now
|
||||
/* SetPageReserved(virt_to_page(kaddr)); */
|
||||
|
||||
buf = (char *)kaddr;
|
||||
|
||||
for(i = 0; i < MEMSZ; i += PAGE_SIZE) {
|
||||
sprintf(buf + i, "%d", i >> PAGE_SHIFT);
|
||||
}
|
||||
printk("Allocate memory success!.\n");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void myexit(void)
|
||||
{
|
||||
pr_info("mmap2 module exiting\n");
|
||||
/* ClearPageReserved(virt_to_page(kaddr)); */
|
||||
free_pages(kaddr, PAGE_ORDER);
|
||||
remove_proc_entry(filename, NULL);
|
||||
return;
|
||||
}
|
||||
|
||||
module_init(myinit)
|
||||
module_exit(myexit)
|
||||
MODULE_LICENSE("GPL");
|
|
@ -0,0 +1,208 @@
|
|||
/*
|
||||
Remember: mmap, like most fops, does not work with debugfs as of 4.9! https://patchwork.kernel.org/patch/9252557/
|
||||
|
||||
Adapted from:
|
||||
https://coherentmusings.wordpress.com/2014/06/10/implementing-mmap-for-transferring-data-from-user-space-to-kernel-space/
|
||||
*/
|
||||
|
||||
#include <asm/uaccess.h> /* copy_from_user */
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h> /* min */
|
||||
#include <linux/mm.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/vmacache.h>
|
||||
#include <linux/shm.h>
|
||||
#include <linux/mman.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/capability.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/personality.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/shmem_fs.h>
|
||||
#include <linux/profile.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/mount.h>
|
||||
#include <linux/mempolicy.h>
|
||||
#include <linux/rmap.h>
|
||||
#include <linux/mmu_notifier.h>
|
||||
#include <linux/mmdebug.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/audit.h>
|
||||
#include <linux/khugepaged.h>
|
||||
#include <linux/uprobes.h>
|
||||
#include <linux/rbtree_augmented.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/memory.h>
|
||||
#include <linux/printk.h>
|
||||
#include <linux/userfaultfd_k.h>
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/pkeys.h>
|
||||
#include <linux/oom.h>
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/tlb.h>
|
||||
#include <asm/mmu_context.h>
|
||||
|
||||
|
||||
|
||||
static const char *filename = "origin_mmap";
|
||||
|
||||
enum { BUFFER_SIZE = 4 };
|
||||
|
||||
struct mmap_info {
|
||||
char *data;
|
||||
};
|
||||
|
||||
/* VMA close callback (after unmap): intentionally does nothing. */
static void vm_close(struct vm_area_struct *vma)
{
}
|
||||
|
||||
/* First page access. */
|
||||
//static int vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
//{
|
||||
static int vm_fault(struct vm_fault *vmf)
|
||||
{
|
||||
struct vm_area_struct *vma = vmf->vma;
|
||||
|
||||
unsigned long vm_start = vma->vm_start;
|
||||
unsigned long vm_end = vma->vm_end;
|
||||
unsigned long vmf_addr = vmf->address;
|
||||
//pr_info("vm_start %llx, vm_end %llx, vmf_addr %llx.\n", vm_start, vm_end, vmf_addr);
|
||||
// Create linear mapping
|
||||
unsigned long offset = vmf_addr - vm_start;
|
||||
|
||||
struct page *page;
|
||||
struct mmap_info *info;
|
||||
|
||||
//pr_info("vm_fault\n");
|
||||
//pr_info("page size %d\n", PAGE_SIZE);
|
||||
info = (struct mmap_info *)vma->vm_private_data;
|
||||
|
||||
if (info->data) {
|
||||
page = virt_to_page((info->data) + offset);
|
||||
|
||||
//pr_info("phy %llx virt %llx\n", page_to_phys(page), (info->data) + offset);
|
||||
//page = virt_to_page(info->data);
|
||||
get_page(page);
|
||||
vmf->page = page;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * VMA open callback: intentionally does nothing.
 * TODO vs mmap, when can this happen at a different time than mmap?
 */
static void vm_open(struct vm_area_struct *vma)
{
}
|
||||
|
||||
static struct vm_operations_struct vm_ops =
|
||||
{
|
||||
.close = vm_close,
|
||||
.fault = vm_fault,
|
||||
.open = vm_open,
|
||||
};
|
||||
|
||||
static int mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
{
|
||||
//pr_info("mmap\n");
|
||||
// if (remap_pfn_range(vma, vma->vm_start, virt_to_pfn(flip->private_data->data), vma->vm_end - vma->vm_start, vma->vm_page_prot)) return -EAGAIN;
|
||||
vma->vm_ops = &vm_ops;
|
||||
vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP ;
|
||||
vma->vm_private_data = filp->private_data;
|
||||
vm_open(vma);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct mmap_info *info;
|
||||
|
||||
//pr_info("open\n");
|
||||
info = kmalloc(sizeof(struct mmap_info), GFP_KERNEL);
|
||||
//info->data = (char *)get_zeroed_page(GFP_KERNEL);
|
||||
info->data = (char *)get_zeroed_page(GFP_KERNEL);
|
||||
//info->data = (char* )__get_free_pages(GFP_KERNEL, 8);
|
||||
//pr_info("virt_to_phys = 0x%llx\n", (unsigned long long)virt_to_phys((void *)info->data));
|
||||
memcpy(info->data, "asdf", BUFFER_SIZE);
|
||||
filp->private_data = info;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * read() handler: copy the first min(len, BUFFER_SIZE) bytes of the
 * per-file page to user space.
 *
 * Returns the number of bytes copied, or -EFAULT if the user buffer cannot
 * be written. The file offset is not consulted or advanced.
 */
static ssize_t read(struct file *filp, char __user *buf, size_t len, loff_t *off)
{
	struct mmap_info *info = filp->private_data;
	size_t count = min(len, (size_t)BUFFER_SIZE);

	if (copy_to_user(buf, info->data, count) != 0)
		return -EFAULT;

	return count;
}
|
||||
|
||||
/*
 * write() handler: copy at most BUFFER_SIZE bytes from user space to the
 * start of the per-file page.
 *
 * Returns len on success (any bytes past BUFFER_SIZE are dropped) or
 * -EFAULT if the user buffer cannot be read.
 */
static ssize_t write(struct file *filp, const char __user *buf, size_t len, loff_t *off)
{
	struct mmap_info *info = filp->private_data;
	size_t count = min(len, (size_t)BUFFER_SIZE);

	if (copy_from_user(info->data, buf, count) != 0)
		return -EFAULT;

	return len;
}
|
||||
|
||||
static int release(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct mmap_info *info;
|
||||
|
||||
//pr_info("release\n");
|
||||
info = filp->private_data;
|
||||
free_page((unsigned long)info->data);
|
||||
//free_pages(info->data, 8);
|
||||
kfree(info);
|
||||
filp->private_data = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct file_operations fops = {
|
||||
.mmap = mmap,
|
||||
.open = open,
|
||||
.release = release,
|
||||
.read = read,
|
||||
.write = write,
|
||||
};
|
||||
|
||||
static int myinit(void)
|
||||
{
|
||||
proc_create(filename, 0, NULL, &fops);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void myexit(void)
|
||||
{
|
||||
remove_proc_entry(filename, NULL);
|
||||
}
|
||||
|
||||
module_init(myinit)
|
||||
module_exit(myexit)
|
||||
MODULE_LICENSE("GPL");
|
Binary file not shown.
|
@ -0,0 +1,19 @@
|
|||
#include "mmap_driver.c"
|
||||
|
||||
|
||||
/*
 * Smoke test for the mmap driver helpers: open the driver twice, map a page
 * from each descriptor, move a few ints between the mappings and a local
 * array, then unmap and close both.
 */
int main (){
    int a[4] = {0, 2, 3, 4};
    int fd = mmap_init();
    int *b = (int *)get_addr(fd);

    b[3] = a[1];
    a[2] = b[2];

    int fd2 = mmap_init();
    int *c = (int *)get_addr(fd2);

    c[4] = b[5];

    /* mmap_delete() takes the address as an unsigned long, not a pointer. */
    mmap_delete(fd, (unsigned long)b);
    mmap_delete(fd2, (unsigned long)c);
    return 0;
}
|
|
@ -0,0 +1,56 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/mman.h>
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
#include "common.h"
|
||||
|
||||
#define ORDER 8
|
||||
#define PAGE_SIZE 4096
|
||||
#define MEMSZ (PAGE_SIZE * (1 << ORDER))
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
uintptr_t paddr;
|
||||
char *vaddr;
|
||||
|
||||
/*memory map*/
|
||||
int map_fd = open("/proc/test2", O_RDWR|O_SYNC);
|
||||
if (map_fd < 0) {
|
||||
printf("cannot open file /proc/test2\n");
|
||||
return -1;
|
||||
}
|
||||
vaddr = mmap(NULL, MEMSZ, PROT_READ|PROT_WRITE,
|
||||
MAP_SHARED, map_fd, 0);
|
||||
|
||||
if (vaddr == MAP_FAILED) {
|
||||
perror("mmap");
|
||||
printf("MAP_FAILED : %s", vaddr);
|
||||
close(map_fd);
|
||||
return -1;
|
||||
}
|
||||
|
||||
char *buf;
|
||||
for(int i = 0; i < MEMSZ; i += PAGE_SIZE) {
|
||||
int val;
|
||||
buf = vaddr + i;
|
||||
printf("vaddr: %p \n", buf);
|
||||
assert(!virt_to_phys_user(&paddr, getpid(), (uintptr_t)buf));
|
||||
printf("paddr = 0x%jx\n", (uintmax_t)paddr);
|
||||
val = atoi(buf);
|
||||
printf("val: %d\n", val);
|
||||
sprintf(buf, "%d", -val);
|
||||
}
|
||||
|
||||
int ret = munmap(vaddr, PAGE_SIZE*2);
|
||||
if (ret) {
|
||||
printf("munmap failed:%d \n",ret);
|
||||
}
|
||||
close(map_fd);
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,87 @@
|
|||
#define _XOPEN_SOURCE 700
|
||||
#include <assert.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h> /* uintmax_t */
|
||||
#include <string.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h> /* sysconf */
|
||||
|
||||
#include "common.h" /* virt_to_phys_user */
|
||||
|
||||
#define MAP_POPULATE 0x8000
|
||||
enum { BUFFER_SIZE = 4 };
|
||||
|
||||
/*
 * Client for /proc/custom_mmap: map 128 pages of the driver buffer, fill
 * the mapping with the byte 7, write "qwer" at byte offset 4500 (which
 * faults in a page past the first one), then unmap and close.
 *
 * The read()/write() cross-checks against the kernel side are currently
 * disabled. Any failure is reported with perror() and aborts via assert(0).
 */
int main(int argc, char **argv)
{
    /* Fixed path; argv is not written because argv[1] need not exist. */
    const char *path = "/proc/custom_mmap";
    const unsigned long offset = 4500;
    long page_size;
    char *address1;
    int fd;

    page_size = sysconf(_SC_PAGE_SIZE);
    if (page_size < 0) {
        perror("sysconf");
        assert(0);
    }
    page_size *= 128;
    printf("open pathname = %s of size %ld\n", path, page_size);

    fd = open(path, O_RDWR | O_SYNC);
    if (fd < 0) {
        perror("open");
        assert(0);
    }
    printf("fd = %d\n", fd);

    puts("mmap 1");
    address1 = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (address1 == MAP_FAILED) {
        perror("mmap");
        assert(0);
    }

    printf("address1 %lx\n", (unsigned long)address1);

    /* Read and modify memory. */
    puts("access 1");
    memset(address1, 7, page_size);

    /* vm_fault */
    printf("address1 + offset %lx\n", (unsigned long)(address1 + offset));
    strcpy(address1 + offset, "qwer");

    /* Cleanup. */
    puts("munmap 1");
    if (munmap(address1, page_size)) {
        perror("munmap");
        assert(0);
    }

    puts("close");
    close(fd);
    return EXIT_SUCCESS;
}
|
||||
|
|
@ -1,190 +0,0 @@
|
|||
#include "ap_int.h"
|
||||
#include "hls_stream.h"
|
||||
#include "shift_flex.h"
|
||||
#include "dma.h"
|
||||
#include "para.h"
|
||||
|
||||
|
||||
template<int MAX_D, int MAX_C, int PA, int PE>
|
||||
void shiftLayer_FIXED(hls::stream<ap_uint<FM_W*PA> > &fmap,
|
||||
hls::stream<ap_uint<FM_W*PA> > &out,
|
||||
const ap_uint<W_W*PA*PE> *k0,
|
||||
const ap_uint<W_W*PA*PE> *k1,
|
||||
const T_SUM *th0,
|
||||
const T_SUM *th1,
|
||||
const int FM_D,
|
||||
const int IN_CH,
|
||||
bool skip_maxPool,
|
||||
int batch){
|
||||
#pragma HLS INLINE
|
||||
const int MID_CH = skip_maxPool?IN_CH:IN_CH << 1;
|
||||
const int MID_D = skip_maxPool? FM_D : FM_D>>1;
|
||||
hls::stream<ap_uint<SUM_W*PE> > s_conv0;
|
||||
#pragma HLS STREAM variable=s_conv0 depth=16 dim=1
|
||||
hls::stream<ap_uint<FM_W*PE> > s_relu0;
|
||||
#pragma HLS STREAM variable=s_relu0 depth=16 dim=1
|
||||
_conv2d_flex<MAX_C, PA, PE, FM_W, W_W, SUM_W, T_FMAP, T_W, T_SUM>(fmap, s_conv0, k0, FM_D, IN_CH, MID_CH, batch);
|
||||
_relu_flex<SUM_W, FM_W, PE, T_SUM>(s_conv0, s_relu0, th0, FM_D, MID_CH, batch);
|
||||
|
||||
hls::stream<ap_uint<FM_W*PE> > s_pool, s_shift;
|
||||
#pragma HLS STREAM variable=s_pool depth=16 dim=1
|
||||
_max_pool_2x2<MAX_D, MAX_C, FM_W, PE>(s_relu0, s_pool, FM_D, MID_CH, skip_maxPool, batch);
|
||||
|
||||
_shift_flex<MAX_D, MAX_C, FM_W, PE>(s_pool, s_shift, MID_D, MID_CH, batch);
|
||||
|
||||
hls::stream<ap_uint<SUM_W*PA> > s_conv1;
|
||||
#pragma HLS STREAM variable=s_conv1 depth=16 dim=1
|
||||
_conv2d_flex<MAX_C, PE, PA, FM_W, W_W, SUM_W, T_FMAP, T_W, T_SUM>(s_shift, s_conv1, k1, MID_D, MID_CH, MID_CH, batch);
|
||||
_relu_flex<SUM_W, FM_W, PA, T_SUM>(s_conv1, out, th1, MID_D, MID_CH, batch);
|
||||
}
|
||||
|
||||
template<int MAX_D, int MAX_C, int PE>
|
||||
void shiftLayer_RES(hls::stream<ap_uint<FM_W*PE> > &fmap,
|
||||
hls::stream<ap_uint<FM_W*PE> > &out,
|
||||
const ap_uint<W_W*PE*PE> *k2,
|
||||
const T_SUM *th0,
|
||||
const int FM_D,
|
||||
const int IN_CH,
|
||||
bool skip,
|
||||
int batch){
|
||||
#pragma HLS INLINE
|
||||
const int MID_CH = skip?IN_CH:IN_CH << 1;
|
||||
const int MID_D = skip? FM_D : FM_D>>1;
|
||||
|
||||
hls::stream<ap_uint<FM_W*PE> > s_pool;
|
||||
#pragma HLS STREAM variable=s_pool depth=16 dim=1
|
||||
_max_pool_2x2<MAX_D, MAX_C, FM_W, PE>(fmap, s_pool, FM_D, IN_CH, skip, batch);
|
||||
|
||||
hls::stream<ap_uint<SUM_W*PE> > s_conv0;
|
||||
#pragma HLS STREAM variable=s_conv0 depth=16 dim=1
|
||||
_conv2d_flex<MAX_C, PE, PE, FM_W, W_W, SUM_W, T_FMAP, T_W, T_SUM>(s_pool, s_conv0, k2, MID_D, IN_CH, MID_CH, skip, batch);
|
||||
_relu_flex<SUM_W, FM_W, PE, T_SUM>(s_conv0, out, th0, MID_D, MID_CH, skip, batch);
|
||||
}
|
||||
|
||||
|
||||
// One network layer as a dataflow region: the input feature map is read
// from memory into a stream, split into two streams, each half is run
// through its own branch (shiftLayer_FIXED / shiftLayer_RES), the branch
// outputs are merged, and the result is written back to memory.
//
// fmap / out    input and output feature maps in memory
// k0, k1, k2    weights: k0/k1 for shiftLayer_FIXED, k2 for shiftLayer_RES
// FM_D, FM_CH   input feature-map dimension and channel count
// th_i          row of the threshold tables to use for this layer
// pool          when true, the output has twice the channels and half the
//               dimension of the input
// batch         batch count, forwarded to every stage
void wrapper(ap_uint<FM_W*PA_0>* fmap, ap_uint<FM_W*PA_0> * out,
             ap_uint<W_W * PE_0 * PA_0> *k0,
             ap_uint<W_W * PE_0 * PA_0> *k1,
             ap_uint<W_W * PA_0 * PA_0> *k2,
             int FM_D,
             int FM_CH,
             int th_i,
             bool pool,
             int batch){
#pragma HLS INLINE off

    typedef ap_uint<FM_W*PA_0> fm_word_t;

    const int MAX_LAYERS = 16;
    const int MAX_D = 224;
    const int MAX_CH = 1024;
    const int out_ch = pool ? (FM_CH << 1) : FM_CH;
    const int out_d  = pool ? (FM_D >> 1) : FM_D;

#pragma HLS DATAFLOW

    // Threshold tables, one row per layer; all three use the same data file.
    const T_SUM th0[MAX_LAYERS][(1<<FM_W)-1]={
#include "th.txt"
    };
    const T_SUM th1[MAX_LAYERS][(1<<FM_W)-1]={
#include "th.txt"
    };
    const T_SUM th2[MAX_LAYERS][(1<<FM_W)-1]={
#include "th.txt"
    };

    hls::stream<fm_word_t> st_layer0;
#pragma HLS STREAM variable=st_layer0 depth=16 dim=1
    hls::stream<fm_word_t> out_layer;
#pragma HLS STREAM variable=out_layer depth=16 dim=1

    // Memory -> stream.
    M2S<fm_word_t, fm_word_t>(fmap, st_layer0, FM_D*FM_D*FM_CH/PA_0*batch);

    hls::stream<fm_word_t> out_left, out_right;
#pragma HLS STREAM variable=out_left depth=16 dim=1
#pragma HLS STREAM variable=out_right depth=16 dim=1
    hls::stream<fm_word_t> left, right;
#pragma HLS STREAM variable=left depth=16 dim=1
#pragma HLS STREAM variable=right depth=4*16*256/32 dim=1
    splitStream(st_layer0, left, right, FM_CH/PA_0, FM_D*FM_D*batch);

    // shift-layer: each branch receives half of the input channels.
    shiftLayer_FIXED<MAX_D, MAX_CH, PA_0, PE_0>(left, out_left,
                                                k0, k1, th0[th_i], th1[th_i],
                                                FM_D, FM_CH>>1, !pool, batch);
    shiftLayer_RES<MAX_D, MAX_CH, PA_0>(right, out_right,
                                        k2, th2[th_i],
                                        FM_D, FM_CH>>1, !pool, batch);

    mergeStream(out_left, out_right, out_layer, out_ch/PA_0, out_d*out_d*batch);

    // Stream -> memory.
    S2M<fm_word_t, fm_word_t>(out_layer, out, out_d*out_d*out_ch/PA_0*batch);
}
|
||||
|
||||
|
||||
extern "C"
|
||||
void top(ap_uint<FM_W*PA_0>* fmap, ap_uint<FM_W*PA_0> * out,
|
||||
ap_uint<W_W * PE_0 * PA_0> *k0,
|
||||
ap_uint<W_W * PE_0 * PA_0> *k1,
|
||||
ap_uint<W_W * PA_0 * PA_0> *k2,
|
||||
int FM_D,
|
||||
int FM_CH,
|
||||
int th_i,
|
||||
bool pool,
|
||||
int batch
|
||||
){
|
||||
#pragma HLS INTERFACE m_axi port=fmap offset=slave bundle=gmem depth=4*32*32*128/32
|
||||
#pragma HLS INTERFACE s_axilite port=fmap bundle=control
|
||||
#pragma HLS INTERFACE m_axi port=out offset=slave bundle=gmem3 depth=4*32*32*128/32
|
||||
#pragma HLS INTERFACE s_axilite port=out bundle=control
|
||||
|
||||
#pragma HLS INTERFACE m_axi port=k0 bundle=gmem0 depth=128*128/32/32
|
||||
#pragma HLS INTERFACE s_axilite port=k0 bundle=control
|
||||
#pragma HLS INTERFACE m_axi port=k1 bundle=gmem1 depth=128*128/32/32
|
||||
#pragma HLS INTERFACE s_axilite port=k1 bundle=control
|
||||
#pragma HLS INTERFACE m_axi port=k2 bundle=gmem2 depth=128*128/32/32
|
||||
#pragma HLS INTERFACE s_axilite port=k2 bundle=control
|
||||
|
||||
#pragma HLS INTERFACE s_axilite port=FM_D bundle=control
|
||||
#pragma HLS INTERFACE s_axilite port=FM_CH bundle=control
|
||||
#pragma HLS INTERFACE s_axilite port=th_i bundle=control
|
||||
#pragma HLS INTERFACE s_axilite port=pool bundle=control
|
||||
#pragma HLS INTERFACE s_axilite port=batch bundle=control
|
||||
#pragma HLS INTERFACE s_axilite port=return bundle=control
|
||||
|
||||
// const int MAX_LAYERS = 16;
|
||||
// const int MAX_D = 224;
|
||||
// const int MAX_CH = 1024;
|
||||
// const int MID_CH = pool? FM_CH<<1:FM_CH;
|
||||
// const int MID_D = pool? FM_D>>1 : FM_D;
|
||||
//
|
||||
// ap_uint<W_W * PE_0 * PA_0> k0_buffer[512 * 512 / PE_0 * PA_0];
|
||||
// ap_uint<W_W * PE_0 * PA_0> k1_buffer[512 * 512 / PE_0 * PA_0];
|
||||
// ap_uint<W_W * PE_0 * PA_0> k2_buffer[512 * 512 / PE_0 * PA_0];
|
||||
//
|
||||
// int C = FM_CH>>1;
|
||||
// static const int MOCPE = MAX_CH/PE_0;
|
||||
// static const int MCPA = MAX_CH/PA_0;
|
||||
// const int CPA = C/PA_0;
|
||||
// const int OCPE = (MID_CH >> 1)/PE_0;
|
||||
//
|
||||
// for(int c=0;c<MCPA && c<CPA;c++){
|
||||
// for(int n=0;n<MOCPE && n<OCPE;n++){
|
||||
// ap_uint<W_W * PE_0 * PA_0> k0_buffer = k0[c * OCPE + n];
|
||||
// }
|
||||
// }
|
||||
// for(int c=0;c<MCPA && c<CPA;c++){
|
||||
// for(int n=0;n<MOCPE && n<OCPE;n++){
|
||||
// ap_uint<W_W * PE_0 * PA_0> k1_buffer = k1[c * OCPE + n];
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// const int MOCPE2 = MAX_CH/PA_0;
|
||||
// const int OCPE2=(MID_CH >> 1)/PA_0;
|
||||
// for(int c=0;c<MCPA && c<CPA;c++){
|
||||
// for(int n=0;n<MOCPE2 && n<OCPE2;n++){
|
||||
// ap_uint<W_W * PE_0 * PA_0> k2_buffer = k2[c * OCPE + n];
|
||||
// }
|
||||
// }
|
||||
// wrapper(fmap,out,k0_buffer,k1_buffer,k2_buffer,FM_D, FM_CH, th_i, pool, batch);
|
||||
wrapper(fmap,out,k0,k1,k2,FM_D, FM_CH, th_i, pool, batch);
|
||||
}
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
#include "/scratch/qijing.huang/firesim_new/hls/sw/bm//mmio.h"
|
||||
#include "/home/centos/hls-fs/hls/sw/bm//mmio.h"
|
||||
#define ACCEL_BASE 0x20000
|
||||
#define AP_DONE_MASK 0b10
|
||||
#define ACCEL_INT 0x4
|
||||
void vgg_wrapper() {
|
||||
void top_wrapper() {
|
||||
// Disable Interrupt
|
||||
reg_write32(ACCEL_BASE + ACCEL_INT, 0x0);
|
||||
|
|
@ -0,0 +1,208 @@
|
|||
#!/usr/bin/perl
|
||||
use warnings;
|
||||
use strict;
|
||||
use Cwd;
|
||||
use File::Copy;
|
||||
use List::Util qw(first);
|
||||
use Tie::IxHash;
|
||||
|
||||
# Inputs: file_name, func_name, func_base_addr, prefix(Optional)
|
||||
my $dir = getcwd;
|
||||
my $file_name = $ARGV[0];
|
||||
my $func_name = $ARGV[1];
|
||||
my $func_base_addr = $ARGV[2];
|
||||
|
||||
my $prefix = undef;
|
||||
|
||||
my $num_args = $#ARGV + 1;
|
||||
if ($num_args > 3) {
|
||||
$prefix = $ARGV[3];
|
||||
}
|
||||
|
||||
my $rdir = $ENV{'RDIR'};
|
||||
#print $rdir;
|
||||
# RDIR must be set in the environment; every path below is built from it.
# Report the problem on STDERR and exit non-zero so callers can detect it.
if ((not defined($rdir)) or $rdir eq '') {
    print STDERR "Please source sourceme-f1.sh!\n";
    exit(1);
}
|
||||
|
||||
my $bm_path = $rdir."/sim/target-rtl/firechip/hls_$file_name"."_$func_name";
|
||||
my $wrapper_func_name = $func_name."_wrapper";
|
||||
my $wrapper_header= "bm_wrapper.h";
|
||||
|
||||
if ($prefix) {
|
||||
$func_name = $prefix.$func_name;
|
||||
}
|
||||
|
||||
my $bm_inc_path = $rdir."/hls/sw/bm/";
|
||||
#############################PARSE Verilog##############################
|
||||
|
||||
my %var_dict;
|
||||
tie %var_dict, "Tie::IxHash";
|
||||
my $verilog_file = "$dir/../verilog/$func_name"."_control_s_axi.v";
|
||||
print "Parsing ".$verilog_file."\n";
|
||||
# parse the verilog file to get the info we need
|
||||
if(!open VERILOG, "$verilog_file"){
|
||||
print $!;
|
||||
} else {
|
||||
my $start = 0;
|
||||
my $line = undef;
|
||||
while(<VERILOG>){
|
||||
$line = $_;
|
||||
|
||||
if($line =~ m/------------------------Parameter----------------------/){
|
||||
$start = 0;
|
||||
}
|
||||
if($start){
|
||||
|
||||
if($line =~ m/(0x\S+) : Data signal of (\S+)/){
|
||||
my $base_addr = $1;
|
||||
my $var = $2;
|
||||
#print("$base_addr : $var\n");
|
||||
if (exists $var_dict{$var}) {
|
||||
push (@{$var_dict{$var}}, $base_addr);
|
||||
} else {
|
||||
my @addr = ();
|
||||
push (@addr, $base_addr);
|
||||
$var_dict{$var} = \@addr;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
if($line =~ m/------------------------Address Info------------------/){
|
||||
$start= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#############################GENERATE Software Bare-metal Wrappers##############################
|
||||
# We want ordered hash so we didn't add this piece of code into a func
|
||||
#sub generate_bm_wrapper {
|
||||
# my %var_dict=%{$_[0]};
|
||||
# tie %var_dict, "Tie::IxHash";
|
||||
# my $func_base_addr = $_[1];
|
||||
foreach my $var (keys %var_dict) {
|
||||
print($var.": ");
|
||||
|
||||
my @addr = @{$var_dict{$var}};
|
||||
foreach my $base_addr(@addr) {
|
||||
|
||||
print($base_addr."\t");
|
||||
}
|
||||
print("\n");
|
||||
}
|
||||
my $wrapper = '#include "'.$bm_inc_path.'/mmio.h"'."\n";
|
||||
#$wrapper .= '#include "'.$bm_inc_path.'/time.h"'."\n";
|
||||
|
||||
$wrapper .= '#define ACCEL_BASE '.$func_base_addr."\n";
|
||||
|
||||
$wrapper .= "#define AP_DONE_MASK 0b10\n";
|
||||
$wrapper .= "#define ACCEL_INT 0x4\n";
|
||||
foreach my $var (keys %var_dict) {
|
||||
|
||||
my @addr = @{$var_dict{$var}};
|
||||
my $idx = 0;
|
||||
|
||||
foreach my $base_addr(@addr) {
|
||||
$wrapper .="#define "."ACCEL_$var"."_$idx"." $base_addr\n";
|
||||
$idx +=1;
|
||||
}
|
||||
}
|
||||
|
||||
my $ap_return = 0;
|
||||
my $ap_return_type = "uint32_t";
|
||||
if (exists $var_dict{"ap_return"}) {
|
||||
my $size=@{$var_dict{"ap_return"}};
|
||||
if ($size == 2){
|
||||
$ap_return_type = "uint64_t";
|
||||
}
|
||||
$ap_return = 1;
|
||||
}
|
||||
|
||||
if ($ap_return){
|
||||
$wrapper .= $ap_return_type." $wrapper_func_name(";
|
||||
} else {
|
||||
$wrapper .="void $wrapper_func_name(";
|
||||
}
|
||||
|
||||
my @arglist=();
|
||||
foreach my $var (keys %var_dict) {
|
||||
if ($var eq "ap_return") {
|
||||
next;
|
||||
}
|
||||
|
||||
my $var_type = "uint32_t";
|
||||
my $size=@{$var_dict{$var}};
|
||||
if ($size == 2){
|
||||
$var_type = "uint64_t";
|
||||
}
|
||||
push(@arglist, "$var_type $var");
|
||||
}
|
||||
|
||||
my $args = join ', ', @arglist;
|
||||
$wrapper.= $args.") {";
|
||||
|
||||
$wrapper.= '
|
||||
// Disable Interrupt
|
||||
reg_write32(ACCEL_BASE + ACCEL_INT, 0x0);
|
||||
';
|
||||
|
||||
foreach my $var (keys %var_dict) {
|
||||
if ($var eq "ap_return") {
|
||||
next;
|
||||
}
|
||||
|
||||
my @addr = @{$var_dict{$var}};
|
||||
my $idx = 0;
|
||||
foreach my $base_addr(@addr) {
|
||||
my $shift = "";
|
||||
if ($idx == 1){
|
||||
$shift = " >> 32";
|
||||
}elsif($idx > 1){
|
||||
die "Index exceeds limit!\n";
|
||||
}
|
||||
$wrapper .=" reg_write32(ACCEL_BASE + ACCEL_$var"."_$idx, (uint32_t) ($var$shift));\n";
|
||||
$idx +=1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
$wrapper .='
|
||||
// Write to ap_start to start the execution
|
||||
reg_write32(ACCEL_BASE, 0x1);
|
||||
|
||||
// Done?
|
||||
int done = 0;
|
||||
while (!done){
|
||||
done = reg_read32(ACCEL_BASE) & AP_DONE_MASK;
|
||||
}
|
||||
';
|
||||
|
||||
# If there is a return value, emit C code that reassembles it from the
# accelerator's ap_return register(s) and returns it.
#
# ap_return may occupy one or two 32-bit registers. Register 0 holds the low
# word; register 1 (only present for a 64-bit return) holds the high word, so
# it must be widened to the return type and shifted LEFT by 32 before being
# OR-ed in. (The previous code shifted the 32-bit read right by 32, which
# discards the high word.)
if ($ap_return){
    my @ret_regs = @{$var_dict{"ap_return"}};

    die "Index exceeds limit!\n" if @ret_regs > 2;

    $wrapper .= "\n    $ap_return_type ret_val = 0;\n";

    foreach my $idx (0 .. $#ret_regs) {
        # Only the second register carries the upper 32 bits.
        my $shift = ($idx == 1) ? " << 32" : "";
        $wrapper .= "    ret_val = (($ap_return_type) reg_read32(ACCEL_BASE + ACCEL_ap_return"."_$idx)$shift) | ret_val;\n";
    }
    $wrapper .= "    return ret_val;\n";
}
|
||||
|
||||
$wrapper .="}\n";
|
||||
# Write the generated C wrapper to $wrapper_header. Abort with the OS error
# if the file cannot be created or flushed, instead of silently producing
# nothing.
open(FILE, "> $wrapper_header") or die "Cannot open $wrapper_header for writing: $!\n";
print FILE $wrapper;
close(FILE) or die "Cannot close $wrapper_header: $!\n";
|
||||
#}
|
||||
|
||||
#generate_bm_wrapper(\%var_dict, $func_base_addr);
|
|
@ -0,0 +1,98 @@
|
|||
// Based on code by Schuyler Eldridge. Copyright (c) Boston University
|
||||
// https://github.com/seldridge/rocket-rocc-examples/blob/master/src/main/c/rocc.h
|
||||
|
||||
#ifndef SRC_MAIN_C_ROCC_H
|
||||
#define SRC_MAIN_C_ROCC_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#define STR1(x) #x
|
||||
#define STR(x) STR1(x)
|
||||
#define EXTRACT(a, size, offset) (((~(~0 << size) << offset) & a) >> offset)
|
||||
|
||||
#define CUSTOMX_OPCODE(x) CUSTOM_ ## x
|
||||
#define CUSTOM_0 0b0001011
|
||||
#define CUSTOM_1 0b0101011
|
||||
#define CUSTOM_2 0b1011011
|
||||
#define CUSTOM_3 0b1111011
|
||||
|
||||
#define CUSTOMX(X, xd, xs1, xs2, rd, rs1, rs2, funct) \
|
||||
CUSTOMX_OPCODE(X) | \
|
||||
(rd << (7)) | \
|
||||
(xs2 << (7+5)) | \
|
||||
(xs1 << (7+5+1)) | \
|
||||
(xd << (7+5+2)) | \
|
||||
(rs1 << (7+5+3)) | \
|
||||
(rs2 << (7+5+3+5)) | \
|
||||
(EXTRACT(funct, 7, 0) << (7+5+3+5+5))
|
||||
|
||||
// Standard macro that passes rd, rs1, and rs2 via registers
|
||||
#define ROCC_INSTRUCTION_DSS(X, rd, rs1, rs2, funct) \
|
||||
ROCC_INSTRUCTION_R_R_R(X, rd, rs1, rs2, funct, 10, 11, 12)
|
||||
|
||||
#define ROCC_INSTRUCTION_DS(X, rd, rs1, funct) \
|
||||
ROCC_INSTRUCTION_R_R_I(X, rd, rs1, 0, funct, 10, 11)
|
||||
|
||||
#define ROCC_INSTRUCTION_D(X, rd, funct) \
|
||||
ROCC_INSTRUCTION_R_I_I(X, rd, 0, 0, funct, 10)
|
||||
|
||||
#define ROCC_INSTRUCTION_SS(X, rs1, rs2, funct) \
|
||||
ROCC_INSTRUCTION_I_R_R(X, 0, rs1, rs2, funct, 11, 12)
|
||||
|
||||
#define ROCC_INSTRUCTION_S(X, rs1, funct) \
|
||||
ROCC_INSTRUCTION_I_R_I(X, 0, rs1, 0, funct, 11)
|
||||
|
||||
#define ROCC_INSTRUCTION(X, funct) \
|
||||
ROCC_INSTRUCTION_I_I_I(X, 0, 0, 0, funct)
|
||||
|
||||
// rd, rs1, and rs2 are data
|
||||
// rd_n, rs1_n, and rs2_n are the register numbers to use
|
||||
#define ROCC_INSTRUCTION_R_R_R(X, rd, rs1, rs2, funct, rd_n, rs1_n, rs2_n) { \
|
||||
register uint64_t rd_ asm ("x" # rd_n); \
|
||||
register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \
|
||||
register uint64_t rs2_ asm ("x" # rs2_n) = (uint64_t) rs2; \
|
||||
asm volatile ( \
|
||||
".word " STR(CUSTOMX(X, 1, 1, 1, rd_n, rs1_n, rs2_n, funct)) "\n\t" \
|
||||
: "=r" (rd_) \
|
||||
: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
|
||||
rd = rd_; \
|
||||
}
|
||||
|
||||
#define ROCC_INSTRUCTION_R_R_I(X, rd, rs1, rs2, funct, rd_n, rs1_n) { \
|
||||
register uint64_t rd_ asm ("x" # rd_n); \
|
||||
register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \
|
||||
asm volatile ( \
|
||||
".word " STR(CUSTOMX(X, 1, 1, 0, rd_n, rs1_n, rs2, funct)) "\n\t" \
|
||||
: "=r" (rd_) : [_rs1] "r" (rs1_)); \
|
||||
rd = rd_; \
|
||||
}
|
||||
|
||||
#define ROCC_INSTRUCTION_R_I_I(X, rd, rs1, rs2, funct, rd_n) { \
|
||||
register uint64_t rd_ asm ("x" # rd_n); \
|
||||
asm volatile ( \
|
||||
".word " STR(CUSTOMX(X, 1, 0, 0, rd_n, rs1, rs2, funct)) "\n\t" \
|
||||
: "=r" (rd_)); \
|
||||
rd = rd_; \
|
||||
}
|
||||
|
||||
#define ROCC_INSTRUCTION_I_R_R(X, rd, rs1, rs2, funct, rs1_n, rs2_n) { \
|
||||
register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \
|
||||
register uint64_t rs2_ asm ("x" # rs2_n) = (uint64_t) rs2; \
|
||||
asm volatile ( \
|
||||
".word " STR(CUSTOMX(X, 0, 1, 1, rd, rs1_n, rs2_n, funct)) "\n\t" \
|
||||
:: [_rs1] "r" (rs1_), [_rs2] "r" (rs2_)); \
|
||||
}
|
||||
|
||||
#define ROCC_INSTRUCTION_I_R_I(X, rd, rs1, rs2, funct, rs1_n) { \
|
||||
register uint64_t rs1_ asm ("x" # rs1_n) = (uint64_t) rs1; \
|
||||
asm volatile ( \
|
||||
".word " STR(CUSTOMX(X, 0, 1, 0, rd, rs1_n, rs2, funct)) "\n\t" \
|
||||
:: [_rs1] "r" (rs1_)); \
|
||||
}
|
||||
|
||||
#define ROCC_INSTRUCTION_I_I_I(X, rd, rs1, rs2, funct) { \
|
||||
asm volatile ( \
|
||||
".word " STR(CUSTOMX(X, 0, 0, 0, rd, rs1, rs2, funct)) "\n\t" ); \
|
||||
}
|
||||
|
||||
#endif // SRC_MAIN_C_ROCC_H
|
|
@ -0,0 +1,561 @@
|
|||
#!/usr/bin/perl
|
||||
use warnings;
|
||||
use strict;
|
||||
use Cwd;
|
||||
use File::Copy;
|
||||
use List::Util qw(first);
|
||||
|
||||
# Inputs: file_name, func_name, func_base_addr, prefix(Optional)
|
||||
# The three positional arguments are all substituted into generated files
# below, so refuse to run without them.
die "Usage: $0 file_name func_name func_base_addr [prefix]\n" if @ARGV < 3;

my $dir = getcwd;
my $file_name = $ARGV[0];
my $func_name = $ARGV[1];
my $func_base_addr = $ARGV[2];
my $rdir = $ENV{'RDIR'};

# RDIR locates the build directory, so validate it BEFORE it is used to build
# any path. Failing here is an error, so exit non-zero via die.
if ((not defined($rdir)) or $rdir eq '') {
    die "Please source sourceme-f1.sh!\n";
}

my $prefix = undef;
my $i = undef;    # shared index variable for the C-style loops further down

my $num_args = $#ARGV + 1;
if ($num_args > 3) {
    $prefix = $ARGV[3];
}

# The build directory name uses the un-prefixed function name, so compute it
# before the optional prefix is prepended to $func_name.
my $bm_path = $rdir."/sim/target-rtl/firechip/hls_$file_name"."_$func_name";
if ($prefix) {
    $func_name = $prefix.$func_name;
}

# Minimal sbt project description; "test_c" is a placeholder for the
# (possibly prefixed) function name.
my $build_sbt = '
organization := "edu.berkeley.cs"

version := "1.0"

name := "hls_test_c"';

$build_sbt =~ s/test_c/$func_name/g;
my $build_sbt_path = "$bm_path/"."build.sbt";
open(BUILD, ">$build_sbt_path") or die "Cannot open $build_sbt_path for writing: $!\n";
print BUILD $build_sbt;
close(BUILD) or die "Cannot close $build_sbt_path: $!\n";
|
||||
|
||||
my $verilog_file = "$dir/../verilog/$func_name".".v";
|
||||
my $line = undef;
|
||||
my @verilog_param = ();
|
||||
my @param_val = ();
|
||||
my @verilog_input = ();
|
||||
my @verilog_input_size = ();
|
||||
my @verilog_output = ();
|
||||
my @verilog_output_size = ();
|
||||
|
||||
#my $m_axi_data_width = undef;
|
||||
#my $s_axi_data_width = undef;
|
||||
|
||||
my @bus_names=();
|
||||
my @m_axi_data_widths = ();
|
||||
my $s_axi_data_width = undef;
|
||||
|
||||
print "Parsing ".$verilog_file."\n";
|
||||
# parse the verilog file to get the info we need
|
||||
if(!open VERILOG, "$verilog_file"){
|
||||
print $!;
|
||||
} else {
|
||||
while(<VERILOG>){
|
||||
$line = $_;
|
||||
|
||||
# Match AXI4 parameter
|
||||
if($line =~ m/parameter\s+(C_\S+) =\s+(.*);/){
|
||||
my $param = $1;
|
||||
my $val = $2;
|
||||
$param .="";
|
||||
if($param =~ m/C_M_AXI_(\S+)_DATA_WIDTH/){
|
||||
my $bus_name = lc $1;
|
||||
#$m_axi_data_width = $val;
|
||||
push(@bus_names, $bus_name);
|
||||
push(@m_axi_data_widths, $val);
|
||||
}
|
||||
if ($param eq "C_S_AXI_DATA_WIDTH") {
|
||||
$s_axi_data_width = $val;
|
||||
}
|
||||
push (@verilog_param, $param);
|
||||
push (@param_val, $val);
|
||||
} elsif($line =~ m/^\s*input\s+(.*)/){
|
||||
my $input = $1;
|
||||
#print "input:$input\n";
|
||||
if($input =~ m/\s*\[(.*):(.*)\]\s*(.*)\s*;/){
|
||||
my $end = $1;
|
||||
my $start = $2;
|
||||
my $input_name = $3;
|
||||
#print "here!"."$input_name\n";
|
||||
push (@verilog_input, $input_name);
|
||||
my $size = 0;
|
||||
if ($end =~ m/^\d+$/){
|
||||
$size = $end - $start + 1;
|
||||
$size = "".$size;
|
||||
} elsif($end =~m/(\S+) - 1/) {
|
||||
$size = $1;
|
||||
}
|
||||
push(@verilog_input_size, $size);
|
||||
}elsif ($input =~ m/\s*(.*)\s*;/){
|
||||
my $input_name = $1;
|
||||
#print "here!"."$input_name\n";
|
||||
push (@verilog_input, $input_name);
|
||||
push(@verilog_input_size, "1");
|
||||
}
|
||||
|
||||
}elsif($line =~ m/^\s*output\s+(.*)/){
|
||||
my $output = $1;
|
||||
#print "output:$output\n";
|
||||
if($output =~ m/\s*\[(.*):(.*)\]\s*(.*)\s*;/){
|
||||
my $end = $1;
|
||||
my $start = $2;
|
||||
my $output_name = $3;
|
||||
#print "here!"."$output_name\n";
|
||||
push(@verilog_output, $output_name);
|
||||
my $size = 0;
|
||||
if ($end =~ m/^\d+$/){
|
||||
$size = $end - $start + 1;
|
||||
$size = "".$size;
|
||||
} elsif($end =~m/(\S+) - 1/) {
|
||||
$size = $1;
|
||||
}
|
||||
push(@verilog_output_size, $size);
|
||||
}elsif ($output =~ m/\s*(.*)\s*;/){
|
||||
my $output_name = $1;
|
||||
#print "here!"."$output_name\n";
|
||||
push (@verilog_output, $output_name);
|
||||
push(@verilog_output_size, "1");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
print("Parameters: ");
|
||||
my $param_str = join ' ', @verilog_param;
|
||||
print $param_str."\n";
|
||||
|
||||
print("Inputs: ");
|
||||
my $in_str = join ' ', @verilog_input;
|
||||
print $in_str."\n";
|
||||
print("Outputs: ");
|
||||
my $out_str = join ' ', @verilog_output;
|
||||
print $out_str."\n";
|
||||
}
|
||||
|
||||
# create the scala folder
|
||||
my $scala_dir = "$dir/../scala";
|
||||
mkdir $scala_dir unless (-d $scala_dir);
|
||||
|
||||
##############################################################################################################################
|
||||
if(@m_axi_data_widths < 1){
|
||||
push(@bus_names, "gmem_dummy");
|
||||
push(@m_axi_data_widths, 32);
|
||||
}
|
||||
|
||||
if(not defined($s_axi_data_width)) {
|
||||
$s_axi_data_width=32
|
||||
}
|
||||
|
||||
print "Generating BlackBox file ...\n";
|
||||
for( $i = 0; $i < @m_axi_data_widths; $i = $i + 1 ){
|
||||
print "m_axi_data_width_ $bus_names[$i]= $m_axi_data_widths[$i]\n";
|
||||
}
|
||||
|
||||
print "s_axi_data_width = $s_axi_data_width\n";
|
||||
# should be under scala folder
|
||||
open BB, ">$scala_dir/$func_name"."_blackbox.scala";
|
||||
|
||||
my $blackbox1 = "
|
||||
package hls_test_c
|
||||
import Chisel._
|
||||
import freechips.rocketchip.config.{Parameters, Field}
|
||||
import freechips.rocketchip.tile._
|
||||
import freechips.rocketchip.util._
|
||||
|
||||
class test_c() extends BlackBox() {
|
||||
";
|
||||
$blackbox1 =~ s/test_c/$func_name/g;
|
||||
|
||||
# Append one Scala `val` line per parsed Verilog parameter to the BlackBox
# class header. A loop-local index is used: re-declaring the file-scoped
# `my $i` here would raise Perl's "masks earlier declaration in same scope"
# warning under `use warnings`.
foreach my $param_idx (0 .. $#verilog_param) {
    $blackbox1 .= "val $verilog_param[$param_idx] = $param_val[$param_idx]\n";
}
|
||||
|
||||
print BB $blackbox1;
|
||||
|
||||
|
||||
print BB "\tval io = new Bundle {\n";
|
||||
my $bb_body = "";
|
||||
|
||||
# now if the input name does not start with ap, we assume it is an arg
|
||||
my $ap_return = 0;
|
||||
my $ap_clk = 0;
|
||||
my $ap_rst = 0;
|
||||
my $ap_rst_n = 0;
|
||||
|
||||
my @verilog_axi_io = ();
|
||||
|
||||
for( $i = 0; $i < @verilog_input; $i = $i + 1 ){
|
||||
my $input_name = $verilog_input[$i];
|
||||
my $input_size = $verilog_input_size[$i];
|
||||
if ($input_name =~ m/^ap_clk$/){
|
||||
$ap_clk = 1;
|
||||
}
|
||||
elsif ($input_name =~ m/^ap_rst$/){
|
||||
$ap_rst = 1;
|
||||
}
|
||||
elsif ($input_name =~ m/^ap_rst_n$/){
|
||||
$ap_rst_n = 1;
|
||||
}
|
||||
elsif($input_name =~ m/^(m_axi|s_axi)\S+$/){
|
||||
push (@verilog_axi_io, $input_name);
|
||||
}
|
||||
|
||||
print BB "\t\tval $input_name = ";
|
||||
if ($input_name =~ m/ap_clk(.*)/){
|
||||
print BB "Clock\(INPUT\)\n";
|
||||
}else{
|
||||
print BB "Bits\(INPUT, width = $input_size\)\n";
|
||||
}
|
||||
}
|
||||
|
||||
for( $i = 0; $i < @verilog_output; $i = $i + 1 ){
|
||||
|
||||
my $output_name = $verilog_output[$i];
|
||||
my $output_size = $verilog_output_size[$i];
|
||||
|
||||
if ($output_name =~ m/ap_return(.*)/){
|
||||
$ap_return = 1;
|
||||
}
|
||||
elsif($output_name =~ m/^(m_axi|s_axi)\S+$/){
|
||||
push (@verilog_axi_io, $output_name);
|
||||
}
|
||||
|
||||
print BB "\t\tval $output_name = ";
|
||||
print BB "Bits(OUTPUT, width = $output_size)\n";
|
||||
|
||||
}
|
||||
|
||||
print BB "\t}\n";
|
||||
print BB "}\n";
|
||||
|
||||
close BB;
|
||||
##############################################################################################################################
|
||||
print "Generating Control file ...\n";
|
||||
|
||||
open CT, ">$scala_dir/$func_name"."_accel.scala";
|
||||
|
||||
#TODO Fix AXI4 params
|
||||
my $control1 = '
|
||||
package hls_test_c
|
||||
|
||||
import chisel3._
|
||||
import chisel3.util._
|
||||
|
||||
import freechips.rocketchip.config.{Field, Parameters}
|
||||
import freechips.rocketchip.diplomacy._
|
||||
import freechips.rocketchip.tilelink._
|
||||
import freechips.rocketchip.amba.axi4._
|
||||
import freechips.rocketchip.util._
|
||||
import freechips.rocketchip.subsystem._
|
||||
|
||||
class HLStest_cAXI (address: BigInt = 0x20000, beatBytes: Int = 8) (implicit p: Parameters) extends LazyModule {
|
||||
|
||||
val numInFlight = 8
|
||||
';
|
||||
|
||||
for( $i = 0; $i < @m_axi_data_widths; $i = $i + 1 ){
|
||||
$control1 .="
|
||||
val node_$bus_names[$i] = AXI4MasterNode(Seq(AXI4MasterPortParameters(
|
||||
masters = Seq(AXI4MasterParameters(
|
||||
name = \"axil_hub_mem_out_$i\",
|
||||
id = IdRange(0, numInFlight),
|
||||
aligned = true,
|
||||
maxFlight = Some(8)
|
||||
)),
|
||||
userBits = 0
|
||||
)
|
||||
))";
|
||||
}
|
||||
$control1 .='
|
||||
val slave_node = AXI4SlaveNode(Seq(AXI4SlavePortParameters(
|
||||
slaves = Seq(AXI4SlaveParameters(
|
||||
address = List(AddressSet(address,0x4000-1)),
|
||||
regionType = RegionType.UNCACHED,
|
||||
supportsWrite = TransferSizes(1, beatBytes),
|
||||
supportsRead = TransferSizes(1, beatBytes),
|
||||
interleavedId = Some(0)
|
||||
)),
|
||||
beatBytes = beatBytes
|
||||
)))
|
||||
|
||||
lazy val module = new HLStest_cAXIModule(this)
|
||||
}
|
||||
|
||||
class HLStest_cAXIModule(outer: HLStest_cAXI) extends LazyModuleImp(outer) {
|
||||
|
||||
//val (out, edge) = outer.node.out(0)
|
||||
val (slave_in, slave_edge) = outer.slave_node.in(0)
|
||||
|
||||
val bId = Reg(UInt(32.W))
|
||||
val rId = Reg(UInt(32.W))
|
||||
|
||||
val bb = Module(new test_c())
|
||||
';
|
||||
|
||||
for( $i = 0; $i < @m_axi_data_widths; $i = $i + 1 ){
|
||||
$control1 .="
|
||||
val (out_$bus_names[$i], edge_$bus_names[$i]) = outer.node_$bus_names[$i].out(0)";
|
||||
}
|
||||
$control1 .= "\n";
|
||||
$control1 =~ s/s_axi_data_width/$s_axi_data_width/g;
|
||||
|
||||
if ($ap_clk eq 1){
|
||||
$control1 .= "\tbb.io.ap_clk := clock\n";
|
||||
}
|
||||
if ($ap_rst eq 1){
|
||||
$control1 .= "\tbb.io.ap_rst := reset\n";
|
||||
}
|
||||
if ($ap_rst_n eq 1){
|
||||
$control1 .= "\tbb.io.ap_rst_n := !reset.toBool() \n";
|
||||
}
|
||||
|
||||
$control1 =~ s/test_c/$func_name/g;
|
||||
print CT $control1;
|
||||
#TODO modify accelerator arg!
|
||||
my $control2 = '
|
||||
';
|
||||
|
||||
# TODO Add support for multiple AXI buses
|
||||
# AXI Inputs Signals
|
||||
for( $i = 0; $i < @verilog_axi_io; $i = $i + 1 ){
|
||||
my $number = $i + 1;
|
||||
if ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(AW|W|AR)READY$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tbb.io.$verilog_axi_io[$i] := out_$bus_name.$type.ready\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(R|B)VALID$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tbb.io.$verilog_axi_io[$i] := out_$bus_name.$type.valid\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(R)DATA$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tbb.io.$verilog_axi_io[$i] := out_$bus_name.$type.bits.data\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(R)LAST$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tbb.io.$verilog_axi_io[$i] := out_$bus_name.$type.bits.last\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(R|B)ID$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tbb.io.$verilog_axi_io[$i] := out_$bus_name.$type.bits.id\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(R|B)RESP$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tbb.io.$verilog_axi_io[$i] := out_$bus_name.$type.bits.resp\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/s_axi_(.*)_(AW|W|AR)VALID$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tbb.io.$verilog_axi_io[$i] := slave_in.$type.valid\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/s_axi_(.*)_(AW|AR)ADDR$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tbb.io.$verilog_axi_io[$i] := slave_in.$type.bits.addr\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/s_axi_(.*)_(W)DATA$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tbb.io.$verilog_axi_io[$i] := slave_in.$type.bits.data\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/s_axi_(.*)_(W)STRB$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tbb.io.$verilog_axi_io[$i] := slave_in.$type.bits.strb\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/s_axi_(.*)_(R|B)READY$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tbb.io.$verilog_axi_io[$i] := slave_in.$type.ready\n";
|
||||
}
|
||||
}
|
||||
|
||||
for( $i = 0; $i < @verilog_axi_io; $i = $i + 1 ){
|
||||
my $number = $i + 1;
|
||||
if ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(AW|W|AR)VALID$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tout_$bus_name.$type.valid := bb.io.$verilog_axi_io[$i]\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(R|B)READY$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tout_$bus_name.$type.ready := bb.io.$verilog_axi_io[$i]\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(AW|AR)ADDR$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tout_$bus_name.$type.bits.addr := bb.io.$verilog_axi_io[$i]\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(AW|AR)ID$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tout_$bus_name.$type.bits.id := bb.io.$verilog_axi_io[$i]\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(AW|AR)LEN$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tout_$bus_name.$type.bits.len := bb.io.$verilog_axi_io[$i]\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(AW|AR)SIZE$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tout_$bus_name.$type.bits.size := bb.io.$verilog_axi_io[$i]\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(AW|AR)BURST$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tout_$bus_name.$type.bits.burst := bb.io.$verilog_axi_io[$i]\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(AW|AR)LOCK$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tout_$bus_name.$type.bits.lock := bb.io.$verilog_axi_io[$i]\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(AW|AR)CACHE$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tout_$bus_name.$type.bits.cache := bb.io.$verilog_axi_io[$i]\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(AW|AR)PROT$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tout_$bus_name.$type.bits.prot := bb.io.$verilog_axi_io[$i]\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(AW|AR)QOS$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tout_$bus_name.$type.bits.qos := bb.io.$verilog_axi_io[$i]\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(AW|AR)REGION$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\t//out_$bus_name.$type.bits.region := bb.io.$verilog_axi_io[$i]\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(W)DATA$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tout_$bus_name.$type.bits.data := bb.io.$verilog_axi_io[$i]\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(W)STRB$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tout_$bus_name.$type.bits.strb := bb.io.$verilog_axi_io[$i]\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/m_axi_(.*)_(W)LAST$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tout_$bus_name.$type.bits.last := bb.io.$verilog_axi_io[$i]\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/s_axi_(.*)_(AW|W|AR)READY$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tslave_in.$type.ready := bb.io.$verilog_axi_io[$i]\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/s_axi_(.*)_(R|B)VALID$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tslave_in.$type.valid := bb.io.$verilog_axi_io[$i]\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/s_axi_(.*)_(R)DATA$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tslave_in.$type.bits.data := bb.io.$verilog_axi_io[$i]\n";
|
||||
}
|
||||
elsif ($verilog_axi_io[$i] =~ m/s_axi_(.*)_(R|B)RESP$/){
|
||||
my $bus_name = $1;
|
||||
my $type = lc $2;
|
||||
$control2 .= "\tslave_in.$type.bits.resp := bb.io.$verilog_axi_io[$i]\n";
|
||||
}
|
||||
}
|
||||
|
||||
if ($ap_return eq 1){
|
||||
$control2 = $control2."\tval ap_return = accel.io.ap.rtn\n";
|
||||
}
|
||||
$control2 .= "
|
||||
// For AXI4lite, these two signals are always True
|
||||
slave_in.r.bits.last := true.B
|
||||
|
||||
when(slave_in.aw.fire()){
|
||||
bId := slave_in.aw.bits.id
|
||||
}
|
||||
|
||||
when(slave_in.ar.fire()){
|
||||
rId := slave_in.ar.bits.id
|
||||
}
|
||||
slave_in.r.bits.id := rId
|
||||
slave_in.b.bits.id := bId
|
||||
}
|
||||
";
|
||||
|
||||
# TODO Fix the width here
|
||||
$control2 .='
|
||||
trait HasPeripheryHLStest_cAXI { this: BaseSubsystem =>
|
||||
private val address = BigInt(base_addr)
|
||||
private val axi_m_portName = "HLS-Accelerator-test_c-master"
|
||||
private val axilite_s_portName = "HLS-Accelerator-test_c-slave"
|
||||
|
||||
val accel_s_axi_width = s_axi_data_width
|
||||
//val hls_test_c_accel = LazyModule(new HLStest_cAXI(address, sbus.beatBytes))
|
||||
val hls_test_c_accel = LazyModule(new HLStest_cAXI(address, accel_s_axi_width >> 3))
|
||||
';
|
||||
|
||||
|
||||
for( $i = 0; $i < @m_axi_data_widths; $i = $i + 1 ){
|
||||
$control2 .="
|
||||
sbus.fromPort(Some(axi_m_portName)) {
|
||||
(TLWidthWidget($m_axi_data_widths[$i]>> 3 )
|
||||
:= AXI4ToTL()
|
||||
:= AXI4UserYanker()
|
||||
:= AXI4Fragmenter()
|
||||
:= AXI4IdIndexer(1))
|
||||
}:=* hls_test_c_accel.node_$bus_names[$i]
|
||||
";
|
||||
}
|
||||
|
||||
$control2 .='
|
||||
hls_test_c_accel.slave_node :=* sbus.toFixedWidthPort(Some(axilite_s_portName)) {
|
||||
(AXI4Buffer()
|
||||
:= AXI4UserYanker()
|
||||
//:= AXI4IdIndexer(params.idBits)
|
||||
//:= AXI4Deinterleaver(sbus.blockBytes) // Assume there is no iterleaved requests, iterleaveId = Some(0)
|
||||
:= TLToAXI4()
|
||||
:= TLBuffer()
|
||||
//:= TLWidthWidget(accel_s_axi_width >> 3)
|
||||
// Compared to TLWidthWidget, TLFragmenter saves the id info?
|
||||
:= TLFragmenter(accel_s_axi_width >> 3, 64, true))
|
||||
}
|
||||
}
|
||||
|
||||
trait HasPeripheryHLStest_cAXIImp extends LazyModuleImp {
|
||||
val outer: HasPeripheryHLStest_cAXI
|
||||
}';
|
||||
|
||||
$control2 =~ s/test_c/$func_name/g;
|
||||
$control2 =~ s/base_addr/$func_base_addr/g;
|
||||
$control2 =~ s/s_axi_data_width/$s_axi_data_width/g;
|
||||
print CT $control2;
|
||||
|
|
@ -0,0 +1,100 @@
|
|||
#!/usr/bin/perl
# Generate a Vivado HLS Tcl script for one C/C++ source file, run vivado_hls
# on it, patch the emitted Verilog and copy it into ../verilog/.
#
# Usage: <script> file_name func_name [prefix]
#   file_name  source base name; file_name.cpp is used when it exists,
#              otherwise file_name.c
#   func_name  top-level function passed to set_top
#   prefix     optional RTL prefix, emitted as "config_rtl -prefix <prefix>"
use warnings;
use strict;
use Cwd;
use File::Copy;

die "Usage: $0 file_name func_name [prefix]\n" if @ARGV < 2;

my ($file_name, $func_name, $prefix) = @ARGV;

#############################GENERATE HLS##############################

my $prefix_tcl = "";
if ($prefix) {
    $prefix_tcl = "config_rtl -prefix ".$prefix."\n";
}

# Prefer the C++ source when both exist.
my $hls_pgm = undef;
if (-f $file_name.".cpp"){
    $hls_pgm = $file_name.'.cpp -cflags "-std=c++0x" ';
} else {
    $hls_pgm = $file_name.".c";
}

# should change to add all .c files
# The script text is assembled directly from the inputs (rather than by
# successive placeholder substitution) so that names which happen to contain
# a placeholder string cannot be rewritten by a later substitution.
my $hls_tcl = join("\n",
    "open_project -reset ${file_name}_prj",
    "set_top $func_name",
    "add_files $hls_pgm",
    'open_solution -reset "solution1"',
    'set_part {xcvu9p-flgb2104-2-i}',
    'config_compile -ignore_long_run_time',
    'create_clock -period 10 -name default',
    $prefix_tcl,
    "#source \"./${file_name}_prj/solution1/directives.tcl\"",
    '#config_interface -clock_enable',
    'config_interface -m_axi_addr64',
    'csynth_design',
    '#export_design -format ip_catalog',
    'exit');

# Write the Tcl script and close it before the tool runs, so vivado_hls is
# guaranteed to see the complete file.
my $dir = getcwd;
my $tcl_path = "$dir/run_hls.tcl";
open(my $hls_fh, '>', $tcl_path) or die "Can't write $tcl_path: $!\n";
print $hls_fh $hls_tcl;
close($hls_fh) or die "Can't close $tcl_path: $!\n";

# run vivado hls; stop here on failure so stale output is never copied
system("vivado_hls -f run_hls.tcl") == 0
    or die "vivado_hls failed (wait status $?)\n";

my $vivado_dir = "$dir/$file_name"."_prj/solution1/syn/verilog/";
my $verilog_dir = "$dir/../verilog/";

# List the synthesis output first: if it is missing we die before the
# previous contents of ../verilog/ are deleted.
opendir(my $vivado_dh, $vivado_dir) or die "Can't opendir $vivado_dir: $! \n";
my @files = readdir($vivado_dh);
closedir($vivado_dh);

unless (-d $verilog_dir) {
    mkdir $verilog_dir or die "Can't create $verilog_dir: $!\n";
}
unlink glob "$verilog_dir/*";

# Patch the generated files in place. Both commands operate on every file in
# the directory ("*"), so they are run once rather than once per entry.
# The chdir must succeed, otherwise the in-place edit would hit the wrong
# directory.
chdir($vivado_dir) or die "Can't chdir to $vivado_dir: $!\n";

# 1) Replace the relative "./" after $readmemh(" with the synthesis output
#    directory (only "/" is escaped in the path).
my $vivado_dir_escape = $vivado_dir;
$vivado_dir_escape =~ s/\//\\\//g;
my $perl_cmd = "perl -p -i -e 's/\$readmemh\\\(\\\"\\\.\/\$readmemh(\\\"$vivado_dir_escape/g' *";
print "$perl_cmd\n";
system($perl_cmd);

# 2) Replace every 'bx literal with 1'b0.
$perl_cmd = "perl -p -i -e \"s/'bx/1'b0/g\" *";
print "$perl_cmd\n";
system($perl_cmd);

chdir($dir) or die "Can't chdir back to $dir: $!\n";

# Copy every regular file (this skips "." and "..") to ../verilog/.
foreach my $v_file (@files) {
    print "$v_file\n";
    next unless -f "$vivado_dir/$v_file";
    copy("$vivado_dir/$v_file", $verilog_dir)
        or die "File cannot be copied! $v_file $verilog_dir\n";
}
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
open_project -reset dialacnet_tl_prj
|
||||
set_top top
|
||||
add_files dialacnet_tl.cpp -cflags "-std=c++0x"
|
||||
open_solution -reset "solution1"
|
||||
set_part {xcvu9p-flgb2104-2-i}
|
||||
config_compile -ignore_long_run_time
|
||||
create_clock -period 10 -name default
|
||||
config_rtl -prefix tl0_
|
||||
|
||||
#source "./dialacnet_tl_prj/solution1/directives.tcl"
|
||||
#config_interface -clock_enable
|
||||
config_interface -m_axi_addr64
|
||||
csynth_design
|
||||
#export_design -format ip_catalog
|
||||
exit
|
|
@ -0,0 +1,32 @@
|
|||
|
||||
****** Vivado(TM) HLS - High-Level Synthesis from C, C++ and SystemC v2017.4.op (64-bit)
|
||||
**** SW Build 2193837 on Tue Apr 10 18:06:59 MDT 2018
|
||||
**** IP Build 2189296 on Tue Apr 10 19:39:46 MDT 2018
|
||||
** Copyright 1986-2017 Xilinx, Inc. All Rights Reserved.
|
||||
|
||||
source /opt/Xilinx/Vivado/2017.4.op/scripts/vivado_hls/hls.tcl -notrace
|
||||
INFO: [HLS 200-10] Running '/opt/Xilinx/Vivado/2017.4.op/bin/unwrapped/lnx64.o/vivado_hls'
|
||||
INFO: [HLS 200-10] For user 'centos' on host 'ip-192-168-2-23.us-west-2.compute.internal' (Linux_x86_64 version 3.10.0-693.21.1.el7.x86_64) on Wed Dec 05 01:59:13 UTC 2018
|
||||
INFO: [HLS 200-10] On os "CentOS Linux release 7.4.1708 (Core) "
|
||||
INFO: [HLS 200-10] In directory '/home/centos/hls-fs/target-design/firechip/hls_dialacnet_tl_top/src/main/c'
|
||||
INFO: [HLS 200-10] Creating and opening project '/home/centos/hls-fs/target-design/firechip/hls_dialacnet_tl_top/src/main/c/dialacnet_tl_prj'.
|
||||
INFO: [HLS 200-10] Adding design file 'dialacnet_tl.cpp' to the project
|
||||
INFO: [HLS 200-10] Creating and opening solution '/home/centos/hls-fs/target-design/firechip/hls_dialacnet_tl_top/src/main/c/dialacnet_tl_prj/solution1'.
|
||||
INFO: [HLS 200-10] Cleaning up the solution database.
|
||||
INFO: [HLS 200-10] Setting target device to 'xcvu9p-flgb2104-2-i'
|
||||
WARNING: [ANALYSIS 214-1] Skip long-run-time warning caused by lots of load/store instructions.
|
||||
INFO: [SYN 201-201] Setting up clock 'default' with a period of 10ns.
|
||||
INFO: [HLS 200-10] Analyzing design file 'dialacnet_tl.cpp' ...
|
||||
WARNING: [HLS 200-40] In file included from dialacnet_tl.cpp:5:
|
||||
./para.h:4:10: fatal error: '/ecad/tools/xilinx/Vivado/2018.2/include/gmp.h' file not found
|
||||
#include "/ecad/tools/xilinx/Vivado/2018.2/include/gmp.h"
|
||||
^
|
||||
1 error generated.
|
||||
C preprocessor failed.
|
||||
while executing
|
||||
"source [lindex $::argv 1] "
|
||||
("uplevel" body line 1)
|
||||
invoked from within
|
||||
"uplevel \#0 { source [lindex $::argv 1] } "
|
||||
|
||||
INFO: [Common 17-206] Exiting vivado_hls at Wed Dec 5 01:59:14 2018...
|
|
@ -1,21 +0,0 @@
|
|||
VERBOSE= 1
|
||||
TARGET ?= dot_product
|
||||
FUNC=add
|
||||
|
||||
LDFLAGS=
|
||||
CFLAGS=
|
||||
|
||||
ifeq ($(CUSTOM_INST), 1)
|
||||
CFLAGS+=-DCUSTOM_INST
|
||||
endif
|
||||
|
||||
ifeq ($(LLVM), 1)
|
||||
ACCEL ?=0
|
||||
include ../../../../Makefile.llvm.in
|
||||
else
|
||||
ifeq ($(GCC), 1)
|
||||
include ../../../../Makefile.gcc.in
|
||||
else
|
||||
include ../../../../Makefile.bm.in
|
||||
endif
|
||||
endif
|
|
@ -1,26 +0,0 @@
|
|||
VERBOSE= 1
|
||||
TARGET ?=dot_product_tl
|
||||
|
||||
FUNC=dot
|
||||
|
||||
LDFLAGS=
|
||||
CFLAGS=
|
||||
|
||||
ifeq ($(CUSTOM_INST), 1)
|
||||
CFLAGS+=-DCUSTOM_INST
|
||||
endif
|
||||
|
||||
ifeq ($(CUSTOM_DRIVER), 1)
|
||||
CFLAGS+=-DCUSTOM_DRIVER
|
||||
endif
|
||||
|
||||
ifeq ($(LLVM), 1)
|
||||
ACCEL ?=0
|
||||
include ../../../../Makefile.llvm.in
|
||||
else
|
||||
ifeq ($(GCC), 1)
|
||||
include ../../../../Makefile.gcc.in
|
||||
else
|
||||
include ../../../../Makefile.bm.in
|
||||
endif
|
||||
endif
|
|
@ -0,0 +1,19 @@
|
|||
TARGET ?=dot_product
|
||||
|
||||
VERBOSE=1
|
||||
|
||||
LDFLAGS=
|
||||
CFLAGS=
|
||||
|
||||
POSTFIX=
|
||||
ifeq ($(CUSTOM_INST), 1)
|
||||
POSTFIX=.inst
|
||||
CFLAGS+=-DCUSTOM_INST
|
||||
endif
|
||||
|
||||
ifeq ($(CUSTOM_DRIVER), 1)
|
||||
POSTFIX=.driver
|
||||
CFLAGS+=-DCUSTOM_DRIVER
|
||||
endif
|
||||
|
||||
include ../Makefile
|
|
@ -0,0 +1,13 @@
|
|||
|
||||
#include <stdint.h>

#ifdef CUSTOM_INST
#include "rocc.h"
#endif

/*
 * Invoke the dot-product accelerator through a RoCC custom instruction.
 *
 * a, b: operand values handed to the instruction as rs1 and rs2.
 * Returns the value the instruction writes to rd.
 *
 * When CUSTOM_INST is not defined no instruction is issued and 0 is
 * returned; previously the result variable was returned uninitialized in
 * that configuration.
 */
uint64_t dot_wrapper(uint64_t a, uint64_t b) {
    uint64_t ret_val = 0;

#ifdef CUSTOM_INST
#define XCUSTOM_ACC 0
    ROCC_INSTRUCTION_DSS(XCUSTOM_ACC, ret_val, a, b, 0);
#else
    /* Operands are unused without the accelerator. */
    (void)a;
    (void)b;
#endif
    return ret_val;
}
|
|
@ -17,14 +17,14 @@ double dot(double a[NUMBER_OF_INPUT], double b[NUMBER_OF_INPUT]) {
|
|||
#pragma HLS PIPELINE
|
||||
|
||||
// To prevent burst mode
|
||||
result[1] += a[i+1] *b[i+1];
|
||||
result[0] += a[i] *b[i];
|
||||
result[1] += a[i+1] *b[i+1];
|
||||
result[2] += a[i+2] *b[i+2];
|
||||
result[3] += a[i+3] *b[i+3];
|
||||
|
||||
result[6] += a[i+6] *b[i+6];
|
||||
result[4] += a[i+4] *b[i+4];
|
||||
result[5] += a[i+5] *b[i+5];
|
||||
result[6] += a[i+6] *b[i+6];
|
||||
result[7] += a[i+7] *b[i+7];
|
||||
|
||||
|
||||
|
@ -60,11 +60,15 @@ int main () {
|
|||
}
|
||||
|
||||
double c;
|
||||
|
||||
uint64_t start = read_cycle();
|
||||
#ifdef CUSTOM_INST
|
||||
c = dot_wrapper(a, b);
|
||||
#else
|
||||
c = dot(a, b);
|
||||
#endif
|
||||
duration(start, read_cycle());
|
||||
|
||||
printf("A . B = %x\n", c);
|
||||
|
||||
return 0;
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue