update readme, add debug on cifar

This commit is contained in:
fffasttime 2023-03-04 00:40:00 +08:00
parent 87f943e14a
commit 0b983a1223
7 changed files with 153 additions and 22 deletions

1
.gitignore vendored
View File

@ -9,3 +9,4 @@ results
!*/hls/config_simd_pe.txt
localconfig.py
/*.txt
_logs

View File

@ -252,7 +252,11 @@ class QuantActivLinear(nn.Module):
tmp = torch.tensor(input.shape[1] * 1e-3, dtype=torch.float)
self.memory_size.copy_(tmp)
out = self.activ(input)
## print('ii',input[0,0,:,0]/self.activ.step)
## print('lineari', torch.round(out[0,:]/self.activ.step).int())
## wstd = self.linear.weight.std()
out = self.linear(out)
## print('linearo', torch.round(out[0,:]/(self.activ.step*self.linear.step*wstd)).int())
return out

View File

@ -98,7 +98,7 @@ def extract_model(in_shape):
assert sub_module.bias is None, 'inner conv has no bias in this model'
if isinstance(sub_module, QuantConv2d): # New quant
conv_cur.wbit = sub_module.bit
conv_cur.w, conv_cur.wstep = sub_module.export_quant() # wstep is not QuantConv2d.step becuause of alpha
conv_cur.w, conv_cur.wstep = sub_module.export_quant() # wstep is not QuantConv2d.step because of alpha
else:
raise NotImplementedError(sub_module)
print(', ich {ich}, och {och}, irow {irow}, icol {icol}, ksp {k}{s}{p}, wbit {wbit}, wstep {wstep}'.format(**vars(conv_cur)))
@ -124,7 +124,7 @@ def extract_model(in_shape):
if isinstance(sub_module, QuantLinear): # New quant
conv_cur.wbit = sub_module.bit
conv_cur.w, conv_cur.wstep = sub_module.export_quant() # wstep is not QuantLinear.step becuause of alpha
conv_cur.w, conv_cur.wstep = sub_module.export_quant() # wstep is not QuantLinear.step because of alpha
print(', ich {ich}, och {och}, wbit {wbit}, wstep {wstep}'.format(**vars(conv_cur)))
@ -388,7 +388,7 @@ if __name__=='__main__':
# load model and state_dict
ptfile:Dict = torch.load('weights/' + opt.weight + '.pt', map_location='cpu')
model = getattr(models, opt.model)(**ptfile.setdefault('model_params', {}))
model.load_state_dict(ptfile['model'])
model.load_state_dict(ptfile['model'], strict = False)
# processs
model_param = extract_model([1, 32, 32])

View File

@ -13,8 +13,8 @@ class QConvLayer:
self.conv = conv_param
self.w = torch.tensor(self.conv.w, dtype = torch.int64)
def __call__(self, x: torch.Tensor):
if self.conv.icol < x.shape[-1]: # maxpool
def __call__(self, x: torch.Tensor, downsampling):
if self.conv.icol < x.shape[-1]: # Maxpool. Note: Order of Maxpool and BN is IMPORTANT when BN.inc can be negative
assert self.conv.irow*2, self.conv.icol*2 == x.shape[2:]
x = F.max_pool2d(x.float(), kernel_size = 2, stride = 2).to(dtype=torch.int64)
@ -27,6 +27,8 @@ class QConvLayer:
x = F.conv2d(x, self.w, bias=None, stride=self.conv.s, padding=self.conv.p) # [N, OCH, OROW, OCOL]
# print('convo', self.conv.n, x[0,0,:,:])
#if downsampling: # Maxpool
# x = F.max_pool2d(x.float(), kernel_size = 2, stride = 2).to(dtype=torch.int64)
och = x.shape[1]
if True:
if self.conv.inc is not None:
@ -48,13 +50,11 @@ class QConvLayer:
if hasattr(self.conv, 'bias'):
bias_ch = self.conv.bias_raw.reshape((1, och, 1, 1))
x += bias_ch
# print('biaso', self.conv.n, x[0,0,:,:])
x = torch.round(x).to(dtype = torch.int64)
if hasattr(self.conv, 'obit'):
x.clip_(0, 2**(self.conv.obit)-1)
return x
class HWModel:
@ -69,7 +69,7 @@ class HWModel:
x=x>>(8-self.layers[0].conv.abit)
for i, layer in enumerate(self.layers):
x = layer(x)
x = layer(x, self.layers[i+1].conv.icol<layer.conv.icol if i+1<len(self.layers) else False)
x = x.float() / self.layers[-1].conv.div
return x

View File

@ -68,7 +68,7 @@ if __name__ == '__main__':
ptfile: Dict = torch.load('weights/' + opt.weight+'.pt', map_location=device)
model_params = ptfile.setdefault('model_params', {})
model = getattr(models, opt.model)(**model_params).to(device)
model.load_state_dict(ptfile['model'])
model.load_state_dict(ptfile['model'], strict = False)
# Test
res = test(model, device, batch_size=opt.batch_size, num_batch=opt.num_batch)

136
readme.md
View File

@ -1,16 +1,126 @@
## train
- python train.py --multi-scale --img-size 320 --multi-scale --batch-size 32
# AnyPackingNet
## reference
- https://github.com/ultralytics/yolov3.git
This is the training model source code of the paper:
## 22修改
- python train22.py 训练非量化模型时不用加其他参数
- train22.py 80行改使用的模型初始训练使用UltraNet_Float或RepVGG
- QAT时把模型改为量化模型例如UltraNet_ismart并且用--weights载入全精度权重
- 测试精度使用python test.py --weights <权重路径> --model <模型类名称>
Mixed-precision Neural Network Quantization and
Implementation Co-optimization for FPGAs
### 注意
- 余弦学习率衰减最好跑满全部epoch
- RepVGG全精度训练后量化训练精度更好
- quant_dorefa.py 第18行改权重量化为[-8,7]还是[-7,7]
## Classification Model
### Usage
```bash
cd cifar/
# 1. Hardware-aware Mixed Precision NAS
python search_train.py --cd 3e-5 --name mix_vggtiny_cifar_cd3e5
# Params:
# --cd Stands for complexity decay
# --name Stands for checkpoint .pt and .log filename
# --model Mixed precision supernet model, default is `VGGtiny_MixQ`
# Then, the optimal bit width of each layer will converge after dozens of epochs, for example bitw={8,2,2,2,2,2}, bita = {8,3,3,3,6,3}
# 2. Main train
python main_train.py --bitw 822222 --bita 833363 --name vggtiny_cifar_cd3e5
# Trained weights are under weights/tiny_cifar_cd3e5.pt
# 3. Test model
python test_acc.py
# You can choose tiny_cifar_cd3e5.pt for testing if nothing went wrong
# 4. HLS code generation:
# The HLS configuration header and weight file can now be exported directly from the .pt weight file.
# First, adjust the `simd, pe` parallelization factors of each layer.
vim hls/config_simd_pe.txt
# Export `config.h` and `weights.hpp` to /hls/tiny_cifar_cd3e5/
python export_hls.py
# 5. Model-Level Hardware Simulation
# simulate_hls.py requires /hls/tiny_cifar_cd3e5/model_param.pkl file generated by export_hls.py
python simulate_hls.py
# This output should be consistent with the hardware output or the HLS C-level simulation
```
## DAC-SDC Object Detection Model
The DAC System Design Contest focused on low-power object detection on an embedded FPGA system: https://www.dac.com/Conference/System-Design-Contest.
The goal of this contest is to optimize the performance of designs in terms of accuracy and power on an Ultra 96 v2 FPGA board. The contest was held 5 times, from 2018 to 2022, and the performance of the best design over these years increased from 30 fps to thousands of fps.
Base models for anypacking bitwidth search:
- UltraNet: https://github.com/heheda365/ultra_net by the BJUT_runner team, 1st place in the 2020 DAC-SDC contest. UltraNet is a VGGNet-like model with far fewer parameters. UltraNet_iSmart is the 2nd-place design of the 2021 DAC-SDC by the UIUC iSmart team, which achieves much better throughput through fixed packing optimization.
- UltraNet_Bypass: https://github.com/heymesut/SJTU_microe 21' SJTU, 3rd place of 2021 DAC-SDC contest. A variant of UltraNet with bypass connections. Bypass connections increase model accuracy, but make the design of a pipeline-based NN accelerator more difficult.
- SkyNet: https://github.com/jiangwx/SkrSkr SkrSkr by SHTECH, 1st place of 2021 DAC-SDC contest. SkyNet is a MobileNet-like lightweight model.
- SkyNetk5: SkyNet with a 5x5 depthwise convolution kernel. Since dwconv requires far fewer computations than pwconv, a larger kernel brings higher accuracy at a slight cost.
Dataset: See https://byuccl.github.io/dac_sdc_2022/info/.
**Usage**: First `cd dacsdc/`, then follow next steps.
### 1) Hardware-aware Mixed Precision NAS for bit width
```bash
# For UltraNet with mixed precision:
python search_train.py --cd 1e-5 --name mix_ultranet_cd1e5
# UltraNet with Bypass:
python search_train.py --cd 1e-5 --name mix_ultranet_bypass_cd1e5 --model UltraNetBypass_MixQ
# SkyNet/SkyNetk5
python search_train.py --cd 1e-5 --name mix_skynet_cd1e5 --model [SkyNet_MixQ | SkyNetk5_MixQ]
```
### 2) Main Train
For UltraNet:
```bash
# UltraNet_BJTU uses full 4-bit weight quantization
python main_train.py --bitw 444444444 --bita 844444444 --name ultranet_BJTU
# UltraNet_iSmart uses 4-8 bit mixed quantization for weights
python main_train.py --bitw 844444448 --bita 844444444 --name ultranet_iSmart
# Or use searched bitw, bita from search_train.py
python main_train.py --bitw <bitw> --bita <bita> --name ultranet_anypacking
```
For UltraNet_Bypass/SkyNet/SkyNetk5
```bash
python main_train.py --bitw <bitw> --bita <bita> --name <ckptname> --model [UltraNet_Bypass | SkyNet | SkyNetk5]
```
### 3) Test model
```bash
python test.py [--model [UltraNet_Bypass_FixQ | SkyNet_FixQ | SkyNetk5_FixQ]]
```
### 4) HLS export
```bash
# For Ultranet or Ultranet_Bypass
python export_hls.py [--model UltraNet_Bypass_FixQ]
# For SkyNet or SkyNetk5
python export_hls.py [--model SkyNetk5_FixQ]
```
### 5) Model-Level Hardware Simulation
```bash
python simulate_hls.py [--model [UltraNet_Bypass_FixQ | SkyNet_FixQ | SkyNetk5_FixQ]]
```
## Reference
- https://github.com/zhaoweicai/EdMIPS EdMIPS: Rethinking Differentiable Search for Mixed-Precision Neural Networks
- https://github.com/kuangliu/pytorch-cifar Smaller models for cifar dataset
- https://github.com/ultralytics/yolov3.git yolov3 training framework
- https://github.com/jiangwx/SkyNet SkyNet by SHTECH, winner of 2019 DAC-SDC contest
- https://github.com/jgoeders/dac_sdc_2020_designs Winner designs of 2020 DAC-SDC contest
- https://github.com/heheda365/ultra_net BJUT_runner team, 1st place of 2020 DAC-SDC contest, UltraNet
- https://github.com/jgoeders/dac_sdc_2021_designs Winner designs of 2021 DAC-SDC contest
- https://github.com/jiangwx/SkrSkr SkrSkr by SHTECH, 1st place of 2021 DAC-SDC contest, SkyNet
- https://github.com/xliu0709/DACSDC2021 iSmart team, 2nd place of 2021 DAC-SDC design, UltraNet with optimized packing method
- https://github.com/heymesut/SJTU_microe 3rd place of 2021 DAC-SDC design by SJTU, a variant of UltraNet with bypass
- https://github.com/jgoeders/dac_sdc_2022_designs Winner designs of 2022 DAC-SDC contest
- https://github.com/MatthewLuo7/InvolutionNet 3rd place of 2022 DAC-SDC design (ours), without anypacking design

View File

@ -93,3 +93,19 @@ def load_classifier(name='resnet101', n=2):
model.last_linear.weight = torch.nn.Parameter(torch.zeros(n, filters))
model.last_linear.out_features = n
return model
# Running index of the next dump file; incremented after every loglayer() call.
log_layerid = 0


def loglayer(x):
    """Dump an integer tensor to ``_logs/test<N>.txt`` for debugging.

    ``N`` is the module-level counter ``log_layerid``, which is incremented
    after each call so successive calls write to successive files.

    The output contains one ``C <i>`` header line per index of the first
    axis, followed by one comma-terminated line of ``%3d``-formatted values
    per index of the second axis.

    Args:
        x: Object exposing ``.numpy()`` that yields an integer array indexed
            as ``x[i, j, k]``.
            NOTE(review): presumably a CPU ``torch.Tensor`` laid out as
            (channel, row, col) -- confirm against callers.

    Raises:
        AssertionError: If the converted array does not have an integer dtype.
    """
    global log_layerid
    import os

    import numpy as np

    x = x.numpy()
    # ``np.int`` was deprecated in NumPy 1.20 and removed in 1.24; it was also
    # platform dependent. Accept any integer dtype instead.
    assert np.issubdtype(x.dtype, np.integer), \
        'loglayer expects an integer array, got %s' % x.dtype

    # The log directory is git-ignored, so it may not exist on a fresh checkout.
    os.makedirs('_logs', exist_ok=True)
    with open('_logs/test%d.txt' % log_layerid, 'w') as f:
        for i, plane in enumerate(x):
            print('C', i, file=f)
            for row in plane:
                for value in row:
                    print('%3d' % value, end=',', file=f)
                # Terminate the current row.
                print(file=f)
    log_layerid += 1