forked from liucheng/DeepBurning-MixQ
1180 lines
39 KiB
Python
1180 lines
39 KiB
Python
from typing import Dict
|
|
import torch
|
|
import torch.nn as nn
|
|
import torch.nn.functional as F
|
|
import torch.nn.init as init
|
|
from quant_dorefa import conv2d_Q_fn, activation_quantize_fn
|
|
import anypacking.quant_module as qm
|
|
|
|
def create_grids(self, img_size=416, ng=(13, 13), device='cpu', type=torch.float32):
|
|
nx, ny = ng # x and y grid size
|
|
self.img_size = max(img_size)
|
|
self.stride = self.img_size / max(ng)
|
|
|
|
# build xy offsets
|
|
yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
|
|
self.grid_xy = torch.stack((xv, yv), 2).to(device).type(type).view((1, 1, ny, nx, 2))
|
|
|
|
# build wh gains
|
|
self.anchor_vec = self.anchors.to(device) / self.stride
|
|
self.anchor_wh = self.anchor_vec.view(1, self.na, 1, 1, 2).to(device).type(type)
|
|
self.ng = torch.Tensor(ng).to(device)
|
|
self.nx = nx
|
|
self.ny = ny
|
|
|
|
class YOLOLayer(nn.Module):
|
|
def __init__(self, anchors):
|
|
super(YOLOLayer, self).__init__()
|
|
|
|
self.anchors = torch.Tensor(anchors)
|
|
self.na = len(anchors) # number of anchors (3)
|
|
self.no = 6 # number of outputs
|
|
self.nx = 0 # initialize number of x gridpoints
|
|
self.ny = 0 # initialize number of y gridpoints
|
|
def forward(self, p, img_size):
|
|
|
|
bs, _, ny, nx = p.shape # bs, 255, 13, 13
|
|
if (self.nx, self.ny) != (nx, ny):
|
|
create_grids(self, img_size, (nx, ny), p.device, p.dtype)
|
|
|
|
# p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 85) # (bs, anchors, grid, grid, classes + xywh)
|
|
p = p.view(bs, self.na, self.no, self.ny, self.nx).permute(0, 1, 3, 4, 2).contiguous() # prediction
|
|
|
|
if self.training:
|
|
return p
|
|
|
|
else: # inference
|
|
# s = 1.5 # scale_xy (pxy = pxy * s - (s - 1) / 2)
|
|
io = p.clone() # inference output
|
|
io[..., :2] = torch.sigmoid(io[..., :2]) + self.grid_xy # xy
|
|
io[..., 2:4] = torch.exp(io[..., 2:4]) * self.anchor_wh # wh yolo method
|
|
# io[..., 2:4] = ((torch.sigmoid(io[..., 2:4]) * 2) ** 3) * self.anchor_wh # wh power method
|
|
io[..., :4] *= self.stride # 原始像素尺度
|
|
|
|
|
|
torch.sigmoid_(io[..., 4:])
|
|
|
|
|
|
return io.view(bs, -1, self.no), p
|
|
|
|
def fixq_fetch_arch_info(self):
|
|
sum_bitops, sum_bita, sum_bitw, sum_dsps, sum_bram = 0, 0, 0, 0, 0
|
|
layer_idx = 0
|
|
for m in self.modules():
|
|
if isinstance(m, self.conv_func):
|
|
if m.wbit == 32 or m.abit == 32:
|
|
layer_idx += 1
|
|
continue
|
|
|
|
size_product = m.size_product.item()
|
|
memory_size = m.memory_size.item()
|
|
bitops = size_product * m.abit * m.wbit
|
|
bita = m.memory_size.item() * m.abit
|
|
bitw = m.param_size * m.wbit
|
|
if m.kernel_size == 1:
|
|
dsp_factors = qm.dsp_factors_k11
|
|
elif m.kernel_size == 3:
|
|
dsp_factors = qm.dsp_factors_k33
|
|
elif m.kernel_size == 5:
|
|
dsp_factors = qm.dsp_factors_k55
|
|
else:
|
|
raise NotImplementedError
|
|
|
|
if m.kernel_size == 1:
|
|
bram_sw = 2 * m.in_width.item() * m.inplane
|
|
else:
|
|
bram_sw = (m.kernel_size+1)*m.in_width.item()*m.inplane
|
|
bram_sw *= 1e-3
|
|
|
|
dsps = size_product / dsp_factors[m.wbit-2][m.abit-2]
|
|
weight_shape = list(m.conv.weight.shape)
|
|
print('idx {} with shape {}, bitops: {:.3f}M * {} * {}, memory: {:.3f}K * {}, '
|
|
'param: {:.3f}M * {}, dsps: {:.3f}M, bram(wa|waf):({:.2f},{:.2f}|{:.1f},{:.1f},{:.1f})K'.format(layer_idx, weight_shape, size_product, m.abit,
|
|
m.wbit, memory_size, m.abit, m.param_size, m.wbit, dsps,
|
|
m.param_size * 1e3, bram_sw, bitw*1e3, bram_sw*m.abit, bitw*1e3 + bram_sw*m.abit))
|
|
sum_bitops += bitops
|
|
sum_bita += bita
|
|
sum_bitw += bitw
|
|
sum_dsps += dsps
|
|
sum_bram += bitw*1e3 + bram_sw*m.abit
|
|
layer_idx += 1
|
|
return sum_bitops, sum_bita, sum_bitw, sum_dsps, sum_bram
|
|
|
|
def mixq_fetch_best_arch(self):
|
|
sum_bitops, sum_bita, sum_bitw, sum_dsps = 0, 0, 0, 0
|
|
sum_mixbitops, sum_mixbita, sum_mixbitw, sum_mixdsps = 0, 0, 0, 0
|
|
sum_mixbram_weight, sum_mixbram_cache = 0, 0
|
|
layer_idx = 0
|
|
best_arch = None
|
|
for m in self.modules():
|
|
if isinstance(m, self.conv_func):
|
|
layer_arch, bitops, bita, bitw, mixbitops, mixbita, mixbitw, dsps, mixdsps, mixbram_weight, mixbram_cache = m.fetch_best_arch(layer_idx)
|
|
if best_arch is None:
|
|
best_arch = layer_arch
|
|
else:
|
|
for key in layer_arch.keys():
|
|
if key not in best_arch:
|
|
best_arch[key] = layer_arch[key]
|
|
else:
|
|
best_arch[key].append(layer_arch[key][0])
|
|
sum_bitops += bitops
|
|
sum_bita += bita
|
|
sum_bitw += bitw
|
|
sum_mixbitops += mixbitops
|
|
sum_mixbita += mixbita
|
|
sum_mixbitw += mixbitw
|
|
sum_dsps += dsps
|
|
sum_mixdsps += mixdsps
|
|
sum_mixbram_weight += mixbram_weight
|
|
sum_mixbram_cache += mixbram_cache
|
|
layer_idx += 1
|
|
return best_arch, sum_bitops, sum_bita, sum_bitw, sum_mixbitops, sum_mixbita, sum_mixbitw, sum_dsps, sum_mixdsps, sum_mixbram_weight, sum_mixbram_cache
|
|
|
|
def mixq_complexity_loss(self):
|
|
size_product = []
|
|
loss = 0
|
|
for m in self.modules():
|
|
if isinstance(m, self.conv_func):
|
|
loss += m.complexity_loss()
|
|
size_product += [m.size_product]
|
|
normalizer = size_product[0].item()
|
|
loss /= normalizer
|
|
return loss
|
|
|
|
def mixq_bram_loss(self):
|
|
memory_sizes = []
|
|
loss = 0
|
|
for m in self.modules():
|
|
if isinstance(m, self.conv_func):
|
|
loss += m.bram_loss()
|
|
memory_sizes += [m.memory_size.item()]
|
|
normalizer = memory_sizes[0]
|
|
loss /= normalizer
|
|
return loss
|
|
|
|
def mixq_complexity_loss_trivial(self):
|
|
size_product = []
|
|
loss = 0
|
|
for m in self.modules():
|
|
if isinstance(m, self.conv_func):
|
|
loss += m.complexity_loss_trivial()
|
|
size_product += [m.size_product]
|
|
normalizer = size_product[0].item()
|
|
loss /= normalizer
|
|
return loss
|
|
|
|
class UltraNet_ismart(nn.Module):
|
|
def __init__(self):
|
|
super(UltraNet_ismart, self).__init__()
|
|
W_BIT = 4
|
|
A_BIT = 4
|
|
conv2d_q = conv2d_Q_fn(W_BIT)
|
|
conv2d_8 = conv2d_Q_fn(8)
|
|
# act_q = activation_quantize_fn(4)
|
|
|
|
self.layers = nn.Sequential(
|
|
conv2d_8(3, 16, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(16),
|
|
activation_quantize_fn(A_BIT),
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
conv2d_q(16, 32, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(32),
|
|
activation_quantize_fn(A_BIT),
|
|
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
conv2d_q(32, 64, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(64),
|
|
activation_quantize_fn(A_BIT),
|
|
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
conv2d_q(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(64),
|
|
activation_quantize_fn(A_BIT),
|
|
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
conv2d_q(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(64),
|
|
activation_quantize_fn(A_BIT),
|
|
|
|
conv2d_q(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(64),
|
|
activation_quantize_fn(A_BIT),
|
|
|
|
conv2d_q(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(64),
|
|
activation_quantize_fn(A_BIT),
|
|
|
|
|
|
conv2d_q(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(64),
|
|
activation_quantize_fn(A_BIT),
|
|
|
|
|
|
# nn.Conv2d(256, 18, kernel_size=1, stride=1, padding=0)
|
|
conv2d_8(64, 36, kernel_size=1, stride=1, padding=0)
|
|
|
|
)
|
|
self.yololayer = YOLOLayer([[20,20], [20,20], [20,20], [20,20], [20,20], [20,20]])
|
|
self.yolo_layers = [self.yololayer]
|
|
|
|
def forward(self, x):
|
|
img_size = x.shape[-2:]
|
|
yolo_out, out = [], []
|
|
|
|
x = self.layers(x)
|
|
x = self.yololayer(x, img_size)
|
|
|
|
yolo_out.append(x)
|
|
|
|
if self.training: # train
|
|
return yolo_out
|
|
else: # test
|
|
io, p = zip(*yolo_out) # inference output, training output
|
|
return torch.cat(io, 1), p
|
|
return x
|
|
|
|
|
|
class UltraNet(nn.Module):
|
|
def __init__(self):
|
|
super(UltraNet, self).__init__()
|
|
W_BIT = 4
|
|
A_BIT = 4
|
|
conv2d_q = conv2d_Q_fn(W_BIT)
|
|
|
|
self.layers = nn.Sequential(
|
|
conv2d_q(3, 16, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(16),
|
|
activation_quantize_fn(A_BIT),
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
conv2d_q(16, 32, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(32),
|
|
activation_quantize_fn(A_BIT),
|
|
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
conv2d_q(32, 64, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(64),
|
|
activation_quantize_fn(A_BIT),
|
|
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
conv2d_q(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(64),
|
|
activation_quantize_fn(A_BIT),
|
|
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
conv2d_q(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(64),
|
|
activation_quantize_fn(A_BIT),
|
|
|
|
conv2d_q(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(64),
|
|
activation_quantize_fn(A_BIT),
|
|
|
|
conv2d_q(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(64),
|
|
activation_quantize_fn(A_BIT),
|
|
|
|
|
|
conv2d_q(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(64),
|
|
activation_quantize_fn(A_BIT),
|
|
|
|
|
|
# nn.Conv2d(256, 18, kernel_size=1, stride=1, padding=0)
|
|
conv2d_q(64, 36, kernel_size=1, stride=1, padding=0)
|
|
|
|
)
|
|
self.yololayer = YOLOLayer([[20,20], [20,20], [20,20], [20,20], [20,20], [20,20]])
|
|
self.yolo_layers = [self.yololayer]
|
|
|
|
def forward(self, x):
|
|
img_size = x.shape[-2:]
|
|
yolo_out, out = [], []
|
|
|
|
x = self.layers(x)
|
|
x = self.yololayer(x, img_size)
|
|
|
|
yolo_out.append(x)
|
|
|
|
if self.training: # train
|
|
return yolo_out
|
|
else: # test
|
|
io, p = zip(*yolo_out) # inference output, training output
|
|
return torch.cat(io, 1), p
|
|
return x
|
|
|
|
class UltraNet_MixQ(nn.Module):
|
|
def __init__(self, share_weight = False):
|
|
super(UltraNet_MixQ, self).__init__()
|
|
self.conv_func = qm.MixActivConv2d
|
|
conv_func = self.conv_func
|
|
|
|
conv_kwargs = {'kernel_size':3, 'stride':1, 'padding':1, 'bias':False}
|
|
qspace = {'wbits':[2,3,4,5,6,7,8], 'abits':[2,3,4,5,6,7,8], 'share_weight':share_weight}
|
|
|
|
self.layers = nn.Sequential(
|
|
conv_func(3, 16, ActQ = qm.ImageInputQ, **conv_kwargs, **qspace), #0
|
|
nn.BatchNorm2d(16),
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
conv_func(16, 32, **conv_kwargs, **qspace),#1
|
|
nn.BatchNorm2d(32),
|
|
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
conv_func(32, 64, **conv_kwargs, **qspace),#2
|
|
nn.BatchNorm2d(64),
|
|
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
conv_func(64, 64, **conv_kwargs, **qspace),#3
|
|
nn.BatchNorm2d(64),
|
|
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
conv_func(64, 64, **conv_kwargs, **qspace),#4
|
|
nn.BatchNorm2d(64),
|
|
|
|
conv_func(64, 64, **conv_kwargs, **qspace),#5
|
|
nn.BatchNorm2d(64),
|
|
|
|
conv_func(64, 64, **conv_kwargs, **qspace),#6
|
|
nn.BatchNorm2d(64),
|
|
|
|
|
|
conv_func(64, 64, **conv_kwargs, **qspace),#7
|
|
nn.BatchNorm2d(64),
|
|
|
|
conv_func(64, 36, kernel_size = 1, stride =1, padding = 0, **qspace)#8
|
|
|
|
)
|
|
self.yololayer = YOLOLayer([[20,20], [20,20], [20,20], [20,20], [20,20], [20,20]])
|
|
self.yolo_layers = [self.yololayer]
|
|
|
|
def forward(self, x):
|
|
img_size = x.shape[-2:]
|
|
yolo_out, out = [], []
|
|
|
|
x = self.layers(x)
|
|
x = self.yololayer(x, img_size)
|
|
|
|
yolo_out.append(x)
|
|
|
|
if self.training: # train
|
|
return yolo_out
|
|
else: # test
|
|
io, p = zip(*yolo_out) # inference output, training output
|
|
return torch.cat(io, 1), p
|
|
return x
|
|
|
|
# def fetch_best_arch(self):
|
|
fetch_best_arch = mixq_fetch_best_arch
|
|
|
|
# def complexity_loss(self):
|
|
complexity_loss = mixq_complexity_loss
|
|
complexity_loss_trivial = mixq_complexity_loss_trivial
|
|
bram_loss = mixq_bram_loss
|
|
|
|
class UltraNet_FixQ(nn.Module):
|
|
def __init__(self, bitw = '444444444', bita = '444444444'):
|
|
super(UltraNet_FixQ, self).__init__()
|
|
self.conv_func = qm.QuantActivConv2d
|
|
conv_func = self.conv_func
|
|
|
|
assert(len(bitw)==0 or len(bitw)==9)
|
|
assert(len(bita)==0 or len(bita)==9)
|
|
if isinstance(bitw, str):
|
|
bitw=list(map(int, bitw))
|
|
if isinstance(bita, str):
|
|
bita=list(map(int, bita))
|
|
|
|
self.bitw = bitw
|
|
self.bita = bita
|
|
self.model_params = {'bitw': bitw, 'bita': bita}
|
|
|
|
conv_kwargs = {'kernel_size':3, 'stride':1, 'padding':1, 'bias':False}
|
|
|
|
self.layers = nn.Sequential(
|
|
conv_func(3, 16, ActQ = qm.ImageInputQ, **conv_kwargs, wbit=bitw[0], abit=bita[0]),#0
|
|
nn.BatchNorm2d(16),
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
conv_func(16, 32, **conv_kwargs, wbit=bitw[1], abit=bita[1]),#1
|
|
nn.BatchNorm2d(32),
|
|
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
conv_func(32, 64, **conv_kwargs, wbit=bitw[2], abit=bita[2]),#2
|
|
nn.BatchNorm2d(64),
|
|
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
conv_func(64, 64, **conv_kwargs, wbit=bitw[3], abit=bita[3]),#3
|
|
nn.BatchNorm2d(64),
|
|
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
conv_func(64, 64, **conv_kwargs, wbit=bitw[4], abit=bita[4]),#4
|
|
nn.BatchNorm2d(64),
|
|
|
|
conv_func(64, 64, **conv_kwargs, wbit=bitw[5], abit=bita[5]),#5
|
|
nn.BatchNorm2d(64),
|
|
|
|
conv_func(64, 64, **conv_kwargs, wbit=bitw[6], abit=bita[6]),#6
|
|
nn.BatchNorm2d(64),
|
|
|
|
|
|
conv_func(64, 64, **conv_kwargs, wbit=bitw[7], abit=bita[7]),#7
|
|
nn.BatchNorm2d(64),
|
|
|
|
conv_func(64, 36, kernel_size = 1, stride =1, padding = 0, wbit=bitw[8], abit=bita[8])#8
|
|
|
|
)
|
|
self.yololayer = YOLOLayer([[20,20], [20,20], [20,20], [20,20], [20,20], [20,20]])
|
|
self.yolo_layers = [self.yololayer]
|
|
|
|
def forward(self, x):
|
|
img_size = x.shape[-2:]
|
|
|
|
x = self.layers(x)
|
|
p = self.yololayer(x, img_size) # train: p; test: (io, p)
|
|
|
|
if self.training: # train
|
|
return [p]
|
|
else: # test
|
|
return p[0], (p[1],) # inference output, training output
|
|
|
|
# def fetch_arch_info(self):
|
|
fetch_arch_info = fixq_fetch_arch_info
|
|
|
|
class UltraNetFloat(nn.Module):
|
|
def __init__(self):
|
|
super(UltraNetFloat, self).__init__()
|
|
|
|
self.layers = nn.Sequential(
|
|
nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(16),
|
|
nn.ReLU(inplace=True),
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(32),
|
|
nn.ReLU(inplace=True),
|
|
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(64),
|
|
nn.ReLU(inplace=True),
|
|
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(64),
|
|
nn.ReLU(inplace=True),
|
|
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(64),
|
|
nn.ReLU(inplace=True),
|
|
|
|
nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(64),
|
|
nn.ReLU(inplace=True),
|
|
|
|
nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(64),
|
|
nn.ReLU(inplace=True),
|
|
|
|
|
|
nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(64),
|
|
nn.ReLU(inplace=True),
|
|
|
|
|
|
# nn.Conv2d(256, 18, kernel_size=1, stride=1, padding=0)
|
|
nn.Conv2d(64, 36, kernel_size=1, stride=1, padding=0)
|
|
|
|
)
|
|
self.yololayer = YOLOLayer([[20,20], [20,20], [20,20], [20,20], [20,20], [20,20]])
|
|
self.yolo_layers = [self.yololayer]
|
|
|
|
def forward(self, x):
|
|
img_size = x.shape[-2:]
|
|
yolo_out, out = [], []
|
|
|
|
x = self.layers(x)
|
|
x = self.yololayer(x, img_size)
|
|
|
|
yolo_out.append(x)
|
|
|
|
if self.training: # train
|
|
return yolo_out
|
|
else: # test
|
|
io, p = zip(*yolo_out) # inference output, training output
|
|
return torch.cat(io, 1), p
|
|
return x
|
|
|
|
class ReorgLayer(nn.Module):
|
|
def __init__(self, stride=2):
|
|
super(ReorgLayer, self).__init__()
|
|
self.stride = stride
|
|
def forward(self, x):
|
|
stride = self.stride
|
|
assert(x.data.dim() == 4)
|
|
B = x.data.size(0)
|
|
C = x.data.size(1)
|
|
H = x.data.size(2)
|
|
W = x.data.size(3)
|
|
assert(H % stride == 0)
|
|
assert(W % stride == 0)
|
|
ws = stride
|
|
hs = stride
|
|
x = x.view([B, C, H//hs, hs, W//ws, ws]).transpose(3, 4).contiguous()
|
|
x = x.view([B, C, H//hs*W//ws, hs*ws]).transpose(2, 3).contiguous()
|
|
x = x.view([B, C, hs*ws, H//hs, W//ws]).transpose(1, 2).contiguous()
|
|
x = x.view([B, hs*ws*C, H//hs, W//ws])
|
|
return x
|
|
|
|
class UltraNetBypassFloat(nn.Module):
|
|
def __init__(self):
|
|
super(UltraNetBypassFloat, self).__init__()
|
|
self.reorg = ReorgLayer(stride=2)
|
|
|
|
self.layers_p1 = nn.Sequential(
|
|
nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(16),
|
|
nn.ReLU(inplace=True),
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(32),
|
|
nn.ReLU(inplace=True),
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(64),
|
|
nn.ReLU(inplace=True),
|
|
nn.MaxPool2d(2, stride=2)
|
|
)
|
|
|
|
self.layers_p2 = nn.Sequential(
|
|
nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(64),
|
|
nn.ReLU(inplace=True),
|
|
)
|
|
|
|
self.layers_p3 = nn.Sequential(
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(64),
|
|
nn.ReLU(inplace=True),
|
|
|
|
nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(64),
|
|
nn.ReLU(inplace=True),
|
|
|
|
nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
|
|
nn.BatchNorm2d(64),
|
|
nn.ReLU(inplace=True),
|
|
)
|
|
|
|
self.layers_p4 = nn.Sequential(
|
|
nn.Conv2d(320, 64, kernel_size=3, stride=1, padding=1, bias=False), # cat p2--64→64*4 + 64
|
|
nn.BatchNorm2d(64),
|
|
nn.ReLU(inplace=True),
|
|
|
|
nn.Conv2d(64, 36, kernel_size=1, stride=1, padding=0)
|
|
)
|
|
self.yololayer = YOLOLayer([[20, 20], [20, 20], [20, 20], [20, 20], [20, 20], [20, 20]])
|
|
self.yolo_layers = [self.yololayer]
|
|
|
|
def forward(self, x):
|
|
img_size = x.shape[-2:]
|
|
yolo_out, out = [], []
|
|
|
|
x_p1 = self.layers_p1(x)
|
|
x_p2 = self.layers_p2(x_p1)
|
|
x_p2_reorg = self.reorg(x_p2)
|
|
x_p3 = self.layers_p3(x_p2)
|
|
x_p4_in = torch.cat([x_p2_reorg, x_p3], 1)
|
|
x_p4 = self.layers_p4(x_p4_in)
|
|
|
|
x = self.yololayer(x_p4, img_size)
|
|
|
|
yolo_out.append(x)
|
|
|
|
if self.training: # train
|
|
return yolo_out
|
|
else: # test
|
|
io, p = zip(*yolo_out) # inference output, training output
|
|
return torch.cat(io, 1), p
|
|
return x
|
|
|
|
class UltraNetBypass_MixQ(nn.Module):
|
|
def __init__(self, share_weight = False):
|
|
super(UltraNetBypass_MixQ, self).__init__()
|
|
self.reorg = ReorgLayer(stride=2)
|
|
self.conv_func = qm.MixActivConv2d
|
|
conv_func = self.conv_func
|
|
|
|
conv_kwargs = {'kernel_size':3, 'stride':1, 'padding':1, 'bias':False}
|
|
qspace = {'wbits':[2,3,4,5,6,7,8], 'abits':[2,3,4,5,6,7,8], 'share_weight':share_weight}
|
|
|
|
self.layers_p1 = nn.Sequential(
|
|
conv_func(3, 16, ActQ = qm.ImageInputQ, **conv_kwargs, **qspace),
|
|
nn.BatchNorm2d(16),
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
conv_func(16, 32, **conv_kwargs, **qspace),
|
|
nn.BatchNorm2d(32),
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
conv_func(32, 64, **conv_kwargs, **qspace),
|
|
nn.BatchNorm2d(64),
|
|
nn.MaxPool2d(2, stride=2)
|
|
)
|
|
|
|
self.layers_p2 = nn.Sequential(
|
|
conv_func(64, 64, **conv_kwargs, **qspace),
|
|
nn.BatchNorm2d(64),
|
|
)
|
|
|
|
self.layers_p3 = nn.Sequential(
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
conv_func(64, 64, **conv_kwargs, **qspace),
|
|
nn.BatchNorm2d(64),
|
|
|
|
conv_func(64, 64, **conv_kwargs, **qspace),
|
|
nn.BatchNorm2d(64),
|
|
|
|
conv_func(64, 64, **conv_kwargs, **qspace),
|
|
nn.BatchNorm2d(64),
|
|
)
|
|
|
|
self.layers_p4 = nn.Sequential(
|
|
conv_func(320, 64, **conv_kwargs, **qspace), # cat p2--64→64*4 + 64
|
|
nn.BatchNorm2d(64),
|
|
|
|
conv_func(64, 36, kernel_size = 1, stride =1, padding = 0, **qspace)
|
|
)
|
|
self.yololayer = YOLOLayer([[20, 20], [20, 20], [20, 20], [20, 20], [20, 20], [20, 20]])
|
|
self.yolo_layers = [self.yololayer]
|
|
|
|
def forward(self, x):
|
|
img_size = x.shape[-2:]
|
|
yolo_out, out = [], []
|
|
|
|
x_p1 = self.layers_p1(x)
|
|
x_p2 = self.layers_p2(x_p1)
|
|
x_p2_reorg = self.reorg(x_p2)
|
|
x_p3 = self.layers_p3(x_p2)
|
|
x_p4_in = torch.cat([x_p2_reorg, x_p3], 1)
|
|
x_p4 = self.layers_p4(x_p4_in)
|
|
|
|
x = self.yololayer(x_p4, img_size)
|
|
|
|
yolo_out.append(x)
|
|
|
|
if self.training: # train
|
|
return yolo_out
|
|
else: # test
|
|
io, p = zip(*yolo_out) # inference output, training output
|
|
return torch.cat(io, 1), p
|
|
return x
|
|
|
|
# def fetch_best_arch(self):
|
|
fetch_best_arch = mixq_fetch_best_arch
|
|
|
|
# def complexity_loss(self):
|
|
complexity_loss = mixq_complexity_loss
|
|
mixq_complexity_loss_trivial = mixq_complexity_loss_trivial
|
|
bram_loss = mixq_bram_loss
|
|
|
|
class UltraNetBypass_FixQ(nn.Module):
|
|
def __init__(self, bitw = '444444444', bita = '444444444'):
|
|
super(UltraNetBypass_FixQ, self).__init__()
|
|
self.reorg = ReorgLayer(stride=2)
|
|
self.conv_func = qm.QuantActivConv2d
|
|
conv_func = self.conv_func
|
|
|
|
assert(len(bitw)==0 or len(bitw)==9)
|
|
assert(len(bita)==0 or len(bita)==9)
|
|
if isinstance(bitw, str):
|
|
bitw=list(map(int, bitw))
|
|
if isinstance(bita, str):
|
|
bita=list(map(int, bita))
|
|
|
|
self.bitw = bitw
|
|
self.bita = bita
|
|
self.model_params = {'bitw': bitw, 'bita': bita}
|
|
|
|
conv_kwargs = {'kernel_size':3, 'stride':1, 'padding':1, 'bias':False}
|
|
|
|
self.layers_p1 = nn.Sequential(
|
|
conv_func(3, 16, ActQ = qm.ImageInputQ, **conv_kwargs, wbit=bitw[0], abit=bita[0]),
|
|
nn.BatchNorm2d(16),
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
conv_func(16, 32, **conv_kwargs, wbit=bitw[1], abit=bita[1]),
|
|
nn.BatchNorm2d(32),
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
conv_func(32, 64, **conv_kwargs, wbit=bitw[2], abit=bita[2]),
|
|
nn.BatchNorm2d(64),
|
|
nn.MaxPool2d(2, stride=2)
|
|
)
|
|
|
|
self.layers_p2 = nn.Sequential(
|
|
conv_func(64, 64, **conv_kwargs, wbit=bitw[3], abit=bita[3]),
|
|
nn.BatchNorm2d(64),
|
|
)
|
|
|
|
self.layers_p3 = nn.Sequential(
|
|
nn.MaxPool2d(2, stride=2),
|
|
|
|
conv_func(64, 64, **conv_kwargs, wbit=bitw[4], abit=bita[4]),
|
|
nn.BatchNorm2d(64),
|
|
|
|
conv_func(64, 64, **conv_kwargs, wbit=bitw[5], abit=bita[5]),
|
|
nn.BatchNorm2d(64),
|
|
|
|
conv_func(64, 64, **conv_kwargs, wbit=bitw[6], abit=bita[6]),
|
|
nn.BatchNorm2d(64),
|
|
)
|
|
|
|
self.layers_p4 = nn.Sequential(
|
|
conv_func(320, 64, **conv_kwargs, wbit=bitw[7], abit=bita[7]), # cat p2--64→64*4 + 64
|
|
nn.BatchNorm2d(64),
|
|
|
|
conv_func(64, 36, kernel_size = 1, stride =1, padding = 0, wbit=bitw[8], abit=bita[8])
|
|
)
|
|
self.yololayer = YOLOLayer([[20, 20], [20, 20], [20, 20], [20, 20], [20, 20], [20, 20]])
|
|
self.yolo_layers = [self.yololayer]
|
|
|
|
def forward(self, x):
|
|
img_size = x.shape[-2:]
|
|
yolo_out, out = [], []
|
|
|
|
x_p1 = self.layers_p1(x)
|
|
x_p2 = self.layers_p2(x_p1)
|
|
x_p2_reorg = self.reorg(x_p2)
|
|
x_p3 = self.layers_p3(x_p2)
|
|
x_p4_in = torch.cat([x_p2_reorg, x_p3], 1)
|
|
x_p4 = self.layers_p4(x_p4_in)
|
|
|
|
x = self.yololayer(x_p4, img_size)
|
|
|
|
yolo_out.append(x)
|
|
|
|
if self.training: # train
|
|
return yolo_out
|
|
else: # test
|
|
io, p = zip(*yolo_out) # inference output, training output
|
|
return torch.cat(io, 1), p
|
|
return x
|
|
|
|
# def fetch_arch_info(self):
|
|
fetch_arch_info = fixq_fetch_arch_info
|
|
|
|
class SkyNetFloat(nn.Module):
|
|
def __init__(self):
|
|
super(SkyNetFloat, self).__init__()
|
|
|
|
self.header = torch.IntTensor([0,0,0,0])
|
|
self.seen = 0
|
|
|
|
self.reorg = ReorgLayer(stride=2)
|
|
def conv_dw(inp, oup, stride):
|
|
return nn.Sequential(
|
|
nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
|
|
nn.BatchNorm2d(inp),
|
|
nn.ReLU6(inplace=True),
|
|
|
|
nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
|
|
nn.BatchNorm2d(oup),
|
|
nn.ReLU6(inplace=True),
|
|
)
|
|
self.layers = nn.Sequential(
|
|
conv_dw( 3, 48, 1), #dw1
|
|
nn.MaxPool2d(kernel_size=2, stride=2),
|
|
conv_dw( 48, 96, 1), #dw2
|
|
nn.MaxPool2d(kernel_size=2, stride=2),
|
|
conv_dw( 96, 192, 1), #dw3
|
|
nn.MaxPool2d(kernel_size=2, stride=2),
|
|
conv_dw(192, 384, 1), #dw4
|
|
conv_dw(384, 512, 1), #dw5
|
|
conv_dw(512, 96, 1),
|
|
nn.Conv2d(96, 36, 1, 1,bias=False),
|
|
)
|
|
self.yololayer = YOLOLayer([[20,20], [20,20], [20,20], [20,20], [20,20], [20,20]])
|
|
|
|
self.yolo_layers = [self.yololayer]
|
|
|
|
def forward(self, x):
|
|
img_size = x.shape[-2:]
|
|
yolo_out, out = [], []
|
|
|
|
x = self.layers(x)
|
|
x = self.yololayer(x, img_size)
|
|
|
|
yolo_out.append(x)
|
|
|
|
if self.training: # train
|
|
return yolo_out
|
|
else: # test
|
|
io, p = zip(*yolo_out) # inference output, training output
|
|
return torch.cat(io, 1), p
|
|
return x
|
|
|
|
class SkyNet_MixQ(nn.Module):
|
|
def __init__(self, share_weight = False):
|
|
super(SkyNet_MixQ, self).__init__()
|
|
|
|
self.conv_func = qm.MixActivConv2d
|
|
conv_func = self.conv_func
|
|
|
|
qspace = {'wbits':[2,3,4,5,6,7,8], 'abits':[2,3,4,5,6,7,8], 'share_weight':share_weight}
|
|
|
|
channels = [3,48,96,192,384,512,96]
|
|
|
|
layers = []
|
|
for i in range(6):
|
|
inp, oup = channels[i], channels[i+1]
|
|
layers.append(conv_func(
|
|
inp, inp, kernel_size=3, stride=1, padding=1, bias=False, groups = inp,
|
|
ActQ = qm.ImageInputQ if i==0 else qm.HWGQ, **qspace)) # dwconv
|
|
layers.append(nn.BatchNorm2d(channels[i]))
|
|
|
|
layers.append(conv_func(
|
|
inp, oup, kernel_size=1, stride=1, padding=0, bias=False,
|
|
**qspace)) # pwconv
|
|
|
|
layers.append(nn.BatchNorm2d(channels[i+1]))
|
|
|
|
if i<3:
|
|
layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
|
|
|
|
layers.append(conv_func(
|
|
channels[-1], 36, kernel_size=1, stride=1, padding=0, bias=False,
|
|
**qspace))
|
|
|
|
self.layers = nn.Sequential(*layers)
|
|
|
|
self.yololayer = YOLOLayer([[20,20], [20,20], [20,20], [20,20], [20,20], [20,20]])
|
|
self.yolo_layers = [self.yololayer]
|
|
|
|
def forward(self, x):
|
|
img_size = x.shape[-2:]
|
|
|
|
x = self.layers(x)
|
|
p = self.yololayer(x, img_size) # train: p; test: (io, p)
|
|
|
|
if self.training: # train
|
|
return [p]
|
|
else: # test
|
|
return p[0], (p[1],) # inference output, training output
|
|
|
|
# def fetch_best_arch(self):
|
|
fetch_best_arch = mixq_fetch_best_arch
|
|
|
|
# def complexity_loss(self):
|
|
complexity_loss = mixq_complexity_loss
|
|
mixq_complexity_loss_trivial = mixq_complexity_loss_trivial
|
|
bram_loss = mixq_bram_loss
|
|
|
|
class SkyNetk5_MixQ(nn.Module):
|
|
def __init__(self, share_weight = False):
|
|
super(SkyNetk5_MixQ, self).__init__()
|
|
|
|
self.conv_func = qm.MixActivConv2d
|
|
conv_func = self.conv_func
|
|
|
|
qspace = {'wbits':[2,3,4,5,6,7,8], 'abits':[2,3,4,5,6,7,8], 'share_weight':share_weight}
|
|
|
|
channels = [3,48,96,192,384,512,96]
|
|
|
|
layers = []
|
|
for i in range(6):
|
|
inp, oup = channels[i], channels[i+1]
|
|
layers.append(conv_func(
|
|
inp, inp, kernel_size=5, stride=1, padding=2, bias=False, groups = inp,
|
|
ActQ = qm.ImageInputQ if i==0 else qm.HWGQ, **qspace)) # dwconv
|
|
layers.append(nn.BatchNorm2d(channels[i]))
|
|
|
|
layers.append(conv_func(
|
|
inp, oup, kernel_size=1, stride=1, padding=0, bias=False,
|
|
**qspace)) # pwconv
|
|
|
|
layers.append(nn.BatchNorm2d(channels[i+1]))
|
|
|
|
if i<3:
|
|
layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
|
|
|
|
layers.append(conv_func(
|
|
channels[-1], 36, kernel_size=1, stride=1, padding=0, bias=False,
|
|
**qspace))
|
|
|
|
self.layers = nn.Sequential(*layers)
|
|
|
|
self.yololayer = YOLOLayer([[20,20], [20,20], [20,20], [20,20], [20,20], [20,20]])
|
|
self.yolo_layers = [self.yololayer]
|
|
|
|
def forward(self, x):
|
|
img_size = x.shape[-2:]
|
|
|
|
x = self.layers(x)
|
|
p = self.yololayer(x, img_size) # train: p; test: (io, p)
|
|
|
|
if self.training: # train
|
|
return [p]
|
|
else: # test
|
|
return p[0], (p[1],) # inference output, training output
|
|
|
|
# def fetch_best_arch(self):
|
|
fetch_best_arch = mixq_fetch_best_arch
|
|
|
|
# def complexity_loss(self):
|
|
complexity_loss= mixq_complexity_loss
|
|
bram_loss = mixq_bram_loss
|
|
|
|
class SkyNet_FixQ(nn.Module):
|
|
def __init__(self, bitw='', bita=''):
|
|
super(SkyNet_FixQ, self).__init__()
|
|
self.conv_func = qm.QuantActivConv2d
|
|
conv_func = self.conv_func
|
|
|
|
assert(len(bitw)==0 or len(bitw)==13)
|
|
assert(len(bita)==0 or len(bita)==13)
|
|
if len(bitw)==0: bitw='5'*13
|
|
if len(bita)==0: bita='8'*13
|
|
if isinstance(bitw, str):
|
|
bitw=list(map(int, bitw))
|
|
if isinstance(bita, str):
|
|
bita=list(map(int, bita))
|
|
|
|
self.bitw = bitw
|
|
self.bita = bita
|
|
self.model_params = {'bitw': bitw, 'bita': bita}
|
|
|
|
channels = [3,48,96,192,384,512,96]
|
|
|
|
layers = []
|
|
for i in range(6):
|
|
inp, oup = channels[i], channels[i+1]
|
|
layers.append(conv_func(
|
|
inp, inp, kernel_size=3, stride=1, padding=1, bias=False, groups = inp,
|
|
ActQ = qm.ImageInputQ if i==0 else qm.HWGQ, wbit=bitw[i*2], abit=bita[i*2])) # dwconv
|
|
layers.append(nn.BatchNorm2d(channels[i]))
|
|
|
|
layers.append(conv_func(
|
|
inp, oup, kernel_size=1, stride=1, padding=0, bias=False,
|
|
wbit=bitw[i*2+1], abit=bita[i*2+1])) # pwconv
|
|
|
|
layers.append(nn.BatchNorm2d(channels[i+1]))
|
|
|
|
if i<3:
|
|
layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
|
|
|
|
layers.append(conv_func(
|
|
channels[-1], 36, kernel_size=1, stride=1, padding=0, bias=False,
|
|
wbit=bitw[-1], abit=bita[-1]))
|
|
|
|
self.layers = nn.Sequential(*layers)
|
|
|
|
self.yololayer = YOLOLayer([[20,20], [20,20], [20,20], [20,20], [20,20], [20,20]])
|
|
self.yolo_layers = [self.yololayer]
|
|
|
|
def forward(self, x):
|
|
img_size = x.shape[-2:]
|
|
|
|
x = self.layers(x)
|
|
p = self.yololayer(x, img_size) # train: p; test: (io, p)
|
|
|
|
if self.training: # train
|
|
return [p]
|
|
else: # test
|
|
return p[0], (p[1],) # inference output, training output
|
|
|
|
# def fetch_arch_info(self):
|
|
fetch_arch_info = fixq_fetch_arch_info
|
|
|
|
class SkyNetk5_FixQ(nn.Module):
|
|
def __init__(self, bitw = '5555555555555', bita = '8888888888888'):
|
|
super(SkyNetk5_FixQ, self).__init__()
|
|
self.conv_func = qm.QuantActivConv2d
|
|
conv_func = self.conv_func
|
|
|
|
assert(len(bitw)==0 or len(bitw)==13)
|
|
assert(len(bita)==0 or len(bita)==13)
|
|
if len(bitw)==0: bitw='5'*13
|
|
if len(bita)==0: bita='8'*13
|
|
if isinstance(bitw, str):
|
|
bitw=list(map(int, bitw))
|
|
if isinstance(bita, str):
|
|
bita=list(map(int, bita))
|
|
|
|
self.bitw = bitw
|
|
self.bita = bita
|
|
self.model_params = {'bitw': bitw, 'bita': bita}
|
|
|
|
channels = [3,48,96,192,384,512,96]
|
|
|
|
layers = []
|
|
for i in range(6):
|
|
inp, oup = channels[i], channels[i+1]
|
|
layers.append(conv_func(
|
|
inp, inp, kernel_size=5, stride=1, padding=2, bias=False, groups = inp,
|
|
ActQ = qm.ImageInputQ if i==0 else qm.HWGQ, wbit=bitw[i*2], abit=bita[i*2])) # dwconv
|
|
layers.append(nn.BatchNorm2d(channels[i]))
|
|
|
|
layers.append(conv_func(
|
|
inp, oup, kernel_size=1, stride=1, padding=0, bias=False,
|
|
wbit=bitw[i*2+1], abit=bita[i*2+1])) # pwconv
|
|
|
|
layers.append(nn.BatchNorm2d(channels[i+1]))
|
|
|
|
if i<3:
|
|
layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
|
|
|
|
layers.append(conv_func(
|
|
channels[-1], 36, kernel_size=1, stride=1, padding=0, bias=False,
|
|
wbit=bitw[-1], abit=bita[-1]))
|
|
|
|
self.layers = nn.Sequential(*layers)
|
|
|
|
self.yololayer = YOLOLayer([[20,20], [20,20], [20,20], [20,20], [20,20], [20,20]])
|
|
self.yolo_layers = [self.yololayer]
|
|
|
|
def forward(self, x):
|
|
img_size = x.shape[-2:]
|
|
|
|
x = self.layers(x)
|
|
p = self.yololayer(x, img_size) # train: p; test: (io, p)
|
|
|
|
if self.training: # train
|
|
return [p]
|
|
else: # test
|
|
return p[0], (p[1],) # inference output, training output
|
|
|
|
# def fetch_arch_info(self):
|
|
fetch_arch_info = fixq_fetch_arch_info
|
|
|
|
class SkyNetk5Float(nn.Module):
|
|
def __init__(self):
|
|
super(SkyNetk5Float, self).__init__()
|
|
|
|
self.header = torch.IntTensor([0,0,0,0])
|
|
self.seen = 0
|
|
|
|
self.reorg = ReorgLayer(stride=2)
|
|
def conv_dw(inp, oup, stride):
|
|
return nn.Sequential(
|
|
nn.Conv2d(inp, inp, 5, stride, 2, groups=inp, bias=False),
|
|
nn.BatchNorm2d(inp),
|
|
nn.ReLU6(inplace=True),
|
|
|
|
nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
|
|
nn.BatchNorm2d(oup),
|
|
nn.ReLU6(inplace=True),
|
|
)
|
|
self.layers = nn.Sequential(
|
|
conv_dw( 3, 48, 1), #dw1
|
|
nn.MaxPool2d(kernel_size=2, stride=2),
|
|
conv_dw( 48, 96, 1), #dw2
|
|
nn.MaxPool2d(kernel_size=2, stride=2),
|
|
conv_dw( 96, 192, 1), #dw3
|
|
nn.MaxPool2d(kernel_size=2, stride=2),
|
|
conv_dw(192, 384, 1), #dw4
|
|
conv_dw(384, 512, 1), #dw5
|
|
conv_dw(512, 96, 1),
|
|
nn.Conv2d(96, 36, 1, 1,bias=False),
|
|
)
|
|
self.yololayer = YOLOLayer([[20,20], [20,20], [20,20], [20,20], [20,20], [20,20]])
|
|
|
|
self.yolo_layers = [self.yololayer]
|
|
|
|
def forward(self, x):
|
|
img_size = x.shape[-2:]
|
|
yolo_out, out = [], []
|
|
|
|
x = self.layers(x)
|
|
x = self.yololayer(x, img_size)
|
|
|
|
yolo_out.append(x)
|
|
|
|
if self.training: # train
|
|
return yolo_out
|
|
else: # test
|
|
io, p = zip(*yolo_out) # inference output, training output
|
|
return torch.cat(io, 1), p
|
|
return x
|
|
|
|
class SkyNetBypassFloat(nn.Module):
|
|
def __init__(self):
|
|
super(SkyNetBypassFloat, self).__init__()
|
|
|
|
self.header = torch.IntTensor([0,0,0,0])
|
|
self.seen = 0
|
|
|
|
self.reorg = ReorgLayer(stride=2)
|
|
def conv_dw(inp, oup, stride):
|
|
return nn.Sequential(
|
|
nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
|
|
nn.BatchNorm2d(inp),
|
|
nn.ReLU6(inplace=True),
|
|
|
|
nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
|
|
nn.BatchNorm2d(oup),
|
|
nn.ReLU6(inplace=True),
|
|
)
|
|
self.model_p1 = nn.Sequential(
|
|
conv_dw( 3, 48, 1), #dw1
|
|
nn.MaxPool2d(kernel_size=2, stride=2),
|
|
conv_dw( 48, 96, 1), #dw2
|
|
nn.MaxPool2d(kernel_size=2, stride=2),
|
|
conv_dw( 96, 192, 1), #dw3
|
|
)
|
|
self.model_p2 = nn.Sequential(
|
|
nn.MaxPool2d(kernel_size=2, stride=2),
|
|
conv_dw(192, 384, 1), #dw4
|
|
conv_dw(384, 512, 1), #dw5
|
|
)
|
|
self.model_p3 = nn.Sequential( #cat dw3(ch:192 -> 768) and dw5(ch:512)
|
|
conv_dw(1280, 96, 1),
|
|
nn.Conv2d(96, 36, 1, 1,bias=False),
|
|
)
|
|
self.yololayer = YOLOLayer([[20,20], [20,20], [20,20], [20,20], [20,20], [20,20]])
|
|
|
|
self.yolo_layers = [self.yololayer]
|
|
# self._initialize_weights()
|
|
|
|
def forward(self, x):
|
|
img_size = x.shape[-2:]
|
|
yolo_out, out = [], []
|
|
|
|
x_p1 = self.model_p1(x)
|
|
x_p1_reorg = self.reorg(x_p1)
|
|
x_p2 = self.model_p2(x_p1)
|
|
x_p3_in = torch.cat([x_p1_reorg, x_p2], 1)
|
|
x = self.model_p3(x_p3_in)
|
|
x = self.yololayer(x, img_size)
|
|
|
|
yolo_out.append(x)
|
|
|
|
if self.training: # train
|
|
return yolo_out
|
|
else: # test
|
|
io, p = zip(*yolo_out) # inference output, training output
|
|
return torch.cat(io, 1), p
|
|
return x |