Compare commits

..

4 Commits

Author SHA1 Message Date
yjk15133895098 d2881e6f04 PifPaf_tracking_heads.py 2021-10-29 13:35:49 +08:00
yjk15133895098 3e3d554a21 PifPaf_heads.py 2021-10-29 13:35:24 +08:00
yjk15133895098 dd57df73ee PifPaf_headmeta.py 2021-10-29 13:35:00 +08:00
yjk15133895098 2efbaf6c11 densenet of tensorlayer by yjk
densenet of tensorlayer by yjk
2021-09-29 19:28:05 +08:00
6 changed files with 862 additions and 153 deletions

View File

@ -1,153 +1,153 @@
import os
os.environ['TL_BACKEND'] = 'tensorflow'
import time
import multiprocessing
import tensorflow as tf
from tensorlayer.models import TrainOneStep
from tensorlayer.layers import Module
import tensorlayer as tl
from torchsummary import summary
from tensorlayer import logging
from tensorlayer.files import (assign_weights, maybe_download_and_extract)
from tensorlayer.layers import (BatchNorm, Conv2d, Dense, Elementwise, AdaptiveMeanPool2d, MaxPool2d , MeanPool2d,Concat,Dropout)
from tensorlayer.layers import Module, SequentialLayer
class _DenseLayer(Module):
    """Bottleneck layer of a dense block.

    The input goes through Conv(1x1) -> BatchNorm/ReLU -> Conv(3x3) ->
    BatchNorm/ReLU, and the resulting ``growth_rate`` feature maps are
    concatenated onto the input.

    :param in_channels: number of channels of the incoming feature map
    :param growth_rate: number of feature maps this layer adds
    :param bn_size: bottleneck multiplier; the 1x1 convolution emits
        ``bn_size * growth_rate`` channels
    """

    def __init__(self, in_channels, growth_rate, bn_size):
        super(_DenseLayer, self).__init__()
        W_init = tl.initializers.truncated_normal(stddev=5e-2)
        bottleneck_channels = bn_size * growth_rate
        self.layer_list = [
            Conv2d(bottleneck_channels, (1, 1), in_channels=in_channels, W_init=W_init),
            BatchNorm(num_features=bottleneck_channels, act='relu'),
            Conv2d(growth_rate, (3, 3), in_channels=bottleneck_channels, W_init=W_init),
            BatchNorm(num_features=growth_rate, act='relu'),
        ]
        self.dense_layer = SequentialLayer(self.layer_list)
        # The layers above are created with their default data format, which
        # in TensorLayer is channels-last, so the new feature maps must be
        # appended on the last axis. Axis 1 would be a spatial axis there.
        self.concat = Concat(-1)

    def forward(self, x):
        """Return ``x`` with the newly computed feature maps appended."""
        new_features = self.dense_layer(x)
        return self.concat([x, new_features])
class _DenseBlock(Module):
    """A stack of ``num_layers`` dense layers.

    Every layer appends ``growth_rate`` channels to its input, so layer ``i``
    is built for ``in_channels + growth_rate * i`` input channels.

    :param num_layers: number of dense layers in the block
    :param in_channels: number of channels entering the block
    :param bn_size: bottleneck multiplier forwarded to every layer
    :param growth_rate: number of channels added by every layer
    """

    def __init__(self, num_layers, in_channels, bn_size, growth_rate):
        super(_DenseBlock, self).__init__()
        self.layer_list = [
            _DenseLayer(in_channels + growth_rate * i, growth_rate, bn_size)
            for i in range(num_layers)
        ]
        self.dense_block = SequentialLayer(self.layer_list)

    def forward(self, x):
        """Run ``x`` through all dense layers in order."""
        return self.dense_block(x)
class _Transition(Module):
    """Transition between two dense blocks.

    Applies Conv(1x1) to change the channel count, BatchNorm/ReLU, and a
    2x2 mean pooling with stride 2.

    :param in_channels: number of channels produced by the preceding block
    :param out_channels: number of channels handed to the next block
    """

    def __init__(self, in_channels, out_channels):
        super(_Transition, self).__init__()
        W_init = tl.initializers.truncated_normal(stddev=5e-2)
        self.layer_list = [
            Conv2d(out_channels, (1, 1), in_channels=in_channels, W_init=W_init),
            BatchNorm(num_features=out_channels, act='relu'),
            MeanPool2d((2, 2), strides=(2, 2)),
        ]
        self.transition_layer = SequentialLayer(self.layer_list)

    def forward(self, x):
        """Compress and downsample ``x``."""
        return self.transition_layer(x)
class DenseNet_BC(Module):
    """DenseNet with bottleneck layers and compression (DenseNet-BC).

    :param growth_rate: number of channels every dense layer adds
    :param block_config: number of dense layers in each dense block
    :param bn_size: bottleneck multiplier of the 1x1 convolutions
    :param theta: compression factor applied by every transition layer
    :param num_classes: number of output classes; the value 10 selects the
        small 3x3 stem, any other value selects the 7x7/stride-2 stem
    """

    def __init__(self, growth_rate=12, block_config=(6, 12, 24, 16),
                 bn_size=4, theta=0.5, num_classes=10):
        super(DenseNet_BC, self).__init__()
        W_init = tl.initializers.truncated_normal(stddev=5e-2)
        W_init2 = tl.initializers.truncated_normal(stddev=0.04)
        b_init2 = tl.initializers.constant(value=0.1)

        # The initial convolution uses 2 * growth_rate filters.
        num_init_feature = 2 * growth_rate
        self.layer_list = []

        # num_classes == 10 stands for CIFAR-10.
        if num_classes == 10:
            self.layer_list.append(
                Conv2d(num_init_feature, (3, 3), strides=(1, 1), in_channels=3, W_init=W_init))
        else:
            self.layer_list.append(
                Conv2d(num_init_feature, (7, 7), strides=(2, 2), padding="valid",
                       in_channels=3, W_init=W_init))
            self.layer_list.append(BatchNorm(num_features=num_init_feature, act='relu'))
            self.layer_list.append(MaxPool2d((3, 3), strides=(2, 2)))

        num_feature = num_init_feature
        last_block = len(block_config) - 1
        for i, num_layers in enumerate(block_config):
            self.layer_list.append(_DenseBlock(num_layers, num_feature, bn_size, growth_rate))
            num_feature = num_feature + growth_rate * num_layers
            # Every block except the last one is followed by a compressing transition.
            if i != last_block:
                self.layer_list.append(_Transition(num_feature, int(num_feature * theta)))
                num_feature = int(num_feature * theta)

        self.layer_list.append(BatchNorm(num_features=num_feature, act='relu'))
        self.layer_list.append(AdaptiveMeanPool2d((1, 1)))
        self.features = SequentialLayer(self.layer_list)

        # Backend-independent flattening of the pooled feature map.
        self.flatten = tl.layers.Flatten()
        # Dense takes the number of output units first; the input width goes
        # in ``in_channels``. Passing both positionally would bind
        # ``num_classes`` to the activation argument.
        self.classifier = Dense(num_classes, in_channels=num_feature,
                                W_init=W_init2, b_init=b_init2)

    def forward(self, x):
        """Return the class scores for the image batch ``x``."""
        features = self.features(x)
        out = self.flatten(features)
        out = self.classifier(out)
        return out
# DenseNet_BC for ImageNet
def DenseNet121():
    """Build DenseNet-121: growth rate 32, blocks (6, 12, 24, 16), 1000 classes."""
    config = dict(growth_rate=32, block_config=(6, 12, 24, 16), num_classes=1000)
    return DenseNet_BC(**config)
def DenseNet169():
    """Build DenseNet-169: growth rate 32, blocks (6, 12, 32, 32), 1000 classes."""
    config = dict(growth_rate=32, block_config=(6, 12, 32, 32), num_classes=1000)
    return DenseNet_BC(**config)
def DenseNet201():
    """Build DenseNet-201: growth rate 32, blocks (6, 12, 48, 32), 1000 classes."""
    config = dict(growth_rate=32, block_config=(6, 12, 48, 32), num_classes=1000)
    return DenseNet_BC(**config)
def DenseNet161():
    """Build DenseNet-161: growth rate 48, blocks (6, 12, 36, 24), 1000 classes."""
    config = dict(growth_rate=48, block_config=(6, 12, 36, 24), num_classes=1000)
    return DenseNet_BC(**config)
# DenseNet_BC for cifar
def densenet_BC_100():
    """Build the CIFAR DenseNet-BC: growth rate 12, three blocks of 16 layers."""
    config = dict(growth_rate=12, block_config=(16, 16, 16))
    return DenseNet_BC(**config)
def builddensenet(name="densenet-100"):
    """Build a DenseNet by name.

    :param name: ``"densenet-100"`` (CIFAR DenseNet-BC-100) or
        ``"densenet-121"`` (ImageNet DenseNet-121)
    :return: the freshly constructed network
    :raises SystemExit: with exit status 1 when ``name`` is not recognised
    """
    if name == "densenet-100":
        return densenet_BC_100()
    if name == "densenet-121":
        return DenseNet121()
    print("not found the net")
    # Terminate with a failure status. ``exit(0)`` reported success and relied
    # on the interactive ``exit`` helper that the ``site`` module injects.
    raise SystemExit(1)
def test():
    """Smoke-test the CIFAR DenseNet-BC-100 by pushing a random batch through it."""
    net = densenet_BC_100()
    # The model is a TensorLayer module on the TensorFlow backend, which
    # torchsummary cannot inspect (it expects a torch.nn.Module). Run a
    # forward pass on a random channels-last batch instead.
    x = tf.random.normal((2, 32, 32, 3))
    y = net(x)
    print(y.shape)


if __name__ == '__main__':
    test()
import os
os.environ['TL_BACKEND'] = 'tensorflow'
import time
import multiprocessing
import tensorflow as tf
from tensorlayer.models import TrainOneStep
from tensorlayer.layers import Module
import tensorlayer as tl
from torchsummary import summary
from tensorlayer import logging
from tensorlayer.files import (assign_weights, maybe_download_and_extract)
from tensorlayer.layers import (BatchNorm, Conv2d, Dense, Elementwise, AdaptiveMeanPool2d, MaxPool2d , MeanPool2d,Concat,Dropout)
from tensorlayer.layers import Module, SequentialLayer
class _DenseLayer(Module):
    """Bottleneck layer of a dense block.

    The input goes through Conv(1x1) -> BatchNorm/ReLU -> Conv(3x3) ->
    BatchNorm/ReLU, and the resulting ``growth_rate`` feature maps are
    concatenated onto the input.

    :param in_channels: number of channels of the incoming feature map
    :param growth_rate: number of feature maps this layer adds
    :param bn_size: bottleneck multiplier; the 1x1 convolution emits
        ``bn_size * growth_rate`` channels
    """

    def __init__(self, in_channels, growth_rate, bn_size):
        super(_DenseLayer, self).__init__()
        W_init = tl.initializers.truncated_normal(stddev=5e-2)
        bottleneck_channels = bn_size * growth_rate
        self.layer_list = [
            Conv2d(bottleneck_channels, (1, 1), in_channels=in_channels, W_init=W_init),
            BatchNorm(num_features=bottleneck_channels, act='relu'),
            Conv2d(growth_rate, (3, 3), in_channels=bottleneck_channels, W_init=W_init),
            BatchNorm(num_features=growth_rate, act='relu'),
        ]
        self.dense_layer = SequentialLayer(self.layer_list)
        # The layers above are created with their default data format, which
        # in TensorLayer is channels-last, so the new feature maps must be
        # appended on the last axis. Axis 1 would be a spatial axis there.
        self.concat = Concat(-1)

    def forward(self, x):
        """Return ``x`` with the newly computed feature maps appended."""
        new_features = self.dense_layer(x)
        return self.concat([x, new_features])
class _DenseBlock(Module):
    """A stack of ``num_layers`` dense layers.

    Every layer appends ``growth_rate`` channels to its input, so layer ``i``
    is built for ``in_channels + growth_rate * i`` input channels.

    :param num_layers: number of dense layers in the block
    :param in_channels: number of channels entering the block
    :param bn_size: bottleneck multiplier forwarded to every layer
    :param growth_rate: number of channels added by every layer
    """

    def __init__(self, num_layers, in_channels, bn_size, growth_rate):
        super(_DenseBlock, self).__init__()
        self.layer_list = [
            _DenseLayer(in_channels + growth_rate * i, growth_rate, bn_size)
            for i in range(num_layers)
        ]
        self.dense_block = SequentialLayer(self.layer_list)

    def forward(self, x):
        """Run ``x`` through all dense layers in order."""
        return self.dense_block(x)
class _Transition(Module):
    """Transition between two dense blocks.

    Applies Conv(1x1) to change the channel count, BatchNorm/ReLU, and a
    2x2 mean pooling with stride 2.

    :param in_channels: number of channels produced by the preceding block
    :param out_channels: number of channels handed to the next block
    """

    def __init__(self, in_channels, out_channels):
        super(_Transition, self).__init__()
        W_init = tl.initializers.truncated_normal(stddev=5e-2)
        self.layer_list = [
            Conv2d(out_channels, (1, 1), in_channels=in_channels, W_init=W_init),
            BatchNorm(num_features=out_channels, act='relu'),
            MeanPool2d((2, 2), strides=(2, 2)),
        ]
        self.transition_layer = SequentialLayer(self.layer_list)

    def forward(self, x):
        """Compress and downsample ``x``."""
        return self.transition_layer(x)
class DenseNet_BC(Module):
    """DenseNet with bottleneck layers and compression (DenseNet-BC).

    :param growth_rate: number of channels every dense layer adds
    :param block_config: number of dense layers in each dense block
    :param bn_size: bottleneck multiplier of the 1x1 convolutions
    :param theta: compression factor applied by every transition layer
    :param num_classes: number of output classes; the value 10 selects the
        small 3x3 stem, any other value selects the 7x7/stride-2 stem
    """

    def __init__(self, growth_rate=12, block_config=(6, 12, 24, 16),
                 bn_size=4, theta=0.5, num_classes=10):
        super(DenseNet_BC, self).__init__()
        W_init = tl.initializers.truncated_normal(stddev=5e-2)
        W_init2 = tl.initializers.truncated_normal(stddev=0.04)
        b_init2 = tl.initializers.constant(value=0.1)

        # The initial convolution uses 2 * growth_rate filters.
        num_init_feature = 2 * growth_rate
        self.layer_list = []

        # num_classes == 10 stands for CIFAR-10.
        if num_classes == 10:
            self.layer_list.append(
                Conv2d(num_init_feature, (3, 3), strides=(1, 1), in_channels=3, W_init=W_init))
        else:
            self.layer_list.append(
                Conv2d(num_init_feature, (7, 7), strides=(2, 2), padding="valid",
                       in_channels=3, W_init=W_init))
            self.layer_list.append(BatchNorm(num_features=num_init_feature, act='relu'))
            self.layer_list.append(MaxPool2d((3, 3), strides=(2, 2)))

        num_feature = num_init_feature
        last_block = len(block_config) - 1
        for i, num_layers in enumerate(block_config):
            self.layer_list.append(_DenseBlock(num_layers, num_feature, bn_size, growth_rate))
            num_feature = num_feature + growth_rate * num_layers
            # Every block except the last one is followed by a compressing transition.
            if i != last_block:
                self.layer_list.append(_Transition(num_feature, int(num_feature * theta)))
                num_feature = int(num_feature * theta)

        self.layer_list.append(BatchNorm(num_features=num_feature, act='relu'))
        self.layer_list.append(AdaptiveMeanPool2d((1, 1)))
        self.features = SequentialLayer(self.layer_list)

        # Backend-independent flattening of the pooled feature map.
        self.flatten = tl.layers.Flatten()
        # Dense takes the number of output units first; the input width goes
        # in ``in_channels``. Passing both positionally would bind
        # ``num_classes`` to the activation argument.
        self.classifier = Dense(num_classes, in_channels=num_feature,
                                W_init=W_init2, b_init=b_init2)

    def forward(self, x):
        """Return the class scores for the image batch ``x``."""
        features = self.features(x)
        out = self.flatten(features)
        out = self.classifier(out)
        return out
# DenseNet_BC for ImageNet
def DenseNet121():
    """Build DenseNet-121: growth rate 32, blocks (6, 12, 24, 16), 1000 classes."""
    config = dict(growth_rate=32, block_config=(6, 12, 24, 16), num_classes=1000)
    return DenseNet_BC(**config)
def DenseNet169():
    """Build DenseNet-169: growth rate 32, blocks (6, 12, 32, 32), 1000 classes."""
    config = dict(growth_rate=32, block_config=(6, 12, 32, 32), num_classes=1000)
    return DenseNet_BC(**config)
def DenseNet201():
    """Build DenseNet-201: growth rate 32, blocks (6, 12, 48, 32), 1000 classes."""
    config = dict(growth_rate=32, block_config=(6, 12, 48, 32), num_classes=1000)
    return DenseNet_BC(**config)
def DenseNet161():
    """Build DenseNet-161: growth rate 48, blocks (6, 12, 36, 24), 1000 classes."""
    config = dict(growth_rate=48, block_config=(6, 12, 36, 24), num_classes=1000)
    return DenseNet_BC(**config)
# DenseNet_BC for cifar
def densenet_BC_100():
    """Build the CIFAR DenseNet-BC: growth rate 12, three blocks of 16 layers."""
    config = dict(growth_rate=12, block_config=(16, 16, 16))
    return DenseNet_BC(**config)
def builddensenet(name="densenet-100"):
    """Build a DenseNet by name.

    :param name: ``"densenet-100"`` (CIFAR DenseNet-BC-100) or
        ``"densenet-121"`` (ImageNet DenseNet-121)
    :return: the freshly constructed network
    :raises SystemExit: with exit status 1 when ``name`` is not recognised
    """
    if name == "densenet-100":
        return densenet_BC_100()
    if name == "densenet-121":
        return DenseNet121()
    print("not found the net")
    # Terminate with a failure status. ``exit(0)`` reported success and relied
    # on the interactive ``exit`` helper that the ``site`` module injects.
    raise SystemExit(1)
def test():
    """Smoke-test the CIFAR DenseNet-BC-100 by pushing a random batch through it."""
    net = densenet_BC_100()
    # The model is a TensorLayer module on the TensorFlow backend, which
    # torchsummary cannot inspect (it expects a torch.nn.Module). Run a
    # forward pass on a random channels-last batch instead.
    x = tf.random.normal((2, 32, 32, 3))
    y = net(x)
    print(y.shape)


if __name__ == '__main__':
    test()

192
headmeta.py Normal file
View File

@ -0,0 +1,192 @@
"""Head meta objects contain meta information about head networks.
This includes the name, the name of the individual fields, the composition, etc.
"""
from dataclasses import dataclass, field
from typing import Any, ClassVar, List, Tuple
import numpy as np
@dataclass
class Base:
    """Common meta information shared by all head types.

    ``name`` and ``dataset`` are constructor arguments. ``head_index``,
    ``base_stride`` and ``upsample_stride`` are excluded from ``__init__``
    and are meant to be assigned on the instance afterwards.
    """

    name: str
    dataset: str

    head_index: int = field(default=None, init=False)
    base_stride: int = field(default=None, init=False)
    upsample_stride: int = field(default=1, init=False)

    @property
    def stride(self) -> int:
        """Return ``base_stride // upsample_stride``, or None while ``base_stride`` is unset."""
        base = self.base_stride
        return None if base is None else base // self.upsample_stride

    @property
    def n_fields(self) -> int:
        """Number of fields of this head; subclasses must provide it."""
        raise NotImplementedError
@dataclass
class Cif(Base):
    """Head meta data for a Composite Intensity Field (CIF)."""

    # Constructor arguments.
    keypoints: List[str]
    sigmas: List[float]
    pose: Any = None
    draw_skeleton: List[Tuple[int, int]] = None
    score_weights: List[float] = None

    # Layout of one field; ClassVar keeps these out of the dataclass fields.
    n_confidences: ClassVar[int] = 1
    n_vectors: ClassVar[int] = 1
    n_scales: ClassVar[int] = 1

    # Unannotated, hence plain class attributes rather than dataclass fields.
    vector_offsets = [True]
    decoder_min_scale = 0.0
    decoder_seed_mask: List[int] = None

    training_weights: List[float] = None

    @property
    def n_fields(self):
        """One field per keypoint."""
        keypoints = self.keypoints
        return len(keypoints)
@dataclass
class Caf(Base):
    """Head meta data for a Composite Association Field (CAF)."""

    # Constructor arguments.
    keypoints: List[str]
    sigmas: List[float]
    skeleton: List[Tuple[int, int]]
    pose: Any = None
    sparse_skeleton: List[Tuple[int, int]] = None
    dense_to_sparse_radius: float = 2.0
    only_in_field_of_view: bool = False

    # Layout of one field; ClassVar keeps these out of the dataclass fields.
    n_confidences: ClassVar[int] = 1
    n_vectors: ClassVar[int] = 2
    n_scales: ClassVar[int] = 2

    # Unannotated, hence plain class attributes rather than dataclass fields.
    vector_offsets = [True, True]
    decoder_min_distance = 0.0
    decoder_max_distance = float('inf')
    decoder_confidence_scales: List[float] = None

    training_weights: List[float] = None

    @property
    def n_fields(self):
        """One field per skeleton connection."""
        connections = self.skeleton
        return len(connections)

    @staticmethod
    def concatenate(metas):
        """Merge several CAF metas into one.

        The skeletons and the decoder confidence scales of all ``metas`` are
        chained in order; a meta without confidence scales contributes 1.0
        for each of its connections. Every other value, including
        ``head_index`` and the strides, is taken from the first meta.
        """
        # TODO: by keypoint name, update skeleton indices if meta.keypoints
        # is not the same for all metas.
        first = metas[0]

        joined_skeleton = []
        joined_scales = []
        for meta in metas:
            joined_skeleton.extend(meta.skeleton)
            joined_scales.extend(
                meta.decoder_confidence_scales
                or [1.0 for _ in meta.skeleton]
            )

        concatenated = Caf(
            name='_'.join(m.name for m in metas),
            dataset=first.dataset,
            keypoints=first.keypoints,
            sigmas=first.sigmas,
            pose=first.pose,
            skeleton=joined_skeleton,
            sparse_skeleton=first.sparse_skeleton,
            only_in_field_of_view=first.only_in_field_of_view,
            decoder_confidence_scales=joined_scales,
        )
        # These are init=False on the base class, so copy them explicitly.
        concatenated.head_index = first.head_index
        concatenated.base_stride = first.base_stride
        concatenated.upsample_stride = first.upsample_stride
        return concatenated
@dataclass
class CifDet(Base):
    """Head meta data for a Composite Intensity Field (CIF) for Detection."""

    # Constructor argument.
    categories: List[str]

    # Layout of one field; ClassVar keeps these out of the dataclass fields.
    n_confidences: ClassVar[int] = 1
    n_vectors: ClassVar[int] = 2
    n_scales: ClassVar[int] = 0

    # Unannotated, hence plain class attributes rather than dataclass fields.
    vector_offsets = [True, False]
    decoder_min_scale = 0.0

    training_weights: List[float] = None

    @property
    def n_fields(self):
        """One field per category."""
        categories = self.categories
        return len(categories)
@dataclass
class TSingleImageCif(Cif):
    """Single-Image CIF head in tracking models.

    Adds nothing to :class:`Cif`; the separate type only marks the head kind.
    """
@dataclass
class TSingleImageCaf(Caf):
    """Single-Image CAF head in tracking models.

    Adds nothing to :class:`Caf`; the separate type only marks the head kind.
    """
@dataclass
class Tcaf(Base):
    """Tracking Composite Association Field.

    The ``*_single_frame`` values describe one frame. Unless given
    explicitly, the two-frame ``keypoints``, ``sigmas``, ``pose`` and
    ``draw_skeleton`` are derived in ``__post_init__`` by concatenating the
    single-frame value with itself.
    """

    keypoints_single_frame: List[str]
    sigmas_single_frame: List[float]
    pose_single_frame: Any
    draw_skeleton_single_frame: List[Tuple[int, int]] = None
    keypoints: List[str] = None
    sigmas: List[float] = None
    pose: Any = None
    draw_skeleton: List[Tuple[int, int]] = None
    only_in_field_of_view: bool = False

    # Layout of one field; ClassVar keeps these out of the dataclass fields.
    n_confidences: ClassVar[int] = 1
    n_vectors: ClassVar[int] = 2
    n_scales: ClassVar[int] = 2

    training_weights: List[float] = None

    # Unannotated, hence a plain class attribute rather than a dataclass field.
    vector_offsets = [True, True]

    def __post_init__(self):
        """Fill in the two-frame values that were not passed explicitly."""

        def doubled(single_frame):
            # Two copies of the single-frame data, joined along the first axis.
            return np.concatenate((single_frame, single_frame), axis=0)

        if self.keypoints is None:
            self.keypoints = doubled(self.keypoints_single_frame)
        if self.sigmas is None:
            self.sigmas = doubled(self.sigmas_single_frame)
        if self.pose is None:
            self.pose = doubled(self.pose_single_frame)
        # draw_skeleton_single_frame is optional. Concatenating its default
        # None would raise, so draw_skeleton stays None in that case.
        if self.draw_skeleton is None and self.draw_skeleton_single_frame is not None:
            self.draw_skeleton = doubled(self.draw_skeleton_single_frame)

    @property
    def skeleton(self):
        """Connect each keypoint (1-based) to its counterpart in the second frame."""
        n_single = len(self.keypoints_single_frame)
        return [(i + 1, i + 1 + n_single) for i in range(n_single)]

    @property
    def n_fields(self):
        """One field per single-frame keypoint."""
        return len(self.keypoints_single_frame)

412
heads.py Normal file
View File

@ -0,0 +1,412 @@
"""Head networks."""
import os
# Select the backend before tensorlayer is imported; setting the variable
# after the import is too late for it to influence the import.
os.environ['TL_BACKEND'] = 'pytorch'
import argparse
import functools
import logging as std_logging
import math
import tensorlayer as tl
from tensorlayer import logging
from tensorlayer.layers import Conv2d,Dropout
import torch
from .import headmeta
from tensorlayer.layers import Module
import numpy as np
# LOG is used with the standard-library logger interface
# (LOG.debug(fmt, *args)), so take it from the standard logging module.
LOG = std_logging.getLogger(__name__)
@functools.lru_cache(maxsize=16)
def index_field_torch(shape, *, device=None, unsqueeze=(0, 0)):
    """Return a tensor holding the x and y index of every cell of a 2D field.

    For ``shape = (height, width)`` the result, before unsqueezing, has shape
    ``(2, height, width)``: channel 0 holds the column (x) index of each cell
    and channel 1 holds the row (y) index.

    Results are memoised, so ``shape`` and ``unsqueeze`` must be hashable
    (tuples) and callers must not modify the returned tensor in place.

    :param shape: ``(height, width)`` of the field
    :param device: torch device on which the tensor is created
    :param unsqueeze: dimensions at which singleton axes are inserted, in order
    :return: a float torch tensor on ``device``
    """
    assert len(shape) == 2
    height, width = shape

    xy = torch.empty((2, height, width), device=device)
    xy[0] = torch.arange(width, device=device)
    # Column vector, so the row index broadcasts across every column.
    xy[1] = torch.arange(height, device=device).unsqueeze(1)

    for dim in unsqueeze:
        xy = torch.unsqueeze(xy, dim)

    return xy
class PifHFlip(Module):
    """Horizontally flip the output fields of a keypoint intensity head.

    :param keypoints: keypoint names, in field order
    :param hflip: mapping from a keypoint name to the name of its mirrored
        counterpart; keypoints missing from the mapping keep their position
    """

    def __init__(self, keypoints, hflip):
        super().__init__()

        mirrored = []
        for kp_i, kp_name in enumerate(keypoints):
            if kp_name in hflip:
                mirrored.append(keypoints.index(hflip[kp_name]))
            else:
                mirrored.append(kp_i)
        flip_indices = torch.LongTensor(mirrored)

        LOG.debug('hflip indices: %s', flip_indices)
        self.register_buffer('flip_indices', flip_indices)

    def forward(self, *args):
        """Return the fields with left/right channels swapped and the last axis reversed."""
        out = [
            torch.flip(
                torch.index_select(field, 1, self.flip_indices),
                dims=[len(field.shape) - 1],
            )
            for field in args
        ]

        # flip the x-coordinate of the vector component
        out[1][:, :, 0, :, :] *= -1.0

        return out
class PafHFlip(Module):
    """Horizontally flip the output fields of an association head.

    :param keypoints: keypoint names
    :param skeleton: connections as pairs of 1-based keypoint indices
    :param hflip: mapping from a keypoint name to the name of its mirrored
        counterpart; names missing from the mapping are kept as they are
    """

    def __init__(self, keypoints, skeleton, hflip):
        super().__init__()

        skeleton_names = []
        for j1, j2 in skeleton:
            skeleton_names.append((keypoints[j1 - 1], keypoints[j2 - 1]))

        flipped_skeleton_names = []
        for name1, name2 in skeleton_names:
            flipped_skeleton_names.append((hflip.get(name1, name1), hflip.get(name2, name2)))

        LOG.debug('skeleton = %s, flipped_skeleton = %s',
                  skeleton_names, flipped_skeleton_names)

        # Start from the identity permutation and redirect every connection
        # that has a mirrored counterpart.
        flip_indices = list(range(len(skeleton)))
        reverse_direction = []
        for paf_i, connection in enumerate(skeleton_names):
            swapped = (connection[1], connection[0])
            if connection in flipped_skeleton_names:
                flip_indices[paf_i] = flipped_skeleton_names.index(connection)
            # The counterpart exists with its joints in the opposite order:
            # remember to swap the two vector components as well.
            if swapped in flipped_skeleton_names:
                flip_indices[paf_i] = flipped_skeleton_names.index(swapped)
                reverse_direction.append(paf_i)
        LOG.debug('hflip indices: %s, reverse: %s', flip_indices, reverse_direction)

        self.register_buffer('flip_indices', torch.LongTensor(flip_indices))
        self.register_buffer('reverse_direction', torch.LongTensor(reverse_direction))

    def forward(self, *args):
        """Return the fields with connections permuted and the last axis reversed."""
        out = [
            torch.flip(
                torch.index_select(field, 1, self.flip_indices),
                dims=[len(field.shape) - 1],
            )
            for field in args
        ]

        # flip the x-coordinate of the vector components
        for vector_field in (out[1], out[2]):
            vector_field[:, :, 0, :, :] *= -1.0

        # reverse direction
        for paf_i in self.reverse_direction:
            saved = torch.clone(out[1][:, paf_i])
            out[1][:, paf_i] = out[2][:, paf_i]
            out[2][:, paf_i] = saved

        return out
class HeadNetwork(Module):
    """Base class for head networks.

    Stores the head meta and the input feature count. The ``cli`` and
    ``configure`` hooks do nothing here and ``forward`` must be overridden.

    :param meta: head meta instance to configure this head network
    :param in_features: number of input features which should be equal to the
        base network's output features
    """

    def __init__(self, meta: headmeta.Base, in_features: int):
        super().__init__()

        self.in_features = in_features
        self.meta = meta

    @classmethod
    def cli(cls, parser: argparse.ArgumentParser):
        """Command line interface (CLI) to extend argument parser."""
        return None

    @classmethod
    def configure(cls, args: argparse.Namespace):
        """Take the parsed argument parser output and configure class variables."""
        return None

    def forward(self, x):
        """Compute the head output for ``x``; implemented by subclasses."""
        raise NotImplementedError
class CompositeField3(HeadNetwork):
    """Composite field head with three regression channels per vector.

    One convolution produces, for every field, ``n_confidences`` confidence
    channels, ``3 * n_vectors`` regression channels (two coordinates per
    vector followed by one width channel per vector) and ``n_scales`` scale
    channels. With ``meta.upsample_stride > 1`` the output is upsampled with
    a pixel shuffle.

    In training mode the raw activations are returned. In evaluation mode
    the confidences go through a sigmoid, the cell index is added to the
    vectors flagged in ``meta.vector_offsets``, the scales go through a
    softplus, and the channels are rearranged so that a single width channel
    comes first.

    :param meta: head meta instance to configure this head network
    :param in_features: number of input feature channels
    :param kernel_size: kernel size of the convolution
    :param padding: padding of the convolution
    :param dilation: dilation of the convolution
    """

    dropout_p = 0.0
    inplace_ops = True

    def __init__(self,
                 meta: headmeta.Base,
                 in_features, *,
                 kernel_size=1, padding=0, dilation=1):
        super().__init__(meta, in_features)

        LOG.debug('%s config: fields = %d, confidences = %d, vectors = %d, scales = %d '
                  'kernel = %d, padding = %d, dilation = %d',
                  meta.name, meta.n_fields, meta.n_confidences, meta.n_vectors, meta.n_scales,
                  kernel_size, padding, dilation)

        self.dropout = Dropout(p=self.dropout_p)

        # convolution
        out_features = meta.n_fields * (meta.n_confidences + meta.n_vectors * 3 + meta.n_scales)
        # NOTE(review): confirm that the Conv2d keyword names used here match
        # the installed TensorLayer version.
        self.conv = Conv2d(out_features * (meta.upsample_stride ** 2),
                           (kernel_size,kernel_size), padding=(padding,padding), dilation=(dilation,dilation),in_channels=in_features)

        # upsample
        assert meta.upsample_stride >= 1
        self.upsample_op = None
        if meta.upsample_stride > 1:
            self.upsample_op = torch.nn.PixelShuffle(meta.upsample_stride)

    @classmethod
    def cli(cls, parser: argparse.ArgumentParser):
        """Register the ``--cf3-*`` command line options on ``parser``."""
        group = parser.add_argument_group('CompositeField3')
        group.add_argument('--cf3-dropout', default=cls.dropout_p, type=float,
                           help='[experimental] zeroing probability of feature in head input')
        assert cls.inplace_ops
        group.add_argument('--cf3-no-inplace-ops', dest='cf3_inplace_ops',
                           default=True, action='store_false',
                           help='alternative graph without inplace ops')

    @classmethod
    def configure(cls, args: argparse.Namespace):
        """Copy the parsed ``--cf3-*`` options onto the class variables."""
        cls.dropout_p = args.cf3_dropout
        cls.inplace_ops = args.cf3_inplace_ops

    @property
    def sparse_task_parameters(self):
        """Return the convolution weight as a one-element list."""
        return [self.conv.weight]

    def forward(self, x):  # pylint: disable=arguments-differ
        """Return the composite field tensor for the feature map ``x``."""
        x = self.dropout(x)
        x = self.conv(x)
        # upscale
        if self.upsample_op is not None:
            x = self.upsample_op(x)
            low_cut = (self.meta.upsample_stride - 1) // 2
            high_cut = math.ceil((self.meta.upsample_stride - 1) / 2.0)
            if self.training:
                # negative axes not supported by ONNX TensorRT
                x = x[:, :, low_cut:-high_cut, low_cut:-high_cut]
            else:
                # the int() forces the tracer to use static shape
                x = x[:, :, low_cut:int(x.shape[2]) - high_cut, low_cut:int(x.shape[3]) - high_cut]

        # Extract some shape parameters once.
        # Convert to int so that shape is constant in ONNX export.
        x_size = x.size()
        batch_size = x_size[0]
        feature_height = int(x_size[2])
        feature_width = int(x_size[3])

        x = x.view(
            batch_size,
            self.meta.n_fields,
            self.meta.n_confidences + self.meta.n_vectors * 3 + self.meta.n_scales,
            feature_height,
            feature_width
        )

        if not self.training and self.inplace_ops:
            # classification
            classes_x = x[:, :, 0:self.meta.n_confidences]
            # In-place variant: classes_x is a view into x, so this writes
            # the squashed confidences back into x. A plain sigmoid call
            # whose result is dropped would leave x untouched.
            torch.sigmoid_(classes_x)

            # regressions x: add index
            if self.meta.n_vectors > 0:
                index_field = index_field_torch((feature_height, feature_width), device=x.device)
                first_reg_feature = self.meta.n_confidences
                for i, do_offset in enumerate(self.meta.vector_offsets):
                    if not do_offset:
                        continue
                    reg_x = x[:, :, first_reg_feature + i * 2:first_reg_feature + (i + 1) * 2]
                    reg_x.add_(index_field)

            # scale
            first_scale_feature = self.meta.n_confidences + self.meta.n_vectors * 3
            scales_x = x[:, :, first_scale_feature:first_scale_feature + self.meta.n_scales]
            scales_x[:] = torch.nn.functional.softplus(scales_x)

            # remove width in the middle and add one to the front (v4 style)
            first_width_feature = self.meta.n_confidences + self.meta.n_vectors * 2
            x = torch.cat([
                x[:, :, first_width_feature:first_width_feature + 1],
                x[:, :, :first_width_feature],
                x[:, :, self.meta.n_confidences + self.meta.n_vectors * 3:],
            ], dim=2)
        elif not self.training and not self.inplace_ops:
            # TODO: CoreMLv4 does not like strided slices.
            # Strides are avoided when switching the first and second dim
            # temporarily.
            x = torch.transpose(x, 1, 2)

            # classification
            classes_x = x[:, 0:self.meta.n_confidences]
            classes_x = torch.sigmoid(classes_x)

            # regressions x
            first_reg_feature = self.meta.n_confidences
            regs_x = [
                x[:, first_reg_feature + i * 2:first_reg_feature + (i + 1) * 2]
                for i in range(self.meta.n_vectors)
            ]
            # regressions x: add index
            index_field = index_field_torch(
                (feature_height, feature_width), device=x.device, unsqueeze=(1, 0))
            # TODO: coreml export does not work with the index_field creation in the graph.
            index_field = torch.from_numpy(index_field.numpy())
            regs_x = [reg_x + index_field if do_offset else reg_x
                      for reg_x, do_offset in zip(regs_x, self.meta.vector_offsets)]

            # regressions logb
            first_reglogb_feature = self.meta.n_confidences + self.meta.n_vectors * 2
            single_reg_logb = x[:, first_reglogb_feature:first_reglogb_feature + 1]

            # scale
            first_scale_feature = self.meta.n_confidences + self.meta.n_vectors * 3
            scales_x = x[:, first_scale_feature:first_scale_feature + self.meta.n_scales]
            scales_x = torch.nn.functional.softplus(scales_x)

            # concat with width in front (v4 style)
            x = torch.cat([single_reg_logb, classes_x, *regs_x, scales_x], dim=1)

            # TODO: CoreMLv4 problem (see above).
            x = torch.transpose(x, 1, 2)

        return x
class CompositeField4(HeadNetwork):
    """Composite field head with a single width channel per field.

    One convolution produces ``n_components`` channels for every field: one
    width channel, ``n_confidences`` confidence channels, two coordinates
    for each of the ``n_vectors`` vectors, and ``n_scales`` scale channels.
    With ``meta.upsample_stride > 1`` the output is upsampled with a pixel
    shuffle.

    In training mode the raw activations are returned. In evaluation mode
    the confidences go through a sigmoid, the cell index is added to the
    vectors flagged in ``meta.vector_offsets`` and the scales go through a
    softplus.

    :param meta: head meta instance to configure this head network
    :param in_features: number of input feature channels
    :param kernel_size: kernel size of the convolution
    :param padding: padding of the convolution
    :param dilation: dilation of the convolution
    """

    dropout_p = 0.0
    inplace_ops = True

    def __init__(self,
                 meta: headmeta.Base,
                 in_features, *,
                 kernel_size=1, padding=0, dilation=1):
        super().__init__(meta, in_features)

        LOG.debug('%s config: fields = %d, confidences = %d, vectors = %d, scales = %d '
                  'kernel = %d, padding = %d, dilation = %d',
                  meta.name, meta.n_fields, meta.n_confidences, meta.n_vectors, meta.n_scales,
                  kernel_size, padding, dilation)

        self.dropout = Dropout(p=self.dropout_p)

        # convolution
        self.n_components = 1 + meta.n_confidences + meta.n_vectors * 2 + meta.n_scales
        # NOTE(review): confirm that the Conv2d keyword names used here match
        # the installed TensorLayer version.
        self.conv = Conv2d(
            meta.n_fields * self.n_components * (meta.upsample_stride ** 2),
            (kernel_size,kernel_size), padding=(padding,padding), dilation=(dilation,dilation),in_channels= in_features
        )

        # upsample
        assert meta.upsample_stride >= 1
        self.upsample_op = None
        if meta.upsample_stride > 1:
            self.upsample_op = torch.nn.PixelShuffle(meta.upsample_stride)

    @classmethod
    def cli(cls, parser: argparse.ArgumentParser):
        """Register the ``--cf4-*`` command line options on ``parser``."""
        group = parser.add_argument_group('CompositeField4')
        group.add_argument('--cf4-dropout', default=cls.dropout_p, type=float,
                           help='[experimental] zeroing probability of feature in head input')
        assert cls.inplace_ops
        group.add_argument('--cf4-no-inplace-ops', dest='cf4_inplace_ops',
                           default=True, action='store_false',
                           help='alternative graph without inplace ops')

    @classmethod
    def configure(cls, args: argparse.Namespace):
        """Copy the parsed ``--cf4-*`` options onto the class variables."""
        cls.dropout_p = args.cf4_dropout
        cls.inplace_ops = args.cf4_inplace_ops

    @property
    def sparse_task_parameters(self):
        """Return the convolution weight as a one-element list."""
        return [self.conv.weight]

    def forward(self, x):  # pylint: disable=arguments-differ
        """Return the composite field tensor for the feature map ``x``."""
        x = self.dropout(x)
        x = self.conv(x)
        # upscale
        if self.upsample_op is not None:
            x = self.upsample_op(x)
            low_cut = (self.meta.upsample_stride - 1) // 2
            high_cut = math.ceil((self.meta.upsample_stride - 1) / 2.0)
            if self.training:
                # negative axes not supported by ONNX TensorRT
                x = x[:, :, low_cut:-high_cut, low_cut:-high_cut]
            else:
                # the int() forces the tracer to use static shape
                x = x[:, :, low_cut:int(x.shape[2]) - high_cut, low_cut:int(x.shape[3]) - high_cut]

        # Extract some shape parameters once.
        # Convert to int so that shape is constant in ONNX export.
        x_size = x.size()
        batch_size = x_size[0]
        feature_height = int(x_size[2])
        feature_width = int(x_size[3])

        x = x.view(
            batch_size,
            self.meta.n_fields,
            self.n_components,
            feature_height,
            feature_width
        )

        if not self.training and self.inplace_ops:
            # classification
            classes_x = x[:, :, 1:1 + self.meta.n_confidences]
            # In-place variant: classes_x is a view into x, so this writes
            # the squashed confidences back into x. A plain sigmoid call
            # whose result is dropped would leave x untouched.
            torch.sigmoid_(classes_x)

            # regressions x: add index
            if self.meta.n_vectors > 0:
                index_field = index_field_torch((feature_height, feature_width), device=x.device)
                first_reg_feature = 1 + self.meta.n_confidences
                for i, do_offset in enumerate(self.meta.vector_offsets):
                    if not do_offset:
                        continue
                    reg_x = x[:, :, first_reg_feature + i * 2:first_reg_feature + (i + 1) * 2]
                    reg_x.add_(index_field)

            # scale
            first_scale_feature = 1 + self.meta.n_confidences + self.meta.n_vectors * 2
            scales_x = x[:, :, first_scale_feature:first_scale_feature + self.meta.n_scales]
            scales_x[:] = torch.nn.functional.softplus(scales_x)
        elif not self.training and not self.inplace_ops:
            # TODO: CoreMLv4 does not like strided slices.
            # Strides are avoided when switching the first and second dim
            # temporarily.
            x = torch.transpose(x, 1, 2)

            # width
            width_x = x[:, 0:1]

            # classification
            classes_x = x[:, 1:1 + self.meta.n_confidences]
            classes_x = torch.sigmoid(classes_x)

            # regressions x
            first_reg_feature = 1 + self.meta.n_confidences
            regs_x = [
                x[:, first_reg_feature + i * 2:first_reg_feature + (i + 1) * 2]
                for i in range(self.meta.n_vectors)
            ]
            # regressions x: add index
            index_field = index_field_torch(
                (feature_height, feature_width), device=x.device, unsqueeze=(1, 0))
            # TODO: coreml export does not work with the index_field creation in the graph.
            index_field = torch.from_numpy(index_field.numpy())
            regs_x = [reg_x + index_field if do_offset else reg_x
                      for reg_x, do_offset in zip(regs_x, self.meta.vector_offsets)]

            # scale
            first_scale_feature = 1 + self.meta.n_confidences + self.meta.n_vectors * 2
            scales_x = x[:, first_scale_feature:first_scale_feature + self.meta.n_scales]
            scales_x = torch.nn.functional.softplus(scales_x)

            # concat
            x = torch.cat([width_x, classes_x, *regs_x, scales_x], dim=1)

            # TODO: CoreMLv4 problem (see above).
            x = torch.transpose(x, 1, 2)

        return x

105
tracking_heads.py Normal file
View File

@ -0,0 +1,105 @@
import tensorlayer as tl
from tensorlayer.layers import Conv2d
from tensorlayer.layers import SequentialLayer
from .heads import HeadNetwork, CompositeField4
class TBaseSingleImage(HeadNetwork):
    """Filter the feature map so that they can be used by single image loss.

    Training: only apply loss to image 0 of an image pair of image 0 and 1.
    Evaluation with forward tracking pose: only keep image 0.
    Evaluation with full tracking pose: keep all but stack group along feature dim.
    """

    forward_tracking_pose = True
    tracking_pose_length = 2

    def __init__(self, meta, in_features):
        super().__init__(meta, in_features)
        self.head = CompositeField4(meta, in_features)

    def forward(self, *args):
        """Run the wrapped head on the first image of every group in ``args[0]``."""
        features = args[0]
        full_pose_eval = not self.training and not self.forward_tracking_pose

        if not full_pose_eval:
            # Training batches hold image pairs; evaluation groups hold
            # tracking_pose_length images. Keep the first of each group.
            step = 2 if self.training else self.tracking_pose_length
            features = features[::step]

        output = self.head(features)

        if full_pose_eval:
            # full tracking pose eval
            # TODO: stack batch dimension in feature dimension and adjust
            # meta information (make it a property to dynamically return
            # a different meta for evaluation)
            raise NotImplementedError

        return output
class Tcaf(HeadNetwork):
    """Filter the feature map so that they can be used by single image loss.

    Training: only apply loss to image 0 of an image pair of image 0 and 1.
    Evaluation with forward tracking pose: only keep image 0.
    Evaluation with full tracking pose: keep all.

    The feature-reduction and feature-compute sub-networks are created once
    and stored on the class, so every instance shares the same two modules.
    """

    tracking_pose_length = 2
    reduced_features = 512

    _global_feature_reduction = None
    _global_feature_compute = None

    def __init__(self, meta, in_features):
        super().__init__(meta, in_features)

        # NOTE(review): confirm that ``bias=`` on Conv2d and ``inplace=`` on
        # tl.ReLU are accepted by the installed TensorLayer version.
        if self._global_feature_reduction is None:
            self.__class__._global_feature_reduction = SequentialLayer(
                [Conv2d(self.reduced_features,
                        kernel_size=(1,1), bias=True,in_channels=in_features),
                 tl.ReLU(inplace=True)]
            )
        self.feature_reduction = self._global_feature_reduction

        if self._global_feature_compute is None:
            self.__class__._global_feature_compute = SequentialLayer(
                [Conv2d(self.reduced_features * 2,kernel_size=(1,1) ,bias=True,in_channels=self.reduced_features * 2),
                 tl.ReLU(inplace=True)]
            )
        self.feature_compute = self._global_feature_compute

        self.head = CompositeField4(meta, self.reduced_features * 2)

    def forward(self, *args):
        """Pair the first image of each group with the others and run the head.

        Returns None when the batch holds an odd number of images.
        """
        x = args[0]

        # Batches that are not intended for tracking loss might have an
        # odd number of images (or only 1 image).
        # In that case, simply do not execute this head as the result should
        # never be used.
        if len(x) % 2 == 1:
            return None

        x = self.feature_reduction(x)

        group_length = 2 if self.training else self.tracking_pose_length
        primary = x[::group_length]
        others = [x[i::group_length] for i in range(1, group_length)]

        # The axis is passed positionally: ``dim=`` is the torch spelling of
        # this keyword and TensorLayer's concat/stack name it ``axis``.
        x = tl.stack([tl.concat([primary, o], 1) for o in others], 1)
        x_shape = x.size()
        # Fold the stacked pair axis back into the batch axis.
        x = tl.reshape(x, [x_shape[0] * x_shape[1]] + list(x_shape[2:]))

        x = self.feature_compute(x)

        x = self.head(x)

        if self.tracking_pose_length != 2:
            # TODO need to stack group from batch dim in feature dim and adjust
            # meta info
            raise NotImplementedError

        return x