Compare commits
4 Commits
master
...
PifPaf_tra
Author | SHA1 | Date |
---|---|---|
yjk15133895098 | d2881e6f04 | |
yjk15133895098 | 3e3d554a21 | |
yjk15133895098 | dd57df73ee | |
yjk15133895098 | 2efbaf6c11 |
|
@ -1,153 +1,153 @@
|
|||
import os
|
||||
os.environ['TL_BACKEND'] = 'tensorflow'
|
||||
import time
|
||||
import multiprocessing
|
||||
import tensorflow as tf
|
||||
from tensorlayer.models import TrainOneStep
|
||||
from tensorlayer.layers import Module
|
||||
import tensorlayer as tl
|
||||
from torchsummary import summary
|
||||
from tensorlayer import logging
|
||||
from tensorlayer.files import (assign_weights, maybe_download_and_extract)
|
||||
from tensorlayer.layers import (BatchNorm, Conv2d, Dense, Elementwise, AdaptiveMeanPool2d, MaxPool2d , MeanPool2d,Concat,Dropout)
|
||||
from tensorlayer.layers import Module, SequentialLayer
|
||||
|
||||
|
||||
class _DenseLayer(Module):
    """Bottleneck layer of a dense block.

    Runs a 1x1 convolution producing ``bn_size * growth_rate`` channels and a
    3x3 convolution producing ``growth_rate`` channels, each followed by batch
    normalisation with ReLU, then concatenates the new features with the
    layer input.

    :param in_channels: number of channels of the incoming feature map
    :param growth_rate: number of feature channels this layer adds
    :param bn_size: multiplier of ``growth_rate`` for the bottleneck width
    """

    def __init__(self, in_channels, growth_rate, bn_size):
        super(_DenseLayer, self).__init__()
        W_init = tl.initializers.truncated_normal(stddev=5e-2)
        bottleneck_channels = bn_size * growth_rate
        self.layer_list = [
            Conv2d(bottleneck_channels, (1, 1), in_channels=in_channels, W_init=W_init),
            BatchNorm(num_features=bottleneck_channels, act='relu'),
            Conv2d(growth_rate, (3, 3), in_channels=bottleneck_channels, W_init=W_init),
            BatchNorm(num_features=growth_rate, act='relu'),
        ]
        self.dense_layer = SequentialLayer(self.layer_list)
        # Concatenate along the channel axis. The layers above are created
        # without a data_format argument; NOTE(review): this assumes
        # TensorLayer's default channels-last layout, where the channel axis
        # is the last one (axis 1 would be the height) -- confirm.
        self.concat = Concat(-1)

    # Override the forward function.
    def forward(self, x):
        """Return ``x`` concatenated with the newly computed features."""
        new_features = self.dense_layer(x)
        return self.concat([x, new_features])
|
||||
|
||||
|
||||
class _DenseBlock(Module):
    """Stack of ``num_layers`` dense layers.

    Layer ``i`` is created with ``in_channels + growth_rate * i`` input
    channels.

    :param num_layers: number of ``_DenseLayer`` instances in the block
    :param in_channels: number of channels entering the block
    :param bn_size: bottleneck multiplier forwarded to every layer
    :param growth_rate: channels added by every layer
    """

    def __init__(self, num_layers, in_channels, bn_size, growth_rate):
        super(_DenseBlock, self).__init__()
        # The block owns no weights itself, so no initializers are built here.
        self.layer_list = [
            _DenseLayer(in_channels + growth_rate * i, growth_rate, bn_size)
            for i in range(num_layers)
        ]
        self.dense_block = SequentialLayer(self.layer_list)

    # Override the forward function.
    def forward(self, x):
        """Apply all dense layers in sequence."""
        return self.dense_block(x)
|
||||
|
||||
|
||||
class _Transition(Module):
    """Transition between two dense blocks.

    A 1x1 convolution changes the channel count from ``in_channels`` to
    ``out_channels``; it is followed by batch normalisation with ReLU and a
    2x2 mean pooling with stride 2.

    :param in_channels: number of channels entering the transition
    :param out_channels: number of channels leaving the transition
    """

    def __init__(self, in_channels, out_channels):
        super(_Transition, self).__init__()
        W_init = tl.initializers.truncated_normal(stddev=5e-2)
        self.layer_list = [
            Conv2d(out_channels, (1, 1), in_channels=in_channels, W_init=W_init),
            BatchNorm(num_features=out_channels, act='relu'),
            MeanPool2d((2, 2), strides=(2, 2)),
        ]
        self.transition_layer = SequentialLayer(self.layer_list)

    # Override the forward function.
    def forward(self, x):
        """Apply convolution, normalisation and pooling."""
        return self.transition_layer(x)
|
||||
|
||||
class DenseNet_BC(Module):
    """DenseNet-BC classifier.

    :param growth_rate: channels added by every dense layer
    :param block_config: number of dense layers in each dense block
    :param bn_size: bottleneck multiplier used inside the dense layers
    :param theta: compression factor applied by every transition
    :param num_classes: number of output classes; the value 10 selects the
        small 3x3 stem, any other value selects the 7x7 stem
    """

    def __init__(self, growth_rate=12, block_config=(6, 12, 24, 16),
                 bn_size=4, theta=0.5, num_classes=10):
        super(DenseNet_BC, self).__init__()
        W_init = tl.initializers.truncated_normal(stddev=5e-2)
        W_init2 = tl.initializers.truncated_normal(stddev=0.04)
        b_init2 = tl.initializers.constant(value=0.1)

        # The initial convolution has 2 * growth_rate filters.
        num_init_feature = 2 * growth_rate
        self.layer_list = []

        # num_classes == 10 stands for CIFAR-10.
        if num_classes == 10:
            self.layer_list.append(
                Conv2d(num_init_feature, (3, 3), strides=(1, 1), in_channels=3, W_init=W_init))
        else:
            # NOTE(review): the batch norm and max pool are taken to belong to
            # this branch (the source's indentation was lost) -- confirm.
            self.layer_list.append(
                Conv2d(num_init_feature, (7, 7), strides=(2, 2), padding="valid",
                       in_channels=3, W_init=W_init))
            self.layer_list.append(BatchNorm(num_features=num_init_feature, act='relu'))
            self.layer_list.append(MaxPool2d((3, 3), strides=(2, 2)))

        num_feature = num_init_feature
        last_block = len(block_config) - 1
        for i, num_layers in enumerate(block_config):
            self.layer_list.append(_DenseBlock(num_layers, num_feature, bn_size, growth_rate))
            num_feature = num_feature + growth_rate * num_layers
            # Every block except the last one is followed by a compressing transition.
            if i != last_block:
                compressed = int(num_feature * theta)
                self.layer_list.append(_Transition(num_feature, compressed))
                num_feature = compressed

        self.layer_list.append(BatchNorm(num_features=num_feature, act='relu'))
        self.layer_list.append(AdaptiveMeanPool2d((1, 1)))

        self.features = SequentialLayer(self.layer_list)
        # Backend-independent replacement for the torch-only ``view`` call.
        self.flatten = tl.layers.Flatten()
        # Dense takes the number of output units first; the input width goes
        # through ``in_channels`` (the second positional slot is the activation).
        self.classifier = Dense(n_units=num_classes, in_channels=num_feature,
                                W_init=W_init2, b_init=b_init2)

    def forward(self, x):
        """Return the class scores for the input batch ``x``."""
        features = self.features(x)
        out = self.flatten(features)
        out = self.classifier(out)
        return out
|
||||
|
||||
|
||||
# DenseNet_BC for ImageNet
def DenseNet121():
    """Build DenseNet-121: growth rate 32, blocks (6, 12, 24, 16), 1000 classes."""
    config = dict(growth_rate=32, block_config=(6, 12, 24, 16), num_classes=1000)
    return DenseNet_BC(**config)


def DenseNet169():
    """Build DenseNet-169: growth rate 32, blocks (6, 12, 32, 32), 1000 classes."""
    config = dict(growth_rate=32, block_config=(6, 12, 32, 32), num_classes=1000)
    return DenseNet_BC(**config)


def DenseNet201():
    """Build DenseNet-201: growth rate 32, blocks (6, 12, 48, 32), 1000 classes."""
    config = dict(growth_rate=32, block_config=(6, 12, 48, 32), num_classes=1000)
    return DenseNet_BC(**config)


def DenseNet161():
    """Build DenseNet-161: growth rate 48, blocks (6, 12, 36, 24), 1000 classes."""
    config = dict(growth_rate=48, block_config=(6, 12, 36, 24), num_classes=1000)
    return DenseNet_BC(**config)
|
||||
|
||||
|
||||
# DenseNet_BC for cifar
def densenet_BC_100():
    """Build the CIFAR model: growth rate 12 and three blocks of 16 layers."""
    cifar_blocks = (16, 16, 16)
    return DenseNet_BC(growth_rate=12, block_config=cifar_blocks)
|
||||
|
||||
def builddensenet(name="densenet-100"):
    """Build a DenseNet model by name.

    :param name: ``"densenet-100"`` (CIFAR DenseNet-BC-100) or
        ``"densenet-121"`` (ImageNet DenseNet-121)
    :return: the constructed model
    :raises ValueError: if ``name`` is not one of the supported names
    """
    if name == "densenet-100":
        return densenet_BC_100()
    if name == "densenet-121":
        return DenseNet121()
    # An unknown name is an error: raise instead of printing and exiting with
    # status 0, which reported success to the calling process.
    raise ValueError("not found the net: {!r}".format(name))
|
||||
|
||||
def test():
    """Build the CIFAR DenseNet-BC-100 model and print its summary."""
    net = densenet_BC_100()
    # NOTE(review): `summary` is imported from torchsummary while this module
    # sets TL_BACKEND to 'tensorflow' -- confirm this call works on the model.
    print(summary(net, input_size=(3, 32, 32)))

    #x = torch.randn(2, 3, 32, 32)
    # y = net(x)
    # print(y.size())


# Run the smoke test when the module is executed as a script.
if __name__ == '__main__':
    test()
|
||||
|
||||
|
||||
|
||||
import os
|
||||
os.environ['TL_BACKEND'] = 'tensorflow'
|
||||
import time
|
||||
import multiprocessing
|
||||
import tensorflow as tf
|
||||
from tensorlayer.models import TrainOneStep
|
||||
from tensorlayer.layers import Module
|
||||
import tensorlayer as tl
|
||||
from torchsummary import summary
|
||||
from tensorlayer import logging
|
||||
from tensorlayer.files import (assign_weights, maybe_download_and_extract)
|
||||
from tensorlayer.layers import (BatchNorm, Conv2d, Dense, Elementwise, AdaptiveMeanPool2d, MaxPool2d , MeanPool2d,Concat,Dropout)
|
||||
from tensorlayer.layers import Module, SequentialLayer
|
||||
|
||||
|
||||
class _DenseLayer(Module):
    """Bottleneck layer of a dense block.

    Runs a 1x1 convolution producing ``bn_size * growth_rate`` channels and a
    3x3 convolution producing ``growth_rate`` channels, each followed by batch
    normalisation with ReLU, then concatenates the new features with the
    layer input.

    :param in_channels: number of channels of the incoming feature map
    :param growth_rate: number of feature channels this layer adds
    :param bn_size: multiplier of ``growth_rate`` for the bottleneck width
    """

    def __init__(self, in_channels, growth_rate, bn_size):
        super(_DenseLayer, self).__init__()
        W_init = tl.initializers.truncated_normal(stddev=5e-2)
        bottleneck_channels = bn_size * growth_rate
        self.layer_list = [
            Conv2d(bottleneck_channels, (1, 1), in_channels=in_channels, W_init=W_init),
            BatchNorm(num_features=bottleneck_channels, act='relu'),
            Conv2d(growth_rate, (3, 3), in_channels=bottleneck_channels, W_init=W_init),
            BatchNorm(num_features=growth_rate, act='relu'),
        ]
        self.dense_layer = SequentialLayer(self.layer_list)
        # Concatenate along the channel axis. The layers above are created
        # without a data_format argument; NOTE(review): this assumes
        # TensorLayer's default channels-last layout, where the channel axis
        # is the last one (axis 1 would be the height) -- confirm.
        self.concat = Concat(-1)

    # Override the forward function.
    def forward(self, x):
        """Return ``x`` concatenated with the newly computed features."""
        new_features = self.dense_layer(x)
        return self.concat([x, new_features])
|
||||
|
||||
|
||||
class _DenseBlock(Module):
    """Stack of ``num_layers`` dense layers.

    Layer ``i`` is created with ``in_channels + growth_rate * i`` input
    channels.

    :param num_layers: number of ``_DenseLayer`` instances in the block
    :param in_channels: number of channels entering the block
    :param bn_size: bottleneck multiplier forwarded to every layer
    :param growth_rate: channels added by every layer
    """

    def __init__(self, num_layers, in_channels, bn_size, growth_rate):
        super(_DenseBlock, self).__init__()
        # The block owns no weights itself, so no initializers are built here.
        self.layer_list = [
            _DenseLayer(in_channels + growth_rate * i, growth_rate, bn_size)
            for i in range(num_layers)
        ]
        self.dense_block = SequentialLayer(self.layer_list)

    # Override the forward function.
    def forward(self, x):
        """Apply all dense layers in sequence."""
        return self.dense_block(x)
|
||||
|
||||
|
||||
class _Transition(Module):
    """Transition between two dense blocks.

    A 1x1 convolution changes the channel count from ``in_channels`` to
    ``out_channels``; it is followed by batch normalisation with ReLU and a
    2x2 mean pooling with stride 2.

    :param in_channels: number of channels entering the transition
    :param out_channels: number of channels leaving the transition
    """

    def __init__(self, in_channels, out_channels):
        super(_Transition, self).__init__()
        W_init = tl.initializers.truncated_normal(stddev=5e-2)
        self.layer_list = [
            Conv2d(out_channels, (1, 1), in_channels=in_channels, W_init=W_init),
            BatchNorm(num_features=out_channels, act='relu'),
            MeanPool2d((2, 2), strides=(2, 2)),
        ]
        self.transition_layer = SequentialLayer(self.layer_list)

    # Override the forward function.
    def forward(self, x):
        """Apply convolution, normalisation and pooling."""
        return self.transition_layer(x)
|
||||
|
||||
class DenseNet_BC(Module):
    """DenseNet-BC classifier.

    :param growth_rate: channels added by every dense layer
    :param block_config: number of dense layers in each dense block
    :param bn_size: bottleneck multiplier used inside the dense layers
    :param theta: compression factor applied by every transition
    :param num_classes: number of output classes; the value 10 selects the
        small 3x3 stem, any other value selects the 7x7 stem
    """

    def __init__(self, growth_rate=12, block_config=(6, 12, 24, 16),
                 bn_size=4, theta=0.5, num_classes=10):
        super(DenseNet_BC, self).__init__()
        W_init = tl.initializers.truncated_normal(stddev=5e-2)
        W_init2 = tl.initializers.truncated_normal(stddev=0.04)
        b_init2 = tl.initializers.constant(value=0.1)

        # The initial convolution has 2 * growth_rate filters.
        num_init_feature = 2 * growth_rate
        self.layer_list = []

        # num_classes == 10 stands for CIFAR-10.
        if num_classes == 10:
            self.layer_list.append(
                Conv2d(num_init_feature, (3, 3), strides=(1, 1), in_channels=3, W_init=W_init))
        else:
            # NOTE(review): the batch norm and max pool are taken to belong to
            # this branch (the source's indentation was lost) -- confirm.
            self.layer_list.append(
                Conv2d(num_init_feature, (7, 7), strides=(2, 2), padding="valid",
                       in_channels=3, W_init=W_init))
            self.layer_list.append(BatchNorm(num_features=num_init_feature, act='relu'))
            self.layer_list.append(MaxPool2d((3, 3), strides=(2, 2)))

        num_feature = num_init_feature
        last_block = len(block_config) - 1
        for i, num_layers in enumerate(block_config):
            self.layer_list.append(_DenseBlock(num_layers, num_feature, bn_size, growth_rate))
            num_feature = num_feature + growth_rate * num_layers
            # Every block except the last one is followed by a compressing transition.
            if i != last_block:
                compressed = int(num_feature * theta)
                self.layer_list.append(_Transition(num_feature, compressed))
                num_feature = compressed

        self.layer_list.append(BatchNorm(num_features=num_feature, act='relu'))
        self.layer_list.append(AdaptiveMeanPool2d((1, 1)))

        self.features = SequentialLayer(self.layer_list)
        # Backend-independent replacement for the torch-only ``view`` call.
        self.flatten = tl.layers.Flatten()
        # Dense takes the number of output units first; the input width goes
        # through ``in_channels`` (the second positional slot is the activation).
        self.classifier = Dense(n_units=num_classes, in_channels=num_feature,
                                W_init=W_init2, b_init=b_init2)

    def forward(self, x):
        """Return the class scores for the input batch ``x``."""
        features = self.features(x)
        out = self.flatten(features)
        out = self.classifier(out)
        return out
|
||||
|
||||
|
||||
# DenseNet_BC for ImageNet
def DenseNet121():
    """Build DenseNet-121: growth rate 32, blocks (6, 12, 24, 16), 1000 classes."""
    config = dict(growth_rate=32, block_config=(6, 12, 24, 16), num_classes=1000)
    return DenseNet_BC(**config)


def DenseNet169():
    """Build DenseNet-169: growth rate 32, blocks (6, 12, 32, 32), 1000 classes."""
    config = dict(growth_rate=32, block_config=(6, 12, 32, 32), num_classes=1000)
    return DenseNet_BC(**config)


def DenseNet201():
    """Build DenseNet-201: growth rate 32, blocks (6, 12, 48, 32), 1000 classes."""
    config = dict(growth_rate=32, block_config=(6, 12, 48, 32), num_classes=1000)
    return DenseNet_BC(**config)


def DenseNet161():
    """Build DenseNet-161: growth rate 48, blocks (6, 12, 36, 24), 1000 classes."""
    config = dict(growth_rate=48, block_config=(6, 12, 36, 24), num_classes=1000)
    return DenseNet_BC(**config)
|
||||
|
||||
|
||||
# DenseNet_BC for cifar
def densenet_BC_100():
    """Build the CIFAR model: growth rate 12 and three blocks of 16 layers."""
    cifar_blocks = (16, 16, 16)
    return DenseNet_BC(growth_rate=12, block_config=cifar_blocks)
|
||||
|
||||
def builddensenet(name="densenet-100"):
    """Build a DenseNet model by name.

    :param name: ``"densenet-100"`` (CIFAR DenseNet-BC-100) or
        ``"densenet-121"`` (ImageNet DenseNet-121)
    :return: the constructed model
    :raises ValueError: if ``name`` is not one of the supported names
    """
    if name == "densenet-100":
        return densenet_BC_100()
    if name == "densenet-121":
        return DenseNet121()
    # An unknown name is an error: raise instead of printing and exiting with
    # status 0, which reported success to the calling process.
    raise ValueError("not found the net: {!r}".format(name))
|
||||
|
||||
def test():
    """Build the CIFAR DenseNet-BC-100 model and print its summary."""
    net = densenet_BC_100()
    # NOTE(review): `summary` is imported from torchsummary while this module
    # sets TL_BACKEND to 'tensorflow' -- confirm this call works on the model.
    print(summary(net, input_size=(3, 32, 32)))

    #x = torch.randn(2, 3, 32, 32)
    # y = net(x)
    # print(y.size())


# Run the smoke test when the module is executed as a script.
if __name__ == '__main__':
    test()
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,192 @@
|
|||
"""Head meta objects contain meta information about head networks.
|
||||
|
||||
This includes the name, the name of the individual fields, the composition, etc.
|
||||
"""
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, ClassVar, List, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
@dataclass
class Base:
    """Meta information common to all head networks.

    ``name`` and ``dataset`` are constructor arguments. ``head_index``,
    ``base_stride`` and ``upsample_stride`` are excluded from ``__init__``
    and are assigned on the instance after construction.
    """

    name: str
    dataset: str

    # Excluded from the generated constructor (init=False).
    head_index: int = field(default=None, init=False)
    base_stride: int = field(default=None, init=False)
    upsample_stride: int = field(default=1, init=False)

    @property
    def stride(self) -> int:
        """Return ``base_stride // upsample_stride``; None while base_stride is unset."""
        base = self.base_stride
        return None if base is None else base // self.upsample_stride

    @property
    def n_fields(self) -> int:
        """Number of fields of this head; subclasses must provide it."""
        raise NotImplementedError
|
||||
|
||||
|
||||
@dataclass
class Cif(Base):
    """Meta data of a Composite Intensity Field (CIF) head."""

    # Constructor fields (their order defines the positional order).
    keypoints: List[str]
    sigmas: List[float]
    pose: Any = None
    draw_skeleton: List[Tuple[int, int]] = None
    score_weights: List[float] = None

    # Per-class constants: components predicted for every field.
    n_confidences: ClassVar[int] = 1
    n_vectors: ClassVar[int] = 1
    n_scales: ClassVar[int] = 1

    # Unannotated, hence plain class attributes and not dataclass fields.
    vector_offsets = [True]
    decoder_min_scale = 0.0
    decoder_seed_mask: List[int] = None

    training_weights: List[float] = None

    @property
    def n_fields(self):
        """One field per keypoint."""
        keypoint_names = self.keypoints
        return len(keypoint_names)
|
||||
|
||||
|
||||
@dataclass
class Caf(Base):
    """Meta data of a Composite Association Field (CAF) head."""

    # Constructor fields (their order defines the positional order).
    keypoints: List[str]
    sigmas: List[float]
    skeleton: List[Tuple[int, int]]
    pose: Any = None
    sparse_skeleton: List[Tuple[int, int]] = None
    dense_to_sparse_radius: float = 2.0
    only_in_field_of_view: bool = False

    # Per-class constants: components predicted for every field.
    n_confidences: ClassVar[int] = 1
    n_vectors: ClassVar[int] = 2
    n_scales: ClassVar[int] = 2

    # Unannotated, hence plain class attributes and not dataclass fields.
    vector_offsets = [True, True]
    decoder_min_distance = 0.0
    decoder_max_distance = float('inf')
    decoder_confidence_scales: List[float] = None

    training_weights: List[float] = None

    @property
    def n_fields(self):
        """One field per skeleton connection."""
        connections = self.skeleton
        return len(connections)

    @staticmethod
    def concatenate(metas):
        """Merge several CAF metas into one.

        Names are joined with ``_`` and the skeletons and decoder confidence
        scales are chained in order; every other setting, including the
        stride attributes, is copied from the first meta. A meta without
        confidence scales contributes ``1.0`` for each of its connections.
        """
        # TODO: by keypoint name, update skeleton indices if meta.keypoints
        # is not the same for all metas.
        first = metas[0]

        merged_skeleton = []
        merged_scales = []
        for meta in metas:
            merged_skeleton.extend(meta.skeleton)
            if meta.decoder_confidence_scales:
                merged_scales.extend(meta.decoder_confidence_scales)
            else:
                merged_scales.extend(1.0 for _ in meta.skeleton)

        concatenated = Caf(
            name='_'.join(m.name for m in metas),
            dataset=first.dataset,
            keypoints=first.keypoints,
            sigmas=first.sigmas,
            pose=first.pose,
            skeleton=merged_skeleton,
            sparse_skeleton=first.sparse_skeleton,
            only_in_field_of_view=first.only_in_field_of_view,
            decoder_confidence_scales=merged_scales,
        )
        # The stride attributes are not constructor arguments; copy them over.
        concatenated.head_index = first.head_index
        concatenated.base_stride = first.base_stride
        concatenated.upsample_stride = first.upsample_stride
        return concatenated
|
||||
|
||||
|
||||
@dataclass
class CifDet(Base):
    """Meta data of a Composite Intensity Field (CIF) head used for detection."""

    # Constructor field.
    categories: List[str]

    # Per-class constants: components predicted for every field.
    n_confidences: ClassVar[int] = 1
    n_vectors: ClassVar[int] = 2
    n_scales: ClassVar[int] = 0

    # Unannotated, hence plain class attributes and not dataclass fields.
    vector_offsets = [True, False]
    decoder_min_scale = 0.0

    training_weights: List[float] = None

    @property
    def n_fields(self):
        """One field per category."""
        category_names = self.categories
        return len(category_names)
|
||||
|
||||
|
||||
@dataclass
class TSingleImageCif(Cif):
    """Single-Image CIF head in tracking models.

    Adds no fields or behavior of its own; it exists as a distinct type.
    """
|
||||
|
||||
|
||||
@dataclass
class TSingleImageCaf(Caf):
    """Single-Image CAF head in tracking models.

    Adds no fields or behavior of its own; it exists as a distinct type.
    """
|
||||
|
||||
|
||||
@dataclass
class Tcaf(Base):
    """Tracking Composite Association Field.

    The ``*_single_frame`` arguments describe one frame. When ``keypoints``,
    ``sigmas``, ``pose`` or ``draw_skeleton`` are not given, they are derived
    in ``__post_init__`` by concatenating the single-frame value with itself.
    """

    # Constructor fields (their order defines the positional order).
    keypoints_single_frame: List[str]
    sigmas_single_frame: List[float]
    pose_single_frame: Any
    draw_skeleton_single_frame: List[Tuple[int, int]] = None
    keypoints: List[str] = None
    sigmas: List[float] = None
    pose: Any = None
    draw_skeleton: List[Tuple[int, int]] = None
    only_in_field_of_view: bool = False

    # Per-class constants: components predicted for every field.
    n_confidences: ClassVar[int] = 1
    n_vectors: ClassVar[int] = 2
    n_scales: ClassVar[int] = 2

    training_weights: List[float] = None

    # Unannotated, hence a plain class attribute and not a dataclass field.
    vector_offsets = [True, True]

    def __post_init__(self):
        """Fill in the two-frame attributes that were not given explicitly."""
        if self.keypoints is None:
            self.keypoints = np.concatenate((
                self.keypoints_single_frame,
                self.keypoints_single_frame,
            ), axis=0)
        if self.sigmas is None:
            self.sigmas = np.concatenate((
                self.sigmas_single_frame,
                self.sigmas_single_frame,
            ), axis=0)
        if self.pose is None:
            self.pose = np.concatenate((
                self.pose_single_frame,
                self.pose_single_frame,
            ), axis=0)
        # draw_skeleton_single_frame is optional (default None) and
        # np.concatenate cannot take None, so only derive the two-frame
        # skeleton when a single-frame one was supplied.
        if self.draw_skeleton is None and self.draw_skeleton_single_frame is not None:
            self.draw_skeleton = np.concatenate((
                self.draw_skeleton_single_frame,
                self.draw_skeleton_single_frame,
            ), axis=0)

    @property
    def skeleton(self):
        """Connect every keypoint (1-based) with the entry one frame-length later."""
        n_single = len(self.keypoints_single_frame)
        return [(i + 1, i + 1 + n_single) for i in range(n_single)]

    @property
    def n_fields(self):
        """One field per single-frame keypoint."""
        return len(self.keypoints_single_frame)
|
|
@ -0,0 +1,412 @@
|
|||
"""Head networks."""
|
||||
import os
|
||||
import argparse
|
||||
import functools
|
||||
import math
|
||||
import tensorlayer as tl
|
||||
from tensorlayer import logging
|
||||
from tensorlayer.layers import Conv2d,Dropout
|
||||
import torch
|
||||
os.environ['TL_BACKEND'] = 'pytorch'
|
||||
from .import headmeta
|
||||
from tensorlayer.layers import Module
|
||||
import numpy as np
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
@functools.lru_cache(maxsize=16)
def index_field_torch(shape, *, device=None, unsqueeze=(0, 0)):
    """Return a tensor holding the x and y index of every feature map cell.

    The field is built directly with torch: NumPy has no ``unsqueeze`` and
    its ``device`` argument cannot place data on a torch device.

    :param shape: ``(height, width)`` of the feature map; must be hashable
        (a tuple) because results are cached
    :param device: torch device on which the tensor is created
    :param unsqueeze: dimensions inserted, in order, with ``torch.unsqueeze``
    :return: tensor whose entry 0 along the original first axis holds the
        column index and entry 1 the row index of each cell, with the
        requested extra dimensions added
    """
    assert len(shape) == 2
    height, width = shape
    xy = torch.empty((2, height, width), device=device)
    # Row vector of column indices, broadcast over all rows.
    xy[0] = torch.arange(width, device=device)
    # Column vector of row indices, broadcast over all columns.
    xy[1] = torch.arange(height, device=device).unsqueeze(1)

    for dim in unsqueeze:
        xy = torch.unsqueeze(xy, dim)

    return xy
|
||||
|
||||
|
||||
class PifHFlip(Module):
    """Horizontally flip the fields of an intensity-field head.

    :param keypoints: keypoint names in field order
    :param hflip: mapping from a keypoint name to the name it is swapped
        with under a horizontal flip; unmapped keypoints keep their place
    """

    def __init__(self, keypoints, hflip):
        super().__init__()

        permutation = []
        for kp_i, kp_name in enumerate(keypoints):
            if kp_name in hflip:
                permutation.append(keypoints.index(hflip[kp_name]))
            else:
                permutation.append(kp_i)
        flip_indices = torch.LongTensor(permutation)
        LOG.debug('hflip indices: %s', flip_indices)
        self.register_buffer('flip_indices', flip_indices)

    def forward(self, *args):
        """Return the flipped fields as a list, one entry per argument."""
        out = [
            torch.flip(
                torch.index_select(field, 1, self.flip_indices),
                dims=[len(field.shape) - 1],
            )
            for field in args
        ]

        # flip the x-coordinate of the vector component
        out[1][:, :, 0, :, :] *= -1.0

        return out
|
||||
|
||||
|
||||
class PafHFlip(Module):
    """Horizontally flip the fields of an association-field head.

    :param keypoints: keypoint names; skeleton indices are 1-based into it
    :param skeleton: list of ``(joint1, joint2)`` index pairs
    :param hflip: mapping from a keypoint name to the name it is swapped
        with under a horizontal flip
    """

    def __init__(self, keypoints, skeleton, hflip):
        super().__init__()

        def mirrored(name):
            # Names without a flip partner map to themselves.
            return hflip[name] if name in hflip else name

        skeleton_names = [
            (keypoints[j1 - 1], keypoints[j2 - 1])
            for j1, j2 in skeleton
        ]
        flipped_skeleton_names = [
            (mirrored(j1), mirrored(j2))
            for j1, j2 in skeleton_names
        ]
        LOG.debug('skeleton = %s, flipped_skeleton = %s',
                  skeleton_names, flipped_skeleton_names)

        flip_indices = list(range(len(skeleton)))
        reverse_direction = []
        for paf_i, connection in enumerate(skeleton_names):
            if connection in flipped_skeleton_names:
                flip_indices[paf_i] = flipped_skeleton_names.index(connection)
            # The connection may also exist with its end points swapped;
            # that match takes precedence and marks the field for reversal.
            reversed_connection = (connection[1], connection[0])
            if reversed_connection in flipped_skeleton_names:
                flip_indices[paf_i] = flipped_skeleton_names.index(reversed_connection)
                reverse_direction.append(paf_i)
        LOG.debug('hflip indices: %s, reverse: %s', flip_indices, reverse_direction)

        self.register_buffer('flip_indices', torch.LongTensor(flip_indices))
        self.register_buffer('reverse_direction', torch.LongTensor(reverse_direction))

    def forward(self, *args):
        """Return the flipped fields as a list, one entry per argument."""
        out = [
            torch.flip(
                torch.index_select(field, 1, self.flip_indices),
                dims=[len(field.shape) - 1],
            )
            for field in args
        ]

        # flip the x-coordinate of the vector components
        for vector_field in (out[1], out[2]):
            vector_field[:, :, 0, :, :] *= -1.0

        # reverse direction: swap the two vector fields of the marked connections
        for paf_i in self.reverse_direction:
            cc = torch.clone(out[1][:, paf_i])
            out[1][:, paf_i] = out[2][:, paf_i]
            out[2][:, paf_i] = cc

        return out
|
||||
|
||||
|
||||
class HeadNetwork(Module):
    """Base class for head networks.

    :param meta: head meta instance to configure this head network
    :param in_features: number of input features which should be equal to the
        base network's output features
    """
    def __init__(self, meta: headmeta.Base, in_features: int):
        super().__init__()
        # Keep both arguments on the instance for use by subclasses.
        self.meta = meta
        self.in_features = in_features

    @classmethod
    def cli(cls, parser: argparse.ArgumentParser):
        """Command line interface (CLI) to extend argument parser."""
        # Intentionally empty in the base class: it adds no arguments.

    @classmethod
    def configure(cls, args: argparse.Namespace):
        """Take the parsed argument parser output and configure class variables."""
        # Intentionally empty in the base class: nothing to configure.

    def forward(self, x):
        # Subclasses must implement the actual computation.
        raise NotImplementedError
|
||||
|
||||
|
||||
class CompositeField3(HeadNetwork):
    """Composite field head with three channels per regression vector.

    Per field the convolution produces
    ``n_confidences + n_vectors * 3 + n_scales`` channels. Outside training
    the confidences go through a sigmoid, the cell index is added to the
    vectors flagged in ``meta.vector_offsets``, the scales go through a
    softplus, and the first channel after the vector components is moved to
    the front.

    :param meta: head meta instance configuring this head
    :param in_features: number of input channels
    :param kernel_size: kernel size of the convolution
    :param padding: padding of the convolution
    :param dilation: dilation of the convolution
    """

    dropout_p = 0.0
    inplace_ops = True

    def __init__(self,
                 meta: headmeta.Base,
                 in_features, *,
                 kernel_size=1, padding=0, dilation=1):
        super().__init__(meta, in_features)

        LOG.debug('%s config: fields = %d, confidences = %d, vectors = %d, scales = %d '
                  'kernel = %d, padding = %d, dilation = %d',
                  meta.name, meta.n_fields, meta.n_confidences, meta.n_vectors, meta.n_scales,
                  kernel_size, padding, dilation)

        self.dropout = Dropout(p=self.dropout_p)

        # convolution
        out_features = meta.n_fields * (meta.n_confidences + meta.n_vectors * 3 + meta.n_scales)
        self.conv = Conv2d(out_features * (meta.upsample_stride ** 2),
                           (kernel_size, kernel_size), padding=(padding, padding),
                           dilation=(dilation, dilation), in_channels=in_features)

        # upsample
        assert meta.upsample_stride >= 1
        self.upsample_op = None
        if meta.upsample_stride > 1:
            self.upsample_op = torch.nn.PixelShuffle(meta.upsample_stride)

    @classmethod
    def cli(cls, parser: argparse.ArgumentParser):
        """Add the CompositeField3 options to ``parser``."""
        group = parser.add_argument_group('CompositeField3')
        group.add_argument('--cf3-dropout', default=cls.dropout_p, type=float,
                           help='[experimental] zeroing probability of feature in head input')
        assert cls.inplace_ops
        group.add_argument('--cf3-no-inplace-ops', dest='cf3_inplace_ops',
                           default=True, action='store_false',
                           help='alternative graph without inplace ops')

    @classmethod
    def configure(cls, args: argparse.Namespace):
        """Copy the parsed CompositeField3 options onto the class."""
        cls.dropout_p = args.cf3_dropout
        cls.inplace_ops = args.cf3_inplace_ops

    @property
    def sparse_task_parameters(self):
        """Return the convolution weight wrapped in a list."""
        return [self.conv.weight]

    def forward(self, x):  # pylint: disable=arguments-differ
        """Compute the composite fields for the feature map ``x``."""
        x = self.dropout(x)
        x = self.conv(x)
        # upscale
        if self.upsample_op is not None:
            x = self.upsample_op(x)
            low_cut = (self.meta.upsample_stride - 1) // 2
            high_cut = math.ceil((self.meta.upsample_stride - 1) / 2.0)
            if self.training:
                # negative axes not supported by ONNX TensorRT
                x = x[:, :, low_cut:-high_cut, low_cut:-high_cut]
            else:
                # the int() forces the tracer to use static shape
                x = x[:, :, low_cut:int(x.shape[2]) - high_cut, low_cut:int(x.shape[3]) - high_cut]

        # Extract some shape parameters once.
        # Convert to int so that shape is constant in ONNX export.
        x_size = x.size()
        batch_size = x_size[0]
        feature_height = int(x_size[2])
        feature_width = int(x_size[3])

        x = x.view(
            batch_size,
            self.meta.n_fields,
            self.meta.n_confidences + self.meta.n_vectors * 3 + self.meta.n_scales,
            feature_height,
            feature_width
        )

        if not self.training and self.inplace_ops:
            # classification: the slice is a view of x, so the sigmoid must be
            # applied in place; a discarded out-of-place result changes nothing
            classes_x = x[:, :, 0:self.meta.n_confidences]
            classes_x.sigmoid_()

            # regressions x: add index
            if self.meta.n_vectors > 0:
                index_field = index_field_torch((feature_height, feature_width), device=x.device)
                first_reg_feature = self.meta.n_confidences
                for i, do_offset in enumerate(self.meta.vector_offsets):
                    if not do_offset:
                        continue
                    reg_x = x[:, :, first_reg_feature + i * 2:first_reg_feature + (i + 1) * 2]
                    reg_x.add_(index_field)

            # scale
            first_scale_feature = self.meta.n_confidences + self.meta.n_vectors * 3
            scales_x = x[:, :, first_scale_feature:first_scale_feature + self.meta.n_scales]
            scales_x[:] = torch.nn.functional.softplus(scales_x)

            # remove width in the middle and add one to the front (v4 style)
            first_width_feature = self.meta.n_confidences + self.meta.n_vectors * 2
            x = torch.cat([
                x[:, :, first_width_feature:first_width_feature + 1],
                x[:, :, :first_width_feature],
                x[:, :, self.meta.n_confidences + self.meta.n_vectors * 3:],
            ], dim=2)
        elif not self.training and not self.inplace_ops:
            # TODO: CoreMLv4 does not like strided slices.
            # Strides are avoided when switching the first and second dim
            # temporarily.
            x = torch.transpose(x, 1, 2)

            # classification
            classes_x = x[:, 0:self.meta.n_confidences]
            classes_x = torch.sigmoid(classes_x)

            # regressions x
            first_reg_feature = self.meta.n_confidences
            regs_x = [
                x[:, first_reg_feature + i * 2:first_reg_feature + (i + 1) * 2]
                for i in range(self.meta.n_vectors)
            ]
            # regressions x: add index
            index_field = index_field_torch(
                (feature_height, feature_width), device=x.device, unsqueeze=(1, 0))
            # TODO: coreml export does not work with the index_field creation in the graph.
            index_field = tl.convert_to_tensor(index_field.numpy())
            regs_x = [reg_x + index_field if do_offset else reg_x
                      for reg_x, do_offset in zip(regs_x, self.meta.vector_offsets)]

            # regressions logb
            first_reglogb_feature = self.meta.n_confidences + self.meta.n_vectors * 2
            single_reg_logb = x[:, first_reglogb_feature:first_reglogb_feature + 1]

            # scale
            first_scale_feature = self.meta.n_confidences + self.meta.n_vectors * 3
            scales_x = x[:, first_scale_feature:first_scale_feature + self.meta.n_scales]
            scales_x = torch.nn.functional.softplus(scales_x)

            # concat with width in front (v4 style)
            x = torch.cat([single_reg_logb, classes_x, *regs_x, scales_x], dim=1)

            # TODO: CoreMLv4 problem (see above).
            x = torch.transpose(x, 1, 2)

        return x
|
||||
|
||||
|
||||
class CompositeField4(HeadNetwork):
    """Composite field head with one leading width channel per field.

    Per field the convolution produces
    ``1 + n_confidences + n_vectors * 2 + n_scales`` channels. Outside
    training the confidences go through a sigmoid, the cell index is added to
    the vectors flagged in ``meta.vector_offsets`` and the scales go through a
    softplus.

    :param meta: head meta instance configuring this head
    :param in_features: number of input channels
    :param kernel_size: kernel size of the convolution
    :param padding: padding of the convolution
    :param dilation: dilation of the convolution
    """

    dropout_p = 0.0
    inplace_ops = True

    def __init__(self,
                 meta: headmeta.Base,
                 in_features, *,
                 kernel_size=1, padding=0, dilation=1):
        super().__init__(meta, in_features)

        LOG.debug('%s config: fields = %d, confidences = %d, vectors = %d, scales = %d '
                  'kernel = %d, padding = %d, dilation = %d',
                  meta.name, meta.n_fields, meta.n_confidences, meta.n_vectors, meta.n_scales,
                  kernel_size, padding, dilation)

        self.dropout = Dropout(p=self.dropout_p)

        # convolution
        self.n_components = 1 + meta.n_confidences + meta.n_vectors * 2 + meta.n_scales
        self.conv = Conv2d(
            meta.n_fields * self.n_components * (meta.upsample_stride ** 2),
            (kernel_size, kernel_size), padding=(padding, padding),
            dilation=(dilation, dilation), in_channels=in_features
        )

        # upsample
        assert meta.upsample_stride >= 1
        self.upsample_op = None
        if meta.upsample_stride > 1:
            self.upsample_op = torch.nn.PixelShuffle(meta.upsample_stride)

    @classmethod
    def cli(cls, parser: argparse.ArgumentParser):
        """Add the CompositeField4 options to ``parser``."""
        group = parser.add_argument_group('CompositeField4')
        group.add_argument('--cf4-dropout', default=cls.dropout_p, type=float,
                           help='[experimental] zeroing probability of feature in head input')
        assert cls.inplace_ops
        group.add_argument('--cf4-no-inplace-ops', dest='cf4_inplace_ops',
                           default=True, action='store_false',
                           help='alternative graph without inplace ops')

    @classmethod
    def configure(cls, args: argparse.Namespace):
        """Copy the parsed CompositeField4 options onto the class."""
        cls.dropout_p = args.cf4_dropout
        cls.inplace_ops = args.cf4_inplace_ops

    @property
    def sparse_task_parameters(self):
        """Return the convolution weight wrapped in a list."""
        return [self.conv.weight]

    def forward(self, x):  # pylint: disable=arguments-differ
        """Compute the composite fields for the feature map ``x``."""
        x = self.dropout(x)
        x = self.conv(x)
        # upscale
        if self.upsample_op is not None:
            x = self.upsample_op(x)
            low_cut = (self.meta.upsample_stride - 1) // 2
            high_cut = math.ceil((self.meta.upsample_stride - 1) / 2.0)
            if self.training:
                # negative axes not supported by ONNX TensorRT
                x = x[:, :, low_cut:-high_cut, low_cut:-high_cut]
            else:
                # the int() forces the tracer to use static shape
                x = x[:, :, low_cut:int(x.shape[2]) - high_cut, low_cut:int(x.shape[3]) - high_cut]

        # Extract some shape parameters once.
        # Convert to int so that shape is constant in ONNX export.
        x_size = x.size()
        batch_size = x_size[0]
        feature_height = int(x_size[2])
        feature_width = int(x_size[3])

        x = x.view(
            batch_size,
            self.meta.n_fields,
            self.n_components,
            feature_height,
            feature_width
        )

        if not self.training and self.inplace_ops:
            # classification: the slice is a view of x, so the sigmoid must be
            # applied in place; a discarded out-of-place result changes nothing
            classes_x = x[:, :, 1:1 + self.meta.n_confidences]
            classes_x.sigmoid_()

            # regressions x: add index
            if self.meta.n_vectors > 0:
                index_field = index_field_torch((feature_height, feature_width), device=x.device)
                first_reg_feature = 1 + self.meta.n_confidences
                for i, do_offset in enumerate(self.meta.vector_offsets):
                    if not do_offset:
                        continue
                    reg_x = x[:, :, first_reg_feature + i * 2:first_reg_feature + (i + 1) * 2]
                    reg_x.add_(index_field)

            # scale
            first_scale_feature = 1 + self.meta.n_confidences + self.meta.n_vectors * 2
            scales_x = x[:, :, first_scale_feature:first_scale_feature + self.meta.n_scales]
            scales_x[:] = torch.nn.functional.softplus(scales_x)
        elif not self.training and not self.inplace_ops:
            # TODO: CoreMLv4 does not like strided slices.
            # Strides are avoided when switching the first and second dim
            # temporarily.
            x = torch.transpose(x, 1, 2)

            # width
            width_x = x[:, 0:1]

            # classification
            classes_x = x[:, 1:1 + self.meta.n_confidences]
            classes_x = torch.sigmoid(classes_x)

            # regressions x
            first_reg_feature = 1 + self.meta.n_confidences
            regs_x = [
                x[:, first_reg_feature + i * 2:first_reg_feature + (i + 1) * 2]
                for i in range(self.meta.n_vectors)
            ]
            # regressions x: add index
            index_field = index_field_torch(
                (feature_height, feature_width), device=x.device, unsqueeze=(1, 0))
            # TODO: coreml export does not work with the index_field creation in the graph.
            index_field = tl.convert_to_tensor(index_field.numpy())
            regs_x = [reg_x + index_field if do_offset else reg_x
                      for reg_x, do_offset in zip(regs_x, self.meta.vector_offsets)]

            # scale
            first_scale_feature = 1 + self.meta.n_confidences + self.meta.n_vectors * 2
            scales_x = x[:, first_scale_feature:first_scale_feature + self.meta.n_scales]
            scales_x = torch.nn.functional.softplus(scales_x)

            # concat
            x = torch.cat([width_x, classes_x, *regs_x, scales_x], dim=1)

            # TODO: CoreMLv4 problem (see above).
            x = torch.transpose(x, 1, 2)

        return x
|
|
@ -0,0 +1,105 @@
|
|||
import tensorlayer as tl
|
||||
from tensorlayer.layers import Conv2d
|
||||
from tensorlayer.layers import SequentialLayer
|
||||
|
||||
|
||||
from .heads import HeadNetwork, CompositeField4
|
||||
|
||||
|
||||
class TBaseSingleImage(HeadNetwork):
    """Single-image head for batches that interleave tracking image groups.

    Only the first image of every group is passed to the wrapped
    ``CompositeField4`` head:

    * training: every second image of the batch is kept (image 0 of each
      image pair of image 0 and 1);
    * evaluation with ``forward_tracking_pose``: every
      ``tracking_pose_length``-th image is kept;
    * evaluation without ``forward_tracking_pose`` (full tracking pose):
      not implemented yet.
    """

    # When true, evaluation keeps only the first image of each group.
    forward_tracking_pose = True
    # Step used to pick the first image of each group during evaluation.
    tracking_pose_length = 2

    def __init__(self, meta, in_features):
        """Create the wrapped ``CompositeField4`` head.

        Args:
            meta: head meta information; forwarded unchanged to
                ``HeadNetwork`` and ``CompositeField4``.
            in_features: forwarded unchanged to ``HeadNetwork`` and
                ``CompositeField4``.
        """
        super().__init__(meta, in_features)
        self.head = CompositeField4(meta, in_features)

    def forward(self, *args):
        """Select the first image of every group and run the head on it.

        Args:
            *args: only ``args[0]``, the batched feature map, is used.

        Returns:
            The output of the wrapped head for the selected images.

        Raises:
            NotImplementedError: in evaluation mode when
                ``forward_tracking_pose`` is false.
        """
        x = args[0]

        if self.training:
            x = x[::2]
        elif self.forward_tracking_pose:
            x = x[::self.tracking_pose_length]
        else:
            # full tracking pose eval
            # TODO: stack batch dimension in feature dimension and adjust
            # meta information (make it a property to dynamically return
            # a different meta for evaluation)
            # Fail before running the head: its output would be discarded.
            raise NotImplementedError(
                'full tracking pose evaluation is not implemented')

        return self.head(x)
|
||||
|
||||
|
||||
class Tcaf(HeadNetwork):
    """Head that combines the features of the images of a tracking group.

    The batch is read as interleaved groups (image 0, image 1, image 0,
    image 1, ...). Every image goes through a shared 1x1 feature reduction.
    The reduced features of the first image of each group are concatenated
    with those of every other image of the group, passed through a shared
    1x1 convolution and finally through a ``CompositeField4`` head.

    The reduction and compute layers are stored on the class, so all
    instances share them; they are built from the ``in_features`` of the
    first instance that is created.
    """

    # Number of images per group in evaluation mode (training always uses 2).
    tracking_pose_length = 2
    # Output channels of the shared feature reduction.
    reduced_features = 512

    # Lazily created layers shared by every instance of this class.
    _global_feature_reduction = None
    _global_feature_compute = None

    def __init__(self, meta, in_features):
        """Create (or reuse) the shared layers and build the head.

        Args:
            meta: head meta information; forwarded to ``HeadNetwork`` and
                ``CompositeField4``.
            in_features: input channels of the shared feature reduction.
        """
        super().__init__(meta, in_features)

        # NOTE(review): the ``kernel_size``/``bias`` keywords and
        # ``ReLU(inplace=True)`` follow the PyTorch API -- confirm that the
        # installed TensorLayer ``Conv2d`` and ``ReLU`` accept them.
        if self._global_feature_reduction is None:
            self.__class__._global_feature_reduction = SequentialLayer([
                Conv2d(self.reduced_features, kernel_size=(1, 1), bias=True,
                       in_channels=in_features),
                tl.ReLU(inplace=True),
            ])
        self.feature_reduction = self._global_feature_reduction

        if self._global_feature_compute is None:
            self.__class__._global_feature_compute = SequentialLayer([
                Conv2d(self.reduced_features * 2, kernel_size=(1, 1),
                       bias=True, in_channels=self.reduced_features * 2),
                tl.ReLU(inplace=True),
            ])
        self.feature_compute = self._global_feature_compute

        # The head sees primary and secondary features concatenated.
        self.head = CompositeField4(meta, self.reduced_features * 2)

    def forward(self, *args):
        """Pair up the images of each group and run the head on the pairs.

        Args:
            *args: only ``args[0]``, the batched feature map, is used.

        Returns:
            The head output, or ``None`` when the batch holds an odd number
            of images.

        Raises:
            NotImplementedError: when ``tracking_pose_length`` is not 2 and
                the batch holds an even number of images.
        """
        x = args[0]

        # Batches that are not intended for tracking loss might have an
        # odd number of images (or only 1 image).
        # In that case, simply do not execute this head as the result should
        # never be used.
        if len(x) % 2 == 1:
            return None

        if self.tracking_pose_length != 2:
            # TODO need to stack group from batch dim in feature dim and adjust
            # meta info
            # Fail before running any layer: the result would be discarded.
            raise NotImplementedError(
                'tracking_pose_length != 2 is not implemented')

        x = self.feature_reduction(x)

        group_length = 2 if self.training else self.tracking_pose_length
        primary = x[::group_length]
        others = [x[i::group_length] for i in range(1, group_length)]

        # The axis is passed positionally so the calls do not depend on the
        # backend naming the parameter ``axis`` or ``dim``.
        # NOTE(review): axis 1 is the feature axis only for channels-first
        # tensors -- confirm against the data format of the backend.
        x = tl.stack([tl.concat([primary, o], 1) for o in others], 1)

        # ``.shape`` is provided by torch and TensorFlow tensors alike,
        # whereas ``.size()`` exists only on torch tensors.
        x_shape = x.shape
        # Merge the batch axis and the stacked group axis into one.
        x = tl.reshape(x, [x_shape[0] * x_shape[1]] + list(x_shape[2:]))

        x = self.feature_compute(x)

        x = self.head(x)

        return x
|
Loading…
Reference in New Issue