mirror of https://github.com/open-mmlab/mmpose
536 lines
13 KiB
Python
536 lines
13 KiB
Python
# Inherit the shared default runtime settings.
_base_ = ['../../../_base_/default_runtime.py']

# runtime
# 600 training epochs with validation every 20 epochs; the dynamic interval
# entry (580, 1) requests validation every epoch once epoch 580 is reached.
train_cfg = dict(max_epochs=600, val_interval=20, dynamic_intervals=[(580, 1)])

# Reference total batch size used when the learning rate is auto-scaled.
auto_scale_lr = dict(base_batch_size=256)

# Save a checkpoint every 40 epochs and keep at most the 3 latest ones.
default_hooks = dict(
    checkpoint=dict(type='CheckpointHook', interval=40, max_keep_ckpts=3))
|
|
|
|
# Optimizer: AdamW with gradient-norm clipping. Weight decay is disabled for
# normalization layers and biases, and parameters whose name matches
# 'neck.encoder' use a 0.05x learning-rate multiplier.
optim_wrapper = dict(
    type='OptimWrapper',
    constructor='ForceDefaultOptimWrapperConstructor',
    optimizer=dict(type='AdamW', lr=0.004, weight_decay=0.05),
    paramwise_cfg=dict(
        norm_decay_mult=0,
        bias_decay_mult=0,
        bypass_duplicate=True,
        force_default_settings=True,
        # A dict literal is required here because the key contains a dot.
        custom_keys={'neck.encoder': dict(lr_mult=0.05)}),
    clip_grad=dict(max_norm=0.1, norm_type=2))
|
|
|
|
# Learning-rate schedule, expressed in epochs:
#   [0, 5)     quadratic warm-up
#   [5, 280)   cosine annealing down to 2e-4
#   [280, 281) constant x2.5 step
#   [281, 580) second cosine annealing down to 2e-4
#   [580, 600) constant (factor 1)
param_scheduler = [
    dict(
        type='QuadraticWarmupLR',
        by_epoch=True,
        begin=0,
        end=5,
        convert_to_iter_based=True),
    dict(
        type='CosineAnnealingLR',
        eta_min=0.0002,
        begin=5,
        T_max=280,
        end=280,
        by_epoch=True,
        convert_to_iter_based=True),
    # this scheduler is used to increase the lr from 2e-4 to 5e-4
    dict(type='ConstantLR', by_epoch=True, factor=2.5, begin=280, end=281),
    dict(
        type='CosineAnnealingLR',
        eta_min=0.0002,
        begin=281,
        T_max=300,
        end=580,
        by_epoch=True,
        convert_to_iter_based=True),
    dict(type='ConstantLR', by_epoch=True, factor=1, begin=580, end=600),
]
|
|
|
|
# data
# Network input resolution shared by the codec, the validation pipeline and
# the model's test_cfg.
input_size = (640, 640)
# Dataset meta-information file (COCO keypoint definition).
metafile = 'configs/_base_/datasets/coco.py'
# Codec handed to GenerateTarget in the training pipelines.
codec = dict(type='YOLOXPoseAnnotationProcessor', input_size=input_size)
|
|
|
|
# Stage-1 training pipeline: strong augmentation (Mosaic, random affine with
# perspective transform, MixUp, HSV jitter, random flip) followed by
# annotation filtering, target generation and packing.
train_pipeline_stage1 = [
    dict(type='LoadImage', backend_args=None),
    dict(
        type='Mosaic',
        img_scale=(640, 640),
        pad_val=114.0,
        pre_transform=[dict(type='LoadImage', backend_args=None)]),
    dict(
        type='BottomupRandomAffine',
        input_size=(640, 640),
        shift_factor=0.1,
        rotate_factor=10,
        scale_factor=(0.75, 1.0),
        pad_val=114,
        distribution='uniform',
        transform_mode='perspective',
        bbox_keep_corner=False,
        clip_border=True,
    ),
    dict(
        type='YOLOXMixUp',
        img_scale=(640, 640),
        ratio_range=(0.8, 1.6),
        pad_val=114.0,
        pre_transform=[dict(type='LoadImage', backend_args=None)]),
    dict(type='YOLOXHSVRandomAug'),
    dict(type='RandomFlip'),
    dict(type='FilterAnnotations', by_kpt=True, by_box=True, keep_empty=False),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs'),
]
|
|
# Stage-2 training pipeline: Mosaic and MixUp are absent and the random
# affine has shift/rotate/scale probabilities set to 0, leaving HSV jitter
# and random flip as the only augmentations.
train_pipeline_stage2 = [
    dict(type='LoadImage'),
    dict(
        type='BottomupRandomAffine',
        input_size=(640, 640),
        shift_prob=0,
        rotate_prob=0,
        scale_prob=0,
        scale_type='long',
        pad_val=(114, 114, 114),
        bbox_keep_corner=False,
        clip_border=True,
    ),
    dict(type='YOLOXHSVRandomAug'),
    dict(type='RandomFlip'),
    dict(type='BottomupGetHeatmapMask', get_invalid=True),
    dict(type='FilterAnnotations', by_kpt=True, by_box=True, keep_empty=False),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs'),
]
|
|
|
|
# data settings
# All datasets below are loaded in bottom-up mode from paths under data_root.
data_mode = 'bottomup'
data_root = 'data/'
|
|
|
|
# mapping
# Keypoint index pairs passed as `mapping` to KeypointConverter for the AIC
# dataset; each pair is (AIC keypoint index, COCO keypoint index).
aic_coco = [
    (0, 6),
    (1, 8),
    (2, 10),
    (3, 5),
    (4, 7),
    (5, 9),
    (6, 12),
    (7, 14),
    (8, 16),
    (9, 11),
    (10, 13),
    (11, 15),
]
|
|
|
|
# Keypoint index pairs passed as `mapping` to KeypointConverter for the
# CrowdPose dataset; each pair is (CrowdPose index, COCO index).
crowdpose_coco = [
    (0, 5),
    (1, 6),
    (2, 7),
    (3, 8),
    (4, 9),
    (5, 10),
    (6, 11),
    (7, 12),
    (8, 13),
    (9, 14),
    (10, 15),
    (11, 16),
]
|
|
|
|
# Keypoint index pairs passed as `mapping` to KeypointConverter for the MPII
# dataset; each pair is (MPII index, COCO index). MPII indices 6-9 have no
# entry in this list.
mpii_coco = [
    (0, 16),
    (1, 14),
    (2, 12),
    (3, 11),
    (4, 13),
    (5, 15),
    (10, 10),
    (11, 8),
    (12, 6),
    (13, 5),
    (14, 7),
    (15, 9),
]
|
|
|
|
# Keypoint index pairs passed as `mapping` to KeypointConverter for the JHMDB
# dataset; each pair is (JHMDB index, COCO index). JHMDB indices 0-2 have no
# entry in this list.
jhmdb_coco = [
    (3, 6),
    (4, 5),
    (5, 12),
    (6, 11),
    (7, 8),
    (8, 7),
    (9, 14),
    (10, 13),
    (11, 10),
    (12, 9),
    (13, 16),
    (14, 15),
]
|
|
|
|
# Keypoint index pairs passed as `mapping` to KeypointConverter for the Halpe
# dataset: an identity mapping (i, i) over indices 0..16.
halpe_coco = [(i, i) for i in range(17)]
|
|
|
|
# Identity keypoint mapping (i, i) over indices 0..16 for OCHuman. No dataset
# entry visible in this file consumes it.
ochuman_coco = [(i, i) for i in range(17)]
|
|
|
|
# Keypoint index pairs passed as `mapping` to KeypointConverter for the
# PoseTrack18 dataset. Identity pairs, except that indices 1 and 2 have no
# entry in this list.
posetrack_coco = [
    (0, 0),
    (3, 3),
    (4, 4),
    (5, 5),
    (6, 6),
    (7, 7),
    (8, 8),
    (9, 9),
    (10, 10),
    (11, 11),
    (12, 12),
    (13, 13),
    (14, 14),
    (15, 15),
    (16, 16),
]
|
|
|
|
# train datasets
# COCO train2017 keypoints. The converter uses an identity mapping over the
# 17 keypoints so this entry has the same pipeline shape as the others.
dataset_coco = dict(
    type='CocoDataset',
    data_root=data_root,
    data_mode=data_mode,
    ann_file='coco/annotations/person_keypoints_train2017.json',
    data_prefix=dict(img='coco/train2017/'),
    pipeline=[
        dict(
            type='KeypointConverter',
            num_keypoints=17,
            mapping=[(i, i) for i in range(17)])
    ],
)
|
|
|
|
# AI Challenger training set, converted to the 17-keypoint layout via
# aic_coco.
dataset_aic = dict(
    type='AicDataset',
    data_root=data_root,
    data_mode=data_mode,
    ann_file='aic/annotations/aic_train.json',
    # Adjacent string literals are concatenated into a single path.
    data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint'
                     '_train_20170902/keypoint_train_images_20170902/'),
    pipeline=[
        dict(type='KeypointConverter', num_keypoints=17, mapping=aic_coco)
    ],
)
|
|
|
|
# CrowdPose trainval set, converted to the 17-keypoint layout via
# crowdpose_coco.
dataset_crowdpose = dict(
    type='CrowdPoseDataset',
    data_root=data_root,
    data_mode=data_mode,
    ann_file='crowdpose/annotations/mmpose_crowdpose_trainval.json',
    data_prefix=dict(img='pose/CrowdPose/images/'),
    pipeline=[
        dict(
            type='KeypointConverter', num_keypoints=17, mapping=crowdpose_coco)
    ],
)
|
|
|
|
# MPII training set, converted to the 17-keypoint layout via mpii_coco.
dataset_mpii = dict(
    type='MpiiDataset',
    data_root=data_root,
    data_mode=data_mode,
    ann_file='mpii/annotations/mpii_train.json',
    data_prefix=dict(img='pose/MPI/images/'),
    pipeline=[
        dict(type='KeypointConverter', num_keypoints=17, mapping=mpii_coco)
    ],
)
|
|
|
|
# JHMDB (Sub1 train split), converted to the 17-keypoint layout via
# jhmdb_coco.
dataset_jhmdb = dict(
    type='JhmdbDataset',
    data_root=data_root,
    data_mode=data_mode,
    ann_file='jhmdb/annotations/Sub1_train.json',
    data_prefix=dict(img='pose/JHMDB/'),
    pipeline=[
        dict(type='KeypointConverter', num_keypoints=17, mapping=jhmdb_coco)
    ],
)
|
|
|
|
# Halpe training set, converted to the 17-keypoint layout via halpe_coco.
dataset_halpe = dict(
    type='HalpeDataset',
    data_root=data_root,
    data_mode=data_mode,
    ann_file='halpe/annotations/halpe_train_v1.json',
    data_prefix=dict(img='pose/Halpe/hico_20160224_det/images/train2015'),
    pipeline=[
        dict(type='KeypointConverter', num_keypoints=17, mapping=halpe_coco)
    ],
)
|
|
|
|
# PoseTrack18 training set, converted to the 17-keypoint layout via
# posetrack_coco.
dataset_posetrack = dict(
    type='PoseTrack18Dataset',
    data_root=data_root,
    data_mode=data_mode,
    ann_file='posetrack18/annotations/posetrack18_train.json',
    data_prefix=dict(img='pose/PoseChallenge2018/'),
    pipeline=[
        dict(
            type='KeypointConverter', num_keypoints=17, mapping=posetrack_coco)
    ],
)
|
|
|
|
# Combined training set over the seven datasets above, using the COCO
# meta-information and the stage-1 pipeline. sample_ratio_factor has one
# entry per dataset, in the same order as `datasets`.
train_dataset = dict(
    type='CombinedDataset',
    metainfo=dict(from_file=metafile),
    datasets=[
        dataset_coco,
        dataset_aic,
        dataset_crowdpose,
        dataset_mpii,
        dataset_jhmdb,
        dataset_halpe,
        dataset_posetrack,
    ],
    sample_ratio_factor=[1, 0.3, 0.5, 0.3, 0.3, 0.4, 0.3],
    test_mode=False,
    pipeline=train_pipeline_stage1)
|
|
|
|
# Training dataloader: shuffled batches of 32 samples, loaded by 8
# persistent workers.
train_dataloader = dict(
    batch_size=32,
    num_workers=8,
    persistent_workers=True,
    pin_memory=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=train_dataset)
|
|
|
|
# val datasets
# Validation pipeline: load, resize/pad to input_size, then pack together
# with the listed meta keys.
val_pipeline = [
    dict(type='LoadImage'),
    dict(
        type='BottomupResize', input_size=input_size, pad_val=(114, 114, 114)),
    dict(
        type='PackPoseInputs',
        meta_keys=('id', 'img_id', 'img_path', 'ori_shape', 'img_shape',
                   'input_size', 'input_center', 'input_scale'))
]
|
|
|
|
# Validation dataloader over COCO val2017: one image per batch, in fixed
# order, with no dropped or padded samples.
val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    pin_memory=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type='CocoDataset',
        data_root=data_root,
        data_mode=data_mode,
        ann_file='coco/annotations/person_keypoints_val2017.json',
        data_prefix=dict(img='coco/val2017/'),
        test_mode=True,
        pipeline=val_pipeline,
    ))
# Testing reuses the validation dataloader configuration.
test_dataloader = val_dataloader
|
|
|
|
# evaluators
# COCO keypoint metric on val2017, using the bbox score mode and no NMS.
val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json',
    score_mode='bbox',
    nms_mode='none',
)
# Testing reuses the validation evaluator configuration.
test_evaluator = val_evaluator
|
|
|
|
# hooks
custom_hooks = [
    # Mode switch configured with num_last_epochs=20: it receives the
    # COCO-only dataset and the stage-2 pipeline as replacements.
    dict(
        type='YOLOXPoseModeSwitchHook',
        num_last_epochs=20,
        new_train_dataset=dataset_coco,
        new_train_pipeline=train_pipeline_stage2,
        priority=48),
    # Attribute overrides keyed by epoch: the entry for epoch 280 enables
    # proxy_target_cc and sets new MLE / OKS loss weights.
    dict(
        type='RTMOModeSwitchHook',
        epoch_attributes={
            280: {
                'proxy_target_cc': True,
                'loss_mle.loss_weight': 5.0,
                'loss_oks.loss_weight': 10.0
            },
        },
        priority=48),
    dict(type='SyncNormHook', priority=48),
    # Exponential moving average of the model weights (buffers included).
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0002,
        update_buffers=True,
        strict_load=False,
        priority=49),
]
|
|
|
|
# model
# Width / depth multipliers passed to the backbone, neck and head module.
widen_factor = 0.5
deepen_factor = 0.33
|
|
|
|
# Bottom-up pose estimator: CSPDarknet backbone -> HybridEncoder neck ->
# RTMOHead, evaluated at input_size.
model = dict(
    type='BottomupPoseEstimator',
    # Kaiming-uniform init for Conv2d layers; a = sqrt(5).
    init_cfg=dict(
        type='Kaiming',
        layer='Conv2d',
        a=2.23606797749979,
        distribution='uniform',
        mode='fan_in',
        nonlinearity='leaky_relu'),
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        pad_size_divisor=32,
        # mean 0 / std 1: pixel values are passed through unnormalized.
        mean=[0, 0, 0],
        std=[1, 1, 1],
        batch_augments=[
            dict(
                type='BatchSyncRandomResize',
                random_size_range=(480, 800),
                size_divisor=32,
                interval=1),
        ]),
    backbone=dict(
        type='CSPDarknet',
        deepen_factor=deepen_factor,
        widen_factor=widen_factor,
        out_indices=(2, 3, 4),
        spp_kernal_sizes=(5, 9, 13),
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='Swish'),
        # Backbone weights are loaded from a YOLOX-s COCO checkpoint, taking
        # only the parameters under the 'backbone.' prefix.
        init_cfg=dict(
            type='Pretrained',
            checkpoint='https://download.openmmlab.com/mmdetection/v2.0/'
            'yolox/yolox_s_8x8_300e_coco/yolox_s_8x8_300e_coco_'
            '20211121_095711-4592a793.pth',
            prefix='backbone.',
        )),
    neck=dict(
        type='HybridEncoder',
        in_channels=[128, 256, 512],
        deepen_factor=deepen_factor,
        widen_factor=widen_factor,
        hidden_dim=256,
        output_indices=[1, 2],
        encoder_cfg=dict(
            self_attn_cfg=dict(embed_dims=256, num_heads=8, dropout=0.0),
            ffn_cfg=dict(
                embed_dims=256,
                feedforward_channels=1024,
                ffn_drop=0.0,
                act_cfg=dict(type='GELU'))),
        projector=dict(
            type='ChannelMapper',
            in_channels=[256, 256],
            kernel_size=1,
            out_channels=256,
            act_cfg=None,
            norm_cfg=dict(type='BN'),
            num_outs=2)),
    head=dict(
        type='RTMOHead',
        num_keypoints=17,
        featmap_strides=(16, 32),
        head_module_cfg=dict(
            num_classes=1,
            in_channels=256,
            cls_feat_channels=256,
            channels_per_group=36,
            pose_vec_channels=256,
            widen_factor=widen_factor,
            stacked_convs=2,
            norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
            act_cfg=dict(type='Swish')),
        assigner=dict(
            type='SimOTAAssigner',
            dynamic_k_indicator='oks',
            oks_calculator=dict(type='PoseOKS', metainfo=metafile),
            use_keypoints_for_center=True),
        prior_generator=dict(
            type='MlvlPointGenerator',
            centralize_points=True,
            strides=[16, 32]),
        dcc_cfg=dict(
            in_channels=256,
            feat_channels=128,
            num_bins=(192, 256),
            spe_channels=128,
            gau_cfg=dict(
                s=128,
                expansion_factor=2,
                dropout_rate=0.0,
                drop_path=0.0,
                act_fn='SiLU',
                pos_enc='add')),
        overlaps_power=0.5,
        loss_cls=dict(
            type='VariFocalLoss',
            reduction='sum',
            use_target_weight=True,
            loss_weight=1.0),
        loss_bbox=dict(
            type='IoULoss',
            mode='square',
            eps=1e-16,
            reduction='sum',
            loss_weight=5.0),
        # Initial OKS / MLE loss weights; the RTMOModeSwitchHook entry in
        # custom_hooks lists different values for epoch 280.
        loss_oks=dict(
            type='OKSLoss',
            reduction='none',
            metainfo=metafile,
            loss_weight=30.0),
        loss_vis=dict(
            type='BCELoss',
            use_target_weight=True,
            reduction='mean',
            loss_weight=1.0),
        loss_mle=dict(
            type='MLECCLoss',
            use_target_weight=True,
            loss_weight=1.0,
        ),
        loss_bbox_aux=dict(type='L1Loss', reduction='sum', loss_weight=1.0),
    ),
    test_cfg=dict(
        input_size=input_size,
        score_thr=0.1,
        nms_thr=0.65,
    ))
|