faster #7

Open
p32651807 wants to merge 5 commits from p32651807/tensorlayer3:nets into master
5 changed files with 672 additions and 0 deletions

30
RoiPoolingConv.py Normal file

@@ -0,0 +1,30 @@
import tensorflow as tf
import tensorlayer as tl
from tensorlayer.layers import Module
class RoiPoolingConv(Module):
    """ROI pooling built on tf.image.crop_and_resize.

    Crops every proposal region out of the shared feature map and resizes it to a
    fixed pool_size x pool_size grid so the classifier head sees a uniform shape.
    """
    def __init__(self, pool_size, **kwargs):
        self.pool_size = pool_size
        super(RoiPoolingConv, self).__init__(**kwargs)

    def build(self, input_shape):
        # input_shape[0] is the feature-map shape: [batch, h, w, channels]
        self.nb_channels = input_shape[0][3]

    def compute_output_shape(self, input_shape):
        input_shape2 = input_shape[1]
        return None, input_shape2[1], self.pool_size, self.pool_size, self.nb_channels

    def call(self, x, mask=None):
        assert (len(x) == 2)
        img = x[0]    # shared feature map: [batch, h, w, channels]
        rois = x[1]   # normalized boxes:   [batch, num_rois, 4]
        num_rois = tf.shape(rois)[1]
        batch_size = tf.shape(rois)[0]
        # crop_and_resize takes a flat list of boxes plus, for each box, the index
        # of the batch element it belongs to
        box_index = tf.expand_dims(tf.range(0, batch_size), 1)
        box_index = tf.tile(box_index, (1, num_rois))
        box_index = tf.reshape(box_index, [-1])
        rs = tf.image.crop_and_resize(img, tf.reshape(rois, [-1, 4]), box_index, (self.pool_size, self.pool_size))
        # restore the [batch, num_rois, ...] layout expected by the classifier head
        final_output = tf.reshape(rs, (batch_size, num_rois, self.pool_size, self.pool_size, self.nb_channels))
        return final_output
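For reference, a minimal standalone sketch (not part of the PR) of what the call method above does with tf.image.crop_and_resize; toy shapes and eager TensorFlow are assumed, and all names below are illustrative only:

import tensorflow as tf

feature_map = tf.random.uniform((2, 8, 8, 3))     # [batch, h, w, channels]
rois = tf.random.uniform((2, 4, 4))               # [batch, num_rois, 4], normalized (y1, x1, y2, x2)

batch_size = tf.shape(rois)[0]
num_rois = tf.shape(rois)[1]
# one batch index per flattened ROI: [0, 0, 0, 0, 1, 1, 1, 1]
box_index = tf.reshape(tf.tile(tf.expand_dims(tf.range(batch_size), 1), (1, num_rois)), [-1])

pooled = tf.image.crop_and_resize(feature_map, tf.reshape(rois, [-1, 4]), box_index, (7, 7))
print(pooled.shape)  # (8, 7, 7, 3): batch_size * num_rois crops, each pool_size x pool_size

In the real layer the ROIs come from the RPN; the random boxes here are placeholders for a shape check only.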

109
frcnn.py Normal file

@@ -0,0 +1,109 @@
# from tensorflow.keras.layers import (Conv2D, Dense, Flatten, Input, Reshape,
# TimeDistributed)
# from tensorflow.keras.models import Model
# from tensorflow.keras.initializers import RandomNormal
import tensorflow as tf
import tensorlayer as tl
from tensorlayer.layers import (Conv2d,BatchNorm,Flatten,TimeDistributedLayer,Input,Reshape)
from nets.resnet import ResNet50, classifier_layers
from nets.RoiPoolingConv import RoiPoolingConv
#----------------------------------------------------#
#   Build the region proposal network (RPN).
#   Its outputs adjust the anchor (prior) boxes to obtain region proposals.
#----------------------------------------------------#
def get_rpn(base_layers, num_anchors):
    #----------------------------------------------------#
    #   Integrate features with a 512-channel 3x3 convolution
    #----------------------------------------------------#
x = Conv2D(512, (3, 3), padding='same', activation='relu', kernel_initializer=RandomNormal(stddev=0.02), name='rpn_conv1')(base_layers)
    #----------------------------------------------------#
    #   Adjust the channel count with 1x1 convolutions to obtain the predictions
    #----------------------------------------------------#
x_class = Conv2D(num_anchors, (1, 1), activation='sigmoid', kernel_initializer=RandomNormal(stddev=0.02), name='rpn_out_class')(x)
x_regr = Conv2D(num_anchors * 4, (1, 1), activation='linear', kernel_initializer=RandomNormal(stddev=0.02), name='rpn_out_regress')(x)
x_class = Reshape((-1,1),name="classification")(x_class)
x_regr = Reshape((-1,4),name="regression")(x_regr)
return [x_class, x_regr]
#----------------------------------------------------#
#   Pass the shared feature map and the proposals to the classifier network.
#   Its outputs refine the proposals into the final predicted boxes.
#----------------------------------------------------#
def get_classifier(base_layers, input_rois, nb_classes=21, pooling_regions = 14):
    # batch_size, 38, 38, 1024 -> batch_size, num_rois, 14, 14, 1024
out_roi_pool = RoiPoolingConv(pooling_regions)([base_layers, input_rois])
    # batch_size, num_rois, 14, 14, 1024 -> batch_size, num_rois, 1, 1, 2048
out = classifier_layers(out_roi_pool)
    # batch_size, num_rois, 1, 1, 2048 -> batch_size, num_rois, 2048
out = TimeDistributed(Flatten())(out)
    # batch_size, num_rois, 2048 -> batch_size, num_rois, nb_classes
out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer=RandomNormal(stddev=0.02)), name='dense_class_{}'.format(nb_classes))(out)
    # batch_size, num_rois, 2048 -> batch_size, num_rois, 4 * (nb_classes-1)
out_regr = TimeDistributed(Dense(4 * (nb_classes-1), activation='linear', kernel_initializer=RandomNormal(stddev=0.02)), name='dense_regress_{}'.format(nb_classes))(out)
return [out_class, out_regr]
def get_model(config, num_classes):
inputs = Input(shape=(None, None, 3))
roi_input = Input(shape=(None, 4))
    #----------------------------------------------------#
    #   Assuming a 600,600,3 input,
    #   the shared feature map base_layers is 38,38,1024
    #----------------------------------------------------#
base_layers = ResNet50(inputs)
    #----------------------------------------------------#
    #   9 anchor (prior) boxes per feature-map point
    #----------------------------------------------------#
num_anchors = len(config.anchor_box_scales) * len(config.anchor_box_ratios)
    #----------------------------------------------------#
    #   Pass the shared feature map to the RPN.
    #   Its outputs adjust the anchors to obtain proposals.
    #----------------------------------------------------#
rpn = get_rpn(base_layers, num_anchors)
model_rpn = Model(inputs, rpn)
    #----------------------------------------------------#
    #   Pass the shared feature map and the proposals to the classifier network.
    #   Its outputs refine the proposals into the final predicted boxes.
    #----------------------------------------------------#
classifier = get_classifier(base_layers, roi_input, num_classes, config.pooling_regions)
model_all = Model([inputs, roi_input], rpn + classifier)
return model_rpn, model_all
def get_predict_model(config, num_classes):
inputs = Input(shape=(None, None, 3))
roi_input = Input(shape=(None, 4))
feature_map_input = Input(shape=(None,None,1024))
    #----------------------------------------------------#
    #   Assuming a 600,600,3 input,
    #   the shared feature map base_layers is 38,38,1024
    #----------------------------------------------------#
base_layers = ResNet50(inputs)
    #----------------------------------------------------#
    #   9 anchor (prior) boxes per feature-map point
    #----------------------------------------------------#
num_anchors = len(config.anchor_box_scales) * len(config.anchor_box_ratios)
    #----------------------------------------------------#
    #   Pass the shared feature map to the RPN.
    #   Its outputs adjust the anchors to obtain proposals.
    #----------------------------------------------------#
rpn = get_rpn(base_layers, num_anchors)
model_rpn = Model(inputs, rpn + [base_layers])
    #----------------------------------------------------#
    #   Pass the shared feature map and the proposals to the classifier network.
    #   Its outputs refine the proposals into the final predicted boxes.
    #----------------------------------------------------#
classifier = get_classifier(feature_map_input, roi_input, num_classes, config.pooling_regions)
model_classifier_only = Model([feature_map_input, roi_input], classifier)
return model_rpn, model_classifier_only
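A rough shape check for the RPN heads above (assuming the 600x600 input and the 9 anchors per feature point mentioned in the comments; the numbers are illustrative, not part of the PR):

feat_h = feat_w = 38        # shared feature map for a 600x600 input
num_anchors = 9             # len(config.anchor_box_scales) * len(config.anchor_box_ratios), e.g. 3 * 3

print(feat_h * feat_w * num_anchors)   # 12996 anchors in total
# so after Reshape, x_class is [batch, 12996, 1] and x_regr is [batch, 12996, 4],
# matching the [batch_size, num_anchor, ...] targets built in frcnn_training.py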

362
frcnn_training.py Normal file

@@ -0,0 +1,362 @@
import os
import random
from random import shuffle
import cv2
import numpy as np
import scipy.signal
import tensorflow as tf
import tensorlayer as tl
from tensorlayer.layers import *
from matplotlib import pyplot as plt
from PIL import Image
# from tensorflow import keras
# from tensorflow.keras import backend as K
# from tensorflow.keras.applications.imagenet_utils import preprocess_input
from utils.anchors import get_anchors
def rand(a=0, b=1):
return np.random.rand() * (b - a) + a
def cls_loss(ratio=3):
def _cls_loss(y_true, y_pred):
# ---------------------------------------------------#
# y_true [batch_size, num_anchor, 1]
# y_pred [batch_size, num_anchor, 1]
# ---------------------------------------------------#
labels = y_true
        # ---------------------------------------------------#
        #   -1 is to be ignored, 0 is background, 1 contains an object
        # ---------------------------------------------------#
anchor_state = y_true
classification = y_pred
        # ---------------------------------------------------#
        #   gather all samples that are not ignored
        # ---------------------------------------------------#
indices_for_no_ignore = tf.where(tf.not_equal(anchor_state, -1))
labels_for_no_ignore = tf.gather_nd(labels, indices_for_no_ignore)
classification_for_no_ignore = tf.gather_nd(classification, indices_for_no_ignore)
        cls_loss_for_no_ignore = tl.cost.binary_cross_entropy(classification_for_no_ignore, labels_for_no_ignore)  # argument order: (output, target)
cls_loss_for_no_ignore = tf.reduce_sum(cls_loss_for_no_ignore)
        # ---------------------------------------------------#
        #   normalize by the number of non-ignored samples
        # ---------------------------------------------------#
normalizer_no_ignore = tf.where(tf.not_equal(anchor_state, -1))
normalizer_no_ignore = tf.cast(tf.shape(normalizer_no_ignore)[0], tf.float32)
normalizer_no_ignore = tf.maximum(1.0, normalizer_no_ignore)
        # total loss
loss = cls_loss_for_no_ignore / normalizer_no_ignore
return loss
return _cls_loss
def smooth_l1(sigma=1.0):
sigma_squared = sigma ** 2
def _smooth_l1(y_true, y_pred):
# ---------------------------------------------------#
# y_true [batch_size, num_anchor, 4+1]
# y_pred [batch_size, num_anchor, 4]
# ---------------------------------------------------#
regression = y_pred
regression_target = y_true[:, :, :-1]
anchor_state = y_true[:, :, -1]
        # find the positive samples
indices = tf.where(tf.equal(anchor_state, 1))
regression = tf.gather_nd(regression, indices)
regression_target = tf.gather_nd(regression_target, indices)
        # compute the smooth L1 loss
regression_diff = regression - regression_target
regression_diff = tf.abs(regression_diff)
regression_loss = tf.where(
tf.less(regression_diff, 1.0 / sigma_squared),
0.5 * sigma_squared * tf.pow(regression_diff, 2),
regression_diff - 0.5 / sigma_squared
)
        # divide the loss by the number of positive samples
normalizer = tf.maximum(1, tf.shape(indices)[0])
normalizer = tf.cast(normalizer, dtype=tf.float32)
regression_loss = tf.reduce_sum(regression_loss) / normalizer
return regression_loss
return _smooth_l1
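
# Illustrative sketch only (not called anywhere in this file): toy tensors showing how
# _smooth_l1 masks anchors by the state stored in the last column of y_true.
# Assumes eager TensorFlow; the values below are made up for demonstration.
def _smooth_l1_demo():
    y_pred = tf.zeros((1, 3, 4))                                # [batch, num_anchor, 4]
    states = tf.constant([[[1.], [0.], [-1.]]])                 # positive / background / ignored
    y_true = tf.concat([tf.ones((1, 3, 4)), states], axis=-1)   # [batch, num_anchor, 4 + 1]
    # only the anchor with state == 1 contributes: |0 - 1| = 1 per coordinate,
    # smooth L1 gives 1 - 0.5 = 0.5 each, so the loss is 4 * 0.5 / 1 = 2.0
    return smooth_l1()(y_true, y_pred)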
def class_loss_regr(num_classes):
epsilon = 1e-4
def class_loss_regr_fixed_num(y_true, y_pred):
x = y_true[:, :, 4 * num_classes:] - y_pred
x_abs = tf.abs(x)
x_bool = tf.cast(tf.less_equal(x_abs, 1.0), 'float32')
loss = 4 * tf.reduce_sum(
y_true[:, :, :4 * num_classes] * (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / tf.reduce_sum(
epsilon + y_true[:, :, :4 * num_classes])
return loss
return class_loss_regr_fixed_num
def class_loss_cls(y_true, y_pred):
loss = tf.reduce_mean(tl.cost.cross_entropy_seq(y_true, y_pred))
return loss
def get_new_img_size(width, height, img_min_side=600):
if width <= height:
f = float(img_min_side) / width
resized_height = int(f * height)
resized_width = int(img_min_side)
else:
f = float(img_min_side) / height
resized_width = int(f * width)
resized_height = int(img_min_side)
return resized_width, resized_height
def get_img_output_length(width, height):
def get_output_length(input_length):
# input_length += 6
filter_sizes = [7, 3, 1, 1]
padding = [3, 1, 0, 0]
stride = 2
for i in range(4):
# input_length = (input_length - filter_size + stride) // stride
input_length = (input_length + 2 * padding[i] - filter_sizes[i]) // stride + 1
return input_length
return get_output_length(width), get_output_length(height)
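
# Worked example (assuming the 600x600 input used elsewhere in this PR):
#   600 -> (600 + 2*3 - 7) // 2 + 1 = 300   # 7x7 conv, stride 2, pad 3
#   300 -> (300 + 2*1 - 3) // 2 + 1 = 150   # 3x3 pool, stride 2, pad 1
#   150 -> (150 + 2*0 - 1) // 2 + 1 = 75    # stride-2 block, stage 3
#    75 -> ( 75 + 2*0 - 1) // 2 + 1 = 38    # stride-2 block, stage 4
# so get_img_output_length(600, 600) == (38, 38), matching the 38x38x1024 feature map.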
class Generator(object):
def __init__(self, bbox_util, train_lines, num_classes, Batch_size, input_shape=[600, 600], num_regions=256):
self.bbox_util = bbox_util
self.train_lines = train_lines
self.train_batches = len(train_lines)
self.num_classes = num_classes
self.Batch_size = Batch_size
self.input_shape = input_shape
self.num_regions = num_regions
def get_random_data(self, annotation_line, jitter=.3, hue=.1, sat=1.5, val=1.5, random=True):
        '''Random preprocessing for real-time data augmentation.'''
line = annotation_line.split()
image = Image.open(line[0])
iw, ih = image.size
w, h = self.input_shape
box = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]])
if not random:
# resize image
scale = min(w / iw, h / ih)
nw = int(iw * scale)
nh = int(ih * scale)
dx = (w - nw) // 2
dy = (h - nh) // 2
image = image.resize((nw, nh), Image.BICUBIC)
new_image = Image.new('RGB', (w, h), (128, 128, 128))
new_image.paste(image, (dx, dy))
image_data = np.array(new_image, np.float32)
# correct boxes
box_data = np.zeros((len(box), 5))
if len(box) > 0:
np.random.shuffle(box)
box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
box[:, 0:2][box[:, 0:2] < 0] = 0
box[:, 2][box[:, 2] > w] = w
box[:, 3][box[:, 3] > h] = h
box_w = box[:, 2] - box[:, 0]
box_h = box[:, 3] - box[:, 1]
box = box[np.logical_and(box_w > 1, box_h > 1)]
box_data = np.zeros((len(box), 5))
box_data[:len(box)] = box
return image_data, box_data
# resize image
new_ar = w / h * rand(1 - jitter, 1 + jitter) / rand(1 - jitter, 1 + jitter)
scale = rand(.25, 2)
if new_ar < 1:
nh = int(scale * h)
nw = int(nh * new_ar)
else:
nw = int(scale * w)
nh = int(nw / new_ar)
image = image.resize((nw, nh), Image.BICUBIC)
# place image
dx = int(rand(0, w - nw))
dy = int(rand(0, h - nh))
new_image = Image.new('RGB', (w, h), (128, 128, 128))
new_image.paste(image, (dx, dy))
image = new_image
# flip image or not
flip = rand() < .5
if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
# distort image
hue = rand(-hue, hue)
sat = rand(1, sat) if rand() < .5 else 1 / rand(1, sat)
val = rand(1, val) if rand() < .5 else 1 / rand(1, val)
x = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
x[..., 0] += hue * 360
x[..., 0][x[..., 0] > 1] -= 1
x[..., 0][x[..., 0] < 0] += 1
x[..., 1] *= sat
x[..., 2] *= val
x[x[:, :, 0] > 360, 0] = 360
x[:, :, 1:][x[:, :, 1:] > 1] = 1
x[x < 0] = 0
image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB) * 255
box_data = np.zeros((len(box), 5))
if len(box) > 0:
np.random.shuffle(box)
box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
if flip: box[:, [0, 2]] = w - box[:, [2, 0]]
box[:, 0:2][box[:, 0:2] < 0] = 0
box[:, 2][box[:, 2] > w] = w
box[:, 3][box[:, 3] > h] = h
box_w = box[:, 2] - box[:, 0]
box_h = box[:, 3] - box[:, 1]
box = box[np.logical_and(box_w > 1, box_h > 1)] # discard invalid box
box_data = np.zeros((len(box), 5))
box_data[:len(box)] = box
return image_data, box_data
def generate(self):
while True:
shuffle(self.train_lines)
lines = self.train_lines
inputs = []
target0 = []
target1 = []
target2 = []
for annotation_line in lines:
img, y = self.get_random_data(annotation_line)
height, width, _ = np.shape(img)
if len(y) > 0:
boxes = np.array(y[:, :4], dtype=np.float32)
boxes[:, 0] = boxes[:, 0] / width
boxes[:, 1] = boxes[:, 1] / height
boxes[:, 2] = boxes[:, 2] / width
boxes[:, 3] = boxes[:, 3] / height
y[:, :4] = boxes[:, :4]
anchors = get_anchors(get_img_output_length(width, height), width, height)
                # ---------------------------------------------------#
                #   assignment has shape [:, 5] and splits into two parts:
                #   [:, :4] holds the regression results the network should predict
                #   [:, 4] flags whether the anchor contains an object (background by default)
                # ---------------------------------------------------#
assignment = self.bbox_util.assign_boxes(y, anchors)
classification = assignment[:, 4]
regression = assignment[:, :]
                # ---------------------------------------------------#
                #   subsample positives and negatives so the total number of training samples is 256
                # ---------------------------------------------------#
mask_pos = classification[:] > 0
num_pos = len(classification[mask_pos])
if num_pos > self.num_regions / 2:
val_locs = random.sample(range(num_pos), int(num_pos - self.num_regions / 2))
temp_classification = classification[mask_pos]
temp_regression = regression[mask_pos]
temp_classification[val_locs] = -1
temp_regression[val_locs, -1] = -1
classification[mask_pos] = temp_classification
regression[mask_pos] = temp_regression
mask_neg = classification[:] == 0
num_neg = len(classification[mask_neg])
mask_pos = classification[:] > 0
num_pos = len(classification[mask_pos])
if len(classification[mask_neg]) + num_pos > self.num_regions:
val_locs = random.sample(range(num_neg), int(num_neg + num_pos - self.num_regions))
temp_classification = classification[mask_neg]
temp_classification[val_locs] = -1
classification[mask_neg] = temp_classification
inputs.append(np.array(img))
target0.append(np.reshape(classification, [-1, 1]))
target1.append(np.reshape(regression, [-1, 5]))
target2.append(y)
if len(inputs) == self.Batch_size:
tmp_inp = np.array(inputs)
tmp_targets = [np.array(target0, np.float32), np.array(target1, np.float32)]
tmp_y = target2
yield tmp_inp/255., tmp_targets, tmp_y
inputs = []
target0 = []
target1 = []
target2 = []
class LossHistory():
def __init__(self, log_dir):
import datetime
curr_time = datetime.datetime.now()
time_str = datetime.datetime.strftime(curr_time, '%Y_%m_%d_%H_%M_%S')
self.log_dir = log_dir
self.time_str = time_str
self.save_path = os.path.join(self.log_dir, "loss_" + str(self.time_str))
self.losses = []
self.val_loss = []
os.makedirs(self.save_path)
def append_loss(self, loss, val_loss):
self.losses.append(loss)
self.val_loss.append(val_loss)
with open(os.path.join(self.save_path, "epoch_loss_" + str(self.time_str) + ".txt"), 'a') as f:
f.write(str(loss))
f.write("\n")
with open(os.path.join(self.save_path, "epoch_val_loss_" + str(self.time_str) + ".txt"), 'a') as f:
f.write(str(val_loss))
f.write("\n")
self.loss_plot()
def loss_plot(self):
iters = range(len(self.losses))
plt.figure()
plt.plot(iters, self.losses, 'red', linewidth=2, label='train loss')
plt.plot(iters, self.val_loss, 'coral', linewidth=2, label='val loss')
try:
if len(self.losses) < 25:
num = 5
else:
num = 15
plt.plot(iters, scipy.signal.savgol_filter(self.losses, num, 3), 'green', linestyle='--', linewidth=2,
label='smooth train loss')
plt.plot(iters, scipy.signal.savgol_filter(self.val_loss, num, 3), '#8B4513', linestyle='--', linewidth=2,
label='smooth val loss')
except:
pass
plt.grid(True)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(loc="upper right")
plt.savefig(os.path.join(self.save_path, "epoch_loss_" + str(self.time_str) + ".png"))
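A minimal usage sketch for LossHistory (the directory name and loss values below are made up for illustration):

history = LossHistory("logs")                 # creates logs/loss_<timestamp>/
for loss, val_loss in [(1.30, 1.45), (0.92, 1.10)]:
    history.append_loss(loss, val_loss)       # appends to the .txt logs and redraws the loss curve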

18
function.py Normal file

@@ -0,0 +1,18 @@
import numpy as np
def _preprocess_numpy_input(x):
    """Caffe-style preprocessing: convert RGB to BGR and subtract the ImageNet channel means.

    Matches the 'caffe' mode of keras preprocess_input (no std normalization).
    The subtraction is applied in place on the float array that is passed in.
    """
    # 'RGB'->'BGR'
    x = x[..., ::-1]
    mean = [103.939, 116.779, 123.68]
    std = None
    x[..., 0] -= mean[0]
    x[..., 1] -= mean[1]
    x[..., 2] -= mean[2]
    if std is not None:
        x[..., 0] /= std[0]
        x[..., 1] /= std[1]
        x[..., 2] /= std[2]
    return x
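A small usage sketch for _preprocess_numpy_input (it expects a float RGB array in the 0-255 range; note that the mean subtraction happens in place on the array you pass, hence the copy):

import numpy as np
from function import _preprocess_numpy_input   # import path assumed from this PR's file layout

img = (np.random.rand(600, 600, 3) * 255).astype(np.float32)   # dummy RGB image
out = _preprocess_numpy_input(img.copy())                      # copy() keeps the original untouched
print(out.shape, out.dtype)                                    # (600, 600, 3) float32, now BGR, mean-subtracted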

153
resnet.py Normal file

@@ -0,0 +1,153 @@
from __future__ import print_function
import tensorflow as tf
import tensorlayer as tl
from tensorlayer import logging
from tensorlayer.files import (assign_weights, maybe_download_and_extract)
from tensorlayer.layers import (BatchNorm, Conv2d, Dense, Elementwise, GlobalMeanPool2d, Input, MaxPool2d)
from tensorlayer.layers import Module, SequentialLayer,ZeroPad2d,AdaptiveMaxPool2d,TimeDistributedLayer,LambdaLayer
def identity_block(input_tensor, kernel_size, filters, stage, block):
filters1, filters2, filters3 = filters
conv_name_base = 'res' + str(stage) + block + '_branch'
bn_name_base = 'bn' + str(stage) + block + '_branch'
x = Conv2d(filters1, (1, 1),name=conv_name_base+"2a")(input_tensor)
x = BatchNorm(is_train=False,name=bn_name_base+'2a',act="relu",num_features=filters1)(x)
x = Conv2d(filters2, (1, 1), name=conv_name_base + "2b")(x)
x = BatchNorm(is_train=False, name=bn_name_base + '2b', act="relu",num_features=filters2)(x)
x = Conv2d(filters3, (1, 1), name=conv_name_base + "2c")(x)
x = BatchNorm(is_train=False, name=bn_name_base + '2c',num_features=filters3)(x)
x = Elementwise(tl.add,act="relu")([input_tensor,x])
return x
def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)):
filters1, filters2, filters3 = filters
conv_name_base = 'res' + str(stage) + block + '_branch'
bn_name_base = 'bn' + str(stage) + block + '_branch'
x = Conv2d(filters1, (1, 1), strides=strides,name=conv_name_base + '2a')(input_tensor)
x = BatchNorm(is_train=False, name=bn_name_base + '2a',num_features=filters1,act="relu")(x)
x = Conv2d(filters2, (kernel_size,kernel_size), padding='same', name=conv_name_base + '2b')(x)
x = BatchNorm(is_train=False, name=bn_name_base + '2b',num_features=filters2,act="relu")(x)
x = Conv2d(filters3, (1, 1), name=conv_name_base + '2c')(x)
x = BatchNorm(is_train=False, name=bn_name_base + '2c',num_features=filters3)(x)
shortcut = Conv2d(filters3, (1, 1), strides=strides, name=conv_name_base + '1')(input_tensor)
shortcut = BatchNorm(is_train=False, name=bn_name_base + '1')(shortcut)
x = Elementwise(tl.add,act="relu")([x, shortcut])
return x
def ResNet50(inputs):
    #-----------------------------------#
    #   Assume the input image is 600,600,3
    #-----------------------------------#
img_input = inputs
# 600,600,3 -> 300,300,64
x = ZeroPad2d((3, 3))(img_input)
x = Conv2d(64, (7, 7), strides=(2, 2), name='conv1')(x)
x = BatchNorm(is_train=False, name='bn_conv1',act="relu",num_features=64)(x)
# 300,300,64 -> 150,150,64
x = MaxPool2d((3, 3), strides=(2, 2), padding="same")(x)
# 150,150,64 -> 150,150,256
x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
# 150,150,256 -> 75,75,512
x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')
# 75,75,512 -> 38,38,1024
x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')
    # the final shared feature map is 38,38,1024
return x
def identity_block_td(input_tensor, kernel_size, filters, stage, block):
nb_filter1, nb_filter2, nb_filter3 = filters
conv_name_base = 'res' + str(stage) + block + '_branch'
bn_name_base = 'bn' + str(stage) + block + '_branch'
x = TimeDistributedLayer(Conv2d(nb_filter1, (1, 1),) ,name=conv_name_base + '2a')(input_tensor)
x = TimeDistributedLayer(BatchNorm(is_train=False, act="relu"),name=bn_name_base+ '2a')(x)
x = TimeDistributedLayer(Conv2d(nb_filter2, (kernel_size, kernel_size),padding='same'), name=conv_name_base + '2b')(x)
x = TimeDistributedLayer(BatchNorm(is_train=False,act="relu"),name=bn_name_base+ '2b')(x)
x = TimeDistributedLayer(Conv2d(nb_filter3, (1, 1)) ,name=conv_name_base + '2c')(x)
x = TimeDistributedLayer(BatchNorm(is_train=False),name=bn_name_base+ '2c')(x)
x = Elementwise(tl.add,act="relu")([x,input_tensor])
return x
def conv_block_td(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)):
nb_filter1, nb_filter2, nb_filter3 = filters
conv_name_base = 'res' + str(stage) + block + '_branch'
bn_name_base = 'bn' + str(stage) + block + '_branch'
x = TimeDistributedLayer(Conv2d(nb_filter1, (1, 1), strides=strides), name=conv_name_base + '2a')(input_tensor)
x = TimeDistributedLayer(BatchNorm(is_train=False,act="relu"), name=bn_name_base + '2a')(x)
x = TimeDistributedLayer(Conv2d(nb_filter2, (kernel_size, kernel_size), padding='same'), name=conv_name_base + '2b')(x)
x = TimeDistributedLayer(BatchNorm(is_train=False,act="relu"),name=bn_name_base + '2b')(x)
x = TimeDistributedLayer(Conv2d(nb_filter3, (1, 1)), name=conv_name_base + '2c')(x)
x = TimeDistributedLayer(BatchNorm(is_train=False), name=bn_name_base + '2c')(x)
shortcut = TimeDistributedLayer(Conv2d(nb_filter3, (1, 1), strides=strides), name=conv_name_base + '1')(input_tensor)
shortcut = TimeDistributedLayer(BatchNorm(is_train=False),name=bn_name_base + '1')(shortcut)
x = Elementwise(tl.add,act="relu")([x,shortcut])
return x
def classifier_layers(x):
# num_rois, 14, 14, 1024 -> num_rois, 7, 7, 2048
x = conv_block_td(x, 3, [512, 512, 2048], stage=5, block='a', strides=(2, 2))
# num_rois, 7, 7, 2048 -> num_rois, 7, 7, 2048
x = identity_block_td(x, 3, [512, 512, 2048], stage=5, block='b')
# num_rois, 7, 7, 2048 -> num_rois, 7, 7, 2048
x = identity_block_td(x, 3, [512, 512, 2048], stage=5, block='c')
# num_rois, 7, 7, 2048 -> num_rois, 1, 1, 2048
x = AdaptiveMaxPool2d((7, 7), name='avg_pool')(x)
return x
if __name__ == "__main__":
    import numpy as np
    # quick smoke test: build the backbone and run it on a dummy image resized to 600x600
    net_in = Input(shape=(1, 600, 600, 3))
    model = ResNet50(net_in)
    image = (np.random.rand(1, 224, 224, 3)).astype(np.float32)
    transform = tl.vision.transforms.Resize(size=(600, 600), interpolation='bilinear')
    image = transform(image)
    bb = ResNet50(image)
    print(image.shape)
    print(bb.shape)