faster #7
|
@ -0,0 +1,30 @@
|
|||
import tensorflow as tf
|
||||
import tensorlayer as tl
|
||||
from tensorlayer.layers import Module
|
||||
|
||||
class RoiPoolingConv(Module):
    """ROI pooling implemented with ``tf.image.crop_and_resize``.

    The layer is called with a two-element sequence ``[img, rois]`` and
    resamples every ROI to a fixed ``pool_size x pool_size`` grid.

    NOTE(review): the ``build`` / ``call`` / ``compute_output_shape`` hooks
    follow the Keras layer protocol; confirm that the TensorLayer ``Module``
    base class actually invokes them.
    """

    def __init__(self, pool_size, **kwargs):
        # Side length of the square output grid produced for every ROI.
        self.pool_size = pool_size
        super().__init__(**kwargs)

    def build(self, input_shape):
        # The channel count is read from index 3 of the first input's shape.
        feature_shape = input_shape[0]
        self.nb_channels = feature_shape[3]

    def compute_output_shape(self, input_shape):
        rois_shape = input_shape[1]
        side = self.pool_size
        return None, rois_shape[1], side, side, self.nb_channels

    def call(self, x, mask=None):
        """Crop and resize every ROI in ``x[1]`` out of the feature map ``x[0]``."""
        assert (len(x) == 2)
        img, rois = x[0], x[1]

        rois_shape = tf.shape(rois)
        batch_size = rois_shape[0]
        num_rois = rois_shape[1]

        # One batch index per ROI: [0, 0, ..., 1, 1, ...] after flattening.
        per_image = tf.expand_dims(tf.range(0, batch_size), 1)
        box_index = tf.reshape(tf.tile(per_image, (1, num_rois)), [-1])

        flat_rois = tf.reshape(rois, [-1, 4])
        crop_size = (self.pool_size, self.pool_size)
        rs = tf.image.crop_and_resize(img, flat_rois, box_index, crop_size)

        # Restore the (batch, roi) leading axes that were flattened above.
        out_shape = (batch_size, num_rois, self.pool_size, self.pool_size, self.nb_channels)
        return tf.reshape(rs, out_shape)
|
|
@ -0,0 +1,109 @@
|
|||
# from tensorflow.keras.layers import (Conv2D, Dense, Flatten, Input, Reshape,
|
||||
# TimeDistributed)
|
||||
# from tensorflow.keras.models import Model
|
||||
# from tensorflow.keras.initializers import RandomNormal
|
||||
import tensorflow as tf
|
||||
import tensorlayer as tl
|
||||
from tensorlayer.layers import (Conv2d,BatchNorm,Flatten,TimeDistributedLayer,Input,Reshape)
|
||||
|
||||
from nets.resnet import ResNet50, classifier_layers
|
||||
from nets.RoiPoolingConv import RoiPoolingConv
|
||||
|
||||
#----------------------------------------------------#
|
||||
# 创建建议框网络
|
||||
# 该网络结果会对先验框进行调整获得建议框
|
||||
#----------------------------------------------------#
|
||||
def get_rpn(base_layers, num_anchors):
    """Build the region proposal network (RPN) head on top of ``base_layers``.

    Args:
        base_layers: shared feature layer the head is applied to.
        num_anchors: number of anchors; the class branch emits
            ``num_anchors`` channels and the regression branch
            ``num_anchors * 4`` channels.

    Returns:
        ``[x_class, x_regr]`` after reshaping to ``(-1, 1)`` and ``(-1, 4)``.

    NOTE(review): ``Conv2D`` and ``RandomNormal`` are not imported in this
    file (the Keras imports are commented out and only TensorLayer's
    ``Conv2d`` is imported), so calling this function looks like it raises
    ``NameError`` — confirm and finish the port.
    """
    #----------------------------------------------------#
    #   Integrate features with a 512-channel 3x3 convolution
    #----------------------------------------------------#
    x = Conv2D(512, (3, 3), padding='same', activation='relu', kernel_initializer=RandomNormal(stddev=0.02), name='rpn_conv1')(base_layers)

    #----------------------------------------------------#
    #   Use 1x1 convolutions to adjust the channel count and
    #   obtain the prediction results
    #----------------------------------------------------#
    x_class = Conv2D(num_anchors, (1, 1), activation='sigmoid', kernel_initializer=RandomNormal(stddev=0.02), name='rpn_out_class')(x)
    x_regr = Conv2D(num_anchors * 4, (1, 1), activation='linear', kernel_initializer=RandomNormal(stddev=0.02), name='rpn_out_regress')(x)

    x_class = Reshape((-1,1),name="classification")(x_class)
    x_regr = Reshape((-1,4),name="regression")(x_regr)
    return [x_class, x_regr]
|
||||
|
||||
#----------------------------------------------------#
|
||||
# 将共享特征层和建议框传入classifier网络
|
||||
# 该网络结果会对建议框进行调整获得预测框
|
||||
#----------------------------------------------------#
|
||||
def get_classifier(base_layers, input_rois, nb_classes=21, pooling_regions = 14):
    """Build the ROI classifier head.

    The ROIs are pooled out of ``base_layers`` with ``RoiPoolingConv``,
    passed through ``classifier_layers``, flattened, and fed to two dense
    branches.

    Args:
        base_layers: shared feature layer.
        input_rois: proposal boxes passed to ``RoiPoolingConv``.
        nb_classes: number of outputs of the softmax branch.
        pooling_regions: pool size given to ``RoiPoolingConv``.

    Returns:
        ``[out_class, out_regr]`` where ``out_regr`` has
        ``4 * (nb_classes - 1)`` outputs.

    NOTE(review): ``TimeDistributed``, ``Dense`` and ``RandomNormal`` are not
    imported in this file (only TensorLayer's ``TimeDistributedLayer`` is),
    so this function looks like it raises ``NameError`` — confirm and finish
    the port.
    """
    # num_rois, 38, 38, 1024 -> num_rois, 14, 14, 2048
    out_roi_pool = RoiPoolingConv(pooling_regions)([base_layers, input_rois])

    # num_rois, 14, 14, 1024 -> num_rois, 1, 1, 2048
    out = classifier_layers(out_roi_pool)

    # num_rois, 1, 1, 1024 -> num_rois, 2048
    out = TimeDistributed(Flatten())(out)

    # num_rois, 1, 1, 1024 -> num_rois, nb_classes
    out_class = TimeDistributed(Dense(nb_classes, activation='softmax', kernel_initializer=RandomNormal(stddev=0.02)), name='dense_class_{}'.format(nb_classes))(out)
    # num_rois, 1, 1, 1024 -> num_rois, 4 * (nb_classes-1)
    out_regr = TimeDistributed(Dense(4 * (nb_classes-1), activation='linear', kernel_initializer=RandomNormal(stddev=0.02)), name='dense_regress_{}'.format(nb_classes))(out)
    return [out_class, out_regr]
|
||||
|
||||
def get_model(config, num_classes):
    """Build the training models: the RPN alone and RPN + classifier.

    Args:
        config: object providing ``anchor_box_scales``, ``anchor_box_ratios``
            and ``pooling_regions``.
        num_classes: number of classes passed to ``get_classifier``.

    Returns:
        ``(model_rpn, model_all)``.

    NOTE(review): ``Model`` is not imported in this file (the Keras import is
    commented out) — confirm which model class is intended.
    """
    inputs = Input(shape=(None, None, 3))
    roi_input = Input(shape=(None, 4))
    #----------------------------------------------------#
    #   Assuming a 600,600,3 input,
    #   obtain a 38,38,1024 shared feature layer base_layers
    #----------------------------------------------------#
    base_layers = ResNet50(inputs)

    #----------------------------------------------------#
    #   9 prior (anchor) boxes per feature point
    #----------------------------------------------------#
    num_anchors = len(config.anchor_box_scales) * len(config.anchor_box_ratios)

    #----------------------------------------------------#
    #   Feed the shared feature layer into the proposal network.
    #   Its outputs adjust the prior boxes to produce proposals.
    #----------------------------------------------------#
    rpn = get_rpn(base_layers, num_anchors)
    model_rpn = Model(inputs, rpn)

    #----------------------------------------------------#
    #   Feed the shared feature layer and the proposals into the
    #   classifier network. Its outputs adjust the proposals to
    #   produce the predicted boxes.
    #----------------------------------------------------#
    classifier = get_classifier(base_layers, roi_input, num_classes, config.pooling_regions)

    model_all = Model([inputs, roi_input], rpn + classifier)
    return model_rpn, model_all
|
||||
|
||||
def get_predict_model(config, num_classes):
    """Build the inference models: RPN (also emitting features) and classifier.

    Unlike ``get_model``, the classifier takes a separate feature-map input
    instead of the backbone output, and the RPN model additionally returns
    ``base_layers``.

    Args:
        config: object providing ``anchor_box_scales``, ``anchor_box_ratios``
            and ``pooling_regions``.
        num_classes: number of classes passed to ``get_classifier``.

    Returns:
        ``(model_rpn, model_classifier_only)``.

    NOTE(review): ``Model`` is not imported in this file (the Keras import is
    commented out) — confirm which model class is intended.
    """
    inputs = Input(shape=(None, None, 3))
    roi_input = Input(shape=(None, 4))
    feature_map_input = Input(shape=(None,None,1024))
    #----------------------------------------------------#
    #   Assuming a 600,600,3 input,
    #   obtain a 38,38,1024 shared feature layer base_layers
    #----------------------------------------------------#
    base_layers = ResNet50(inputs)
    #----------------------------------------------------#
    #   9 prior (anchor) boxes per feature point
    #----------------------------------------------------#
    num_anchors = len(config.anchor_box_scales) * len(config.anchor_box_ratios)

    #----------------------------------------------------#
    #   Feed the shared feature layer into the proposal network.
    #   Its outputs adjust the prior boxes to produce proposals.
    #----------------------------------------------------#
    rpn = get_rpn(base_layers, num_anchors)
    model_rpn = Model(inputs, rpn + [base_layers])

    #----------------------------------------------------#
    #   Feed the shared feature layer and the proposals into the
    #   classifier network. Its outputs adjust the proposals to
    #   produce the predicted boxes.
    #----------------------------------------------------#
    classifier = get_classifier(feature_map_input, roi_input, num_classes, config.pooling_regions)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)
    return model_rpn, model_classifier_only
|
|
@ -0,0 +1,362 @@
|
|||
import os
|
||||
import random
|
||||
from random import shuffle
|
||||
import cv2
|
||||
import numpy as np
|
||||
import scipy.signal
|
||||
import tensorflow as tf
|
||||
import tensorlayer as tl
|
||||
from tensorlayer.layers import *
|
||||
from matplotlib import pyplot as plt
|
||||
from PIL import Image
|
||||
# from tensorflow import keras
|
||||
# from tensorflow.keras import backend as K
|
||||
# from tensorflow.keras.applications.imagenet_utils import preprocess_input
|
||||
from utils.anchors import get_anchors
|
||||
|
||||
|
||||
def rand(a=0, b=1):
    """Return a random float drawn uniformly from ``[a, b)`` via ``np.random.rand``."""
    span = b - a
    return np.random.rand() * span + a
|
||||
|
||||
|
||||
def cls_loss(ratio=3):
    """Return the RPN classification loss function.

    ``ratio`` is accepted but not used by the returned loss.

    NOTE(review): ``tl.cost.binary_cross_entropy`` is called with the labels
    first and the predictions second — confirm this matches the argument
    order (and the logits/probability convention) of the installed
    TensorLayer version.
    """

    def _cls_loss(y_true, y_pred):
        # ---------------------------------------------------#
        #   y_true [batch_size, num_anchor, 1]
        #   y_pred [batch_size, num_anchor, 1]
        #   In y_true: -1 is ignored, 0 is background,
        #   1 means an object is present.
        # ---------------------------------------------------#
        # Indices of every sample that is not marked "ignore".
        keep = tf.where(tf.not_equal(y_true, -1))
        kept_labels = tf.gather_nd(y_true, keep)
        kept_scores = tf.gather_nd(y_pred, keep)

        summed = tf.reduce_sum(tl.cost.binary_cross_entropy(kept_labels, kept_scores))

        # Normalize by the number of kept samples, never less than 1.
        count = tf.cast(tf.shape(keep)[0], tf.float32)
        return summed / tf.maximum(1.0, count)

    return _cls_loss
|
||||
|
||||
|
||||
def smooth_l1(sigma=1.0):
    """Return a smooth-L1 regression loss restricted to positive anchors.

    The switch point between the quadratic and linear parts is
    ``1 / sigma**2``.
    """
    sigma_squared = sigma ** 2

    def _smooth_l1(y_true, y_pred):
        # ---------------------------------------------------#
        #   y_true [batch_size, num_anchor, 4+1]
        #   y_pred [batch_size, num_anchor, 4]
        # ---------------------------------------------------#
        # The last channel of y_true is the anchor state, the rest are targets.
        targets = y_true[:, :, :-1]
        anchor_state = y_true[:, :, -1]

        # Keep only the positive samples (state == 1).
        positive = tf.where(tf.equal(anchor_state, 1))
        pred_pos = tf.gather_nd(y_pred, positive)
        target_pos = tf.gather_nd(targets, positive)

        # Smooth L1: quadratic below the threshold, linear above it.
        abs_diff = tf.abs(pred_pos - target_pos)
        per_element = tf.where(
            tf.less(abs_diff, 1.0 / sigma_squared),
            0.5 * sigma_squared * tf.pow(abs_diff, 2),
            abs_diff - 0.5 / sigma_squared
        )

        # Divide the summed loss by the number of positives (at least 1).
        num_pos = tf.cast(tf.maximum(1, tf.shape(positive)[0]), dtype=tf.float32)
        return tf.reduce_sum(per_element) / num_pos

    return _smooth_l1
|
||||
|
||||
|
||||
def class_loss_regr(num_classes):
    """Return the classifier box-regression loss for ``num_classes`` classes.

    The returned function splits the last axis of ``y_true`` at
    ``4 * num_classes``: the first part weights the loss, the remainder is
    compared against ``y_pred``.
    """
    epsilon = 1e-4

    def class_loss_regr_fixed_num(y_true, y_pred):
        split = 4 * num_classes
        weights = y_true[:, :, :split]
        diff = y_true[:, :, split:] - y_pred
        abs_diff = tf.abs(diff)

        # 1.0 where |diff| <= 1 (quadratic part), 0.0 elsewhere (linear part).
        is_small = tf.cast(tf.less_equal(abs_diff, 1.0), 'float32')
        huber = is_small * (0.5 * diff * diff) + (1 - is_small) * (abs_diff - 0.5)

        # epsilon keeps the denominator non-zero when all weights are zero.
        return 4 * tf.reduce_sum(weights * huber) / tf.reduce_sum(epsilon + weights)

    return class_loss_regr_fixed_num
|
||||
|
||||
|
||||
def class_loss_cls(y_true, y_pred):
    """Return the mean of ``tl.cost.cross_entropy_seq(y_true, y_pred)``.

    NOTE(review): confirm that ``cross_entropy_seq`` accepts the targets as
    its first argument and the predictions as its second, and that it suits
    the softmax outputs of the classifier head.
    """
    per_item = tl.cost.cross_entropy_seq(y_true, y_pred)
    return tf.reduce_mean(per_item)
|
||||
|
||||
|
||||
def get_new_img_size(width, height, img_min_side=600):
    """Scale ``(width, height)`` so that the shorter side equals ``img_min_side``.

    Returns:
        ``(resized_width, resized_height)`` as integers; the longer side is
        truncated towards zero after scaling.
    """
    if width <= height:
        # Width is the short side: pin it and scale the height.
        f = float(img_min_side) / width
        return int(img_min_side), int(f * height)

    # Height is the short side: pin it and scale the width.
    f = float(img_min_side) / height
    return int(f * width), int(img_min_side)
|
||||
|
||||
|
||||
def get_img_output_length(width, height):
    """Return the feature-map ``(width, height)`` after four stride-2 stages.

    Each stage applies ``(length + 2 * padding - filter_size) // 2 + 1`` with
    (filter_size, padding) pairs (7, 3), (3, 1), (1, 0), (1, 0).
    """
    stages = ((7, 3), (3, 1), (1, 0), (1, 0))
    stride = 2

    def get_output_length(input_length):
        for filter_size, pad in stages:
            input_length = (input_length + 2 * pad - filter_size) // stride + 1
        return input_length

    return get_output_length(width), get_output_length(height)
|
||||
|
||||
|
||||
class Generator(object):
    """Endless training-batch generator with random data augmentation.

    Each annotation line is ``"<image path> x1,y1,x2,y2,cls ..."``. For every
    image the generator augments it, assigns ground-truth boxes to anchors
    via ``bbox_util.assign_boxes`` and sub-samples the anchors so that at
    most ``num_regions`` of them take part in the loss.
    """

    def __init__(self, bbox_util, train_lines, num_classes, Batch_size, input_shape=[600, 600], num_regions=256):
        self.bbox_util = bbox_util
        self.train_lines = train_lines
        self.train_batches = len(train_lines)
        self.num_classes = num_classes
        self.Batch_size = Batch_size
        self.input_shape = input_shape
        self.num_regions = num_regions

    @staticmethod
    def _correct_boxes(box, nw, nh, iw, ih, dx, dy, w, h, flip=False):
        """Map boxes from the source image onto the padded ``w x h`` canvas.

        ``box`` is shuffled and modified in place. Boxes are rescaled from
        ``(iw, ih)`` to ``(nw, nh)``, shifted by ``(dx, dy)``, optionally
        mirrored, clipped to the canvas, and boxes not wider and taller than
        one pixel are dropped. Returns a float array with 5 columns.
        """
        box_data = np.zeros((len(box), 5))
        if len(box) > 0:
            np.random.shuffle(box)
            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
            if flip:
                box[:, [0, 2]] = w - box[:, [2, 0]]
            box[:, 0:2][box[:, 0:2] < 0] = 0
            box[:, 2][box[:, 2] > w] = w
            box[:, 3][box[:, 3] > h] = h
            box_w = box[:, 2] - box[:, 0]
            box_h = box[:, 3] - box[:, 1]
            box = box[np.logical_and(box_w > 1, box_h > 1)]  # discard invalid box
            box_data = np.zeros((len(box), 5))
            box_data[:len(box)] = box
        return box_data

    @staticmethod
    def _distort_hsv(x, hue, sat, val):
        """Apply hue shift and saturation/value scaling to a float HSV image in place.

        ``x`` is the float32 output of ``cv2.cvtColor(..., COLOR_RGB2HSV)``,
        whose hue channel is expressed in degrees (0..360) while saturation
        and value are in 0..1. ``hue`` is a fraction of the full circle.
        """
        x[..., 0] += hue * 360
        # Hue is an angle in degrees here, so wrap around at 360 (not at 1).
        x[..., 0][x[..., 0] > 360] -= 360
        x[..., 0][x[..., 0] < 0] += 360
        x[..., 1] *= sat
        x[..., 2] *= val
        x[x[:, :, 0] > 360, 0] = 360
        x[:, :, 1:][x[:, :, 1:] > 1] = 1
        x[x < 0] = 0
        return x

    def get_random_data(self, annotation_line, jitter=.3, hue=.1, sat=1.5, val=1.5, random=True):
        """Load one annotated image and apply random preprocessing for augmentation.

        Args:
            annotation_line: ``"<image path> x1,y1,x2,y2,cls ..."``.
            jitter: aspect-ratio jitter range.
            hue, sat, val: ranges of the HSV colour distortion.
            random: when false, only letterbox the image onto the canvas
                without any random augmentation.

        Returns:
            ``(image_data, box_data)``: the ``h x w x 3`` float image and an
            ``(n, 5)`` array of the surviving boxes in canvas pixels.
        """
        line = annotation_line.split()
        image = Image.open(line[0])
        iw, ih = image.size
        w, h = self.input_shape

        box = np.array([np.array(list(map(int, item.split(',')))) for item in line[1:]])

        if not random:
            # Letterbox: uniform scale, centred on a grey canvas.
            scale = min(w / iw, h / ih)
            nw = int(iw * scale)
            nh = int(ih * scale)
            dx = (w - nw) // 2
            dy = (h - nh) // 2

            image = image.resize((nw, nh), Image.BICUBIC)
            new_image = Image.new('RGB', (w, h), (128, 128, 128))
            new_image.paste(image, (dx, dy))
            image_data = np.array(new_image, np.float32)

            box_data = self._correct_boxes(box, nw, nh, iw, ih, dx, dy, w, h)
            return image_data, box_data

        # Random aspect ratio and scale.
        new_ar = w / h * rand(1 - jitter, 1 + jitter) / rand(1 - jitter, 1 + jitter)
        scale = rand(.25, 2)
        if new_ar < 1:
            nh = int(scale * h)
            nw = int(nh * new_ar)
        else:
            nw = int(scale * w)
            nh = int(nw / new_ar)
        image = image.resize((nw, nh), Image.BICUBIC)

        # Random placement on a grey canvas.
        dx = int(rand(0, w - nw))
        dy = int(rand(0, h - nh))
        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(image, (dx, dy))
        image = new_image

        # Random horizontal flip.
        flip = rand() < .5
        if flip:
            image = image.transpose(Image.FLIP_LEFT_RIGHT)

        # Random colour distortion in HSV space.
        hue = rand(-hue, hue)
        sat = rand(1, sat) if rand() < .5 else 1 / rand(1, sat)
        val = rand(1, val) if rand() < .5 else 1 / rand(1, val)
        x = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
        x = self._distort_hsv(x, hue, sat, val)
        image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB) * 255

        box_data = self._correct_boxes(box, nw, nh, iw, ih, dx, dy, w, h, flip=flip)
        return image_data, box_data

    def generate(self):
        """Yield ``(images / 255, [classification, regression], boxes)`` batches forever."""
        while True:
            shuffle(self.train_lines)
            lines = self.train_lines

            inputs = []
            target0 = []
            target1 = []
            target2 = []
            for annotation_line in lines:
                img, y = self.get_random_data(annotation_line)
                height, width, _ = np.shape(img)

                if len(y) > 0:
                    # Normalize box coordinates by the image size.
                    boxes = np.array(y[:, :4], dtype=np.float32)
                    boxes[:, 0] = boxes[:, 0] / width
                    boxes[:, 1] = boxes[:, 1] / height
                    boxes[:, 2] = boxes[:, 2] / width
                    boxes[:, 3] = boxes[:, 3] / height
                    y[:, :4] = boxes[:, :4]

                anchors = get_anchors(get_img_output_length(width, height), width, height)
                # ---------------------------------------------------#
                #   assignment has two parts, its shape is (:, 5):
                #   [:, :4] holds the regression targets the network
                #           should predict,
                #   [:, 4]  tells whether the anchor contains an object
                #           (background by default).
                # ---------------------------------------------------#
                assignment = self.bbox_util.assign_boxes(y, anchors)

                # Both are views into ``assignment``, so edits to
                # ``classification`` also show up in ``regression[:, 4]``.
                classification = assignment[:, 4]
                regression = assignment[:, :]

                # ---------------------------------------------------#
                #   Sub-sample positives and negatives so that the
                #   training samples total ``num_regions`` (256).
                # ---------------------------------------------------#
                mask_pos = classification[:] > 0
                num_pos = len(classification[mask_pos])
                if num_pos > self.num_regions / 2:
                    # Too many positives: mark the surplus as ignored (-1).
                    val_locs = random.sample(range(num_pos), int(num_pos - self.num_regions / 2))
                    temp_classification = classification[mask_pos]
                    temp_regression = regression[mask_pos]
                    temp_classification[val_locs] = -1
                    temp_regression[val_locs, -1] = -1
                    classification[mask_pos] = temp_classification
                    regression[mask_pos] = temp_regression

                mask_neg = classification[:] == 0
                num_neg = len(classification[mask_neg])
                mask_pos = classification[:] > 0
                num_pos = len(classification[mask_pos])
                if len(classification[mask_neg]) + num_pos > self.num_regions:
                    # Too many samples overall: ignore surplus negatives.
                    val_locs = random.sample(range(num_neg), int(num_neg + num_pos - self.num_regions))
                    temp_classification = classification[mask_neg]
                    temp_classification[val_locs] = -1
                    classification[mask_neg] = temp_classification

                inputs.append(np.array(img))
                target0.append(np.reshape(classification, [-1, 1]))
                target1.append(np.reshape(regression, [-1, 5]))
                target2.append(y)

                if len(inputs) == self.Batch_size:
                    tmp_inp = np.array(inputs)
                    tmp_targets = [np.array(target0, np.float32), np.array(target1, np.float32)]
                    tmp_y = target2
                    yield tmp_inp/255., tmp_targets, tmp_y
                    inputs = []
                    target0 = []
                    target1 = []
                    target2 = []
|
||||
|
||||
|
||||
class LossHistory():
    """Record per-epoch train/validation losses to text files and a plot.

    Everything is written to ``<log_dir>/loss_<timestamp>/``.
    """

    def __init__(self, log_dir):
        import datetime
        curr_time = datetime.datetime.now()
        time_str = datetime.datetime.strftime(curr_time, '%Y_%m_%d_%H_%M_%S')
        self.log_dir = log_dir
        self.time_str = time_str
        self.save_path = os.path.join(self.log_dir, "loss_" + str(self.time_str))
        self.losses = []
        self.val_loss = []

        # exist_ok: two histories created within the same second share the
        # timestamped directory instead of crashing.
        os.makedirs(self.save_path, exist_ok=True)

    def append_loss(self, loss, val_loss):
        """Store one epoch's losses, append them to the log files and redraw the plot."""
        self.losses.append(loss)
        self.val_loss.append(val_loss)
        with open(os.path.join(self.save_path, "epoch_loss_" + str(self.time_str) + ".txt"), 'a') as f:
            f.write(str(loss))
            f.write("\n")
        with open(os.path.join(self.save_path, "epoch_val_loss_" + str(self.time_str) + ".txt"), 'a') as f:
            f.write(str(val_loss))
            f.write("\n")
        self.loss_plot()

    def loss_plot(self):
        """Render the loss curves (plus smoothed curves when possible) to a PNG."""
        iters = range(len(self.losses))

        fig = plt.figure()
        try:
            plt.plot(iters, self.losses, 'red', linewidth=2, label='train loss')
            plt.plot(iters, self.val_loss, 'coral', linewidth=2, label='val loss')
            try:
                if len(self.losses) < 25:
                    num = 5
                else:
                    num = 15

                plt.plot(iters, scipy.signal.savgol_filter(self.losses, num, 3), 'green', linestyle='--', linewidth=2,
                         label='smooth train loss')
                plt.plot(iters, scipy.signal.savgol_filter(self.val_loss, num, 3), '#8B4513', linestyle='--', linewidth=2,
                         label='smooth val loss')
            except ValueError:
                # savgol_filter rejects a window longer than the data, which
                # happens during the first epochs; skip smoothing until then.
                pass

            plt.grid(True)
            plt.xlabel('Epoch')
            plt.ylabel('Loss')
            plt.legend(loc="upper right")

            plt.savefig(os.path.join(self.save_path, "epoch_loss_" + str(self.time_str) + ".png"))
        finally:
            # Without this a new figure stays open after every epoch.
            plt.close(fig)
|
|
@ -0,0 +1,18 @@
|
|||
import numpy as np
|
||||
|
||||
def _preprocess_numpy_input(x):
|
||||
|
||||
|
||||
# 'RGB'->'BGR'
|
||||
x = x[..., ::-1]
|
||||
mean = [103.939, 116.779, 123.68]
|
||||
std = None
|
||||
x[..., 0] -= mean[0]
|
||||
x[..., 1] -= mean[1]
|
||||
x[..., 2] -= mean[2]
|
||||
if std is not None:
|
||||
x[..., 0] /= std[0]
|
||||
x[..., 1] /= std[1]
|
||||
x[..., 2] /= std[2]
|
||||
return x
|
||||
|
|
@ -0,0 +1,153 @@
|
|||
from __future__ import print_function
|
||||
|
||||
import tensorflow as tf
|
||||
import tensorlayer as tl
|
||||
from tensorlayer import logging
|
||||
from tensorlayer.files import (assign_weights, maybe_download_and_extract)
|
||||
from tensorlayer.layers import (BatchNorm, Conv2d, Dense, Elementwise, GlobalMeanPool2d, Input, MaxPool2d)
|
||||
from tensorlayer.layers import Module, SequentialLayer,ZeroPad2d,AdaptiveMaxPool2d,TimeDistributedLayer,LambdaLayer
|
||||
def identity_block(input_tensor, kernel_size, filters, stage, block):
    """Residual block whose shortcut is the unmodified input.

    The main path is 1x1 conv -> ``kernel_size`` conv -> 1x1 conv, each
    followed by batch norm, and is added to ``input_tensor`` before a final
    ReLU.

    Args:
        input_tensor: block input; also used as the shortcut.
        kernel_size: side length of the middle convolution's kernel.
        filters: three filter counts, one per convolution.
        stage: stage number used to build the layer names.
        block: block label (e.g. ``'b'``) used to build the layer names.

    Returns:
        The output of the final element-wise add with ReLU.
    """
    filters1, filters2, filters3 = filters
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    x = Conv2d(filters1, (1, 1), name=conv_name_base + "2a")(input_tensor)
    x = BatchNorm(is_train=False, name=bn_name_base + '2a', act="relu", num_features=filters1)(x)

    # The middle convolution must honour ``kernel_size`` (it was hard-coded
    # to 1x1), matching conv_block and identity_block_td.
    x = Conv2d(filters2, (kernel_size, kernel_size), padding='same', name=conv_name_base + "2b")(x)
    x = BatchNorm(is_train=False, name=bn_name_base + '2b', act="relu", num_features=filters2)(x)

    x = Conv2d(filters3, (1, 1), name=conv_name_base + "2c")(x)
    x = BatchNorm(is_train=False, name=bn_name_base + '2c', num_features=filters3)(x)

    x = Elementwise(tl.add, act="relu")([input_tensor, x])

    return x
|
||||
|
||||
|
||||
def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)):
    """Residual block with a convolutional (projection) shortcut.

    The main path is a strided 1x1 conv, a ``kernel_size`` conv and a 1x1
    conv, each followed by batch norm. The shortcut is a strided 1x1 conv
    plus batch norm. Both paths are added and passed through ReLU.
    """
    n1, n2, n3 = filters
    conv_prefix = f"res{stage}{block}_branch"
    bn_prefix = f"bn{stage}{block}_branch"

    # Main path.
    main = Conv2d(n1, (1, 1), strides=strides, name=conv_prefix + '2a')(input_tensor)
    main = BatchNorm(is_train=False, name=bn_prefix + '2a', num_features=n1, act="relu")(main)

    main = Conv2d(n2, (kernel_size, kernel_size), padding='same', name=conv_prefix + '2b')(main)
    main = BatchNorm(is_train=False, name=bn_prefix + '2b', num_features=n2, act="relu")(main)

    main = Conv2d(n3, (1, 1), name=conv_prefix + '2c')(main)
    main = BatchNorm(is_train=False, name=bn_prefix + '2c', num_features=n3)(main)

    # Projection shortcut so that both paths end with ``n3`` filters.
    shortcut = Conv2d(n3, (1, 1), strides=strides, name=conv_prefix + '1')(input_tensor)
    shortcut = BatchNorm(is_train=False, name=bn_prefix + '1')(shortcut)

    return Elementwise(tl.add, act="relu")([main, shortcut])
|
||||
|
||||
|
||||
def ResNet50(inputs):
    """Build the ResNet50 backbone up to stage 4 and return the shared feature layer.

    The shape comments below assume a 600,600,3 input image.
    """
    # 600,600,3 -> 300,300,64
    x = ZeroPad2d((3, 3))(inputs)
    x = Conv2d(64, (7, 7), strides=(2, 2), name='conv1')(x)
    x = BatchNorm(is_train=False, name='bn_conv1', act="relu", num_features=64)(x)

    # 300,300,64 -> 150,150,64
    x = MaxPool2d((3, 3), strides=(2, 2), padding="same")(x)

    # (stage, filters, strides of the leading conv_block, identity block labels)
    #   stage 2: 150,150,64  -> 150,150,256
    #   stage 3: 150,150,256 -> 75,75,512
    #   stage 4: 75,75,512   -> 38,38,1024
    stages = (
        (2, [64, 64, 256], (1, 1), 'bc'),
        (3, [128, 128, 512], (2, 2), 'bcd'),
        (4, [256, 256, 1024], (2, 2), 'bcdef'),
    )
    for stage, filters, strides, identity_labels in stages:
        x = conv_block(x, 3, filters, stage=stage, block='a', strides=strides)
        for label in identity_labels:
            x = identity_block(x, 3, filters, stage=stage, block=label)

    # The result is the 38,38,1024 shared feature layer.
    return x
|
||||
|
||||
def identity_block_td(input_tensor, kernel_size, filters, stage, block):
    """Identity residual block with every layer wrapped in ``TimeDistributedLayer``.

    The main path is 1x1 conv -> ``kernel_size`` conv -> 1x1 conv, each
    followed by batch norm, and is added to ``input_tensor`` before a ReLU.
    """
    n1, n2, n3 = filters
    conv_prefix = f"res{stage}{block}_branch"
    bn_prefix = f"bn{stage}{block}_branch"

    out = TimeDistributedLayer(Conv2d(n1, (1, 1),), name=conv_prefix + '2a')(input_tensor)
    out = TimeDistributedLayer(BatchNorm(is_train=False, act="relu"), name=bn_prefix + '2a')(out)

    out = TimeDistributedLayer(Conv2d(n2, (kernel_size, kernel_size), padding='same'), name=conv_prefix + '2b')(out)
    out = TimeDistributedLayer(BatchNorm(is_train=False, act="relu"), name=bn_prefix + '2b')(out)

    out = TimeDistributedLayer(Conv2d(n3, (1, 1)), name=conv_prefix + '2c')(out)
    out = TimeDistributedLayer(BatchNorm(is_train=False), name=bn_prefix + '2c')(out)

    # Residual connection with the unmodified input.
    return Elementwise(tl.add, act="relu")([out, input_tensor])
|
||||
|
||||
def conv_block_td(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)):
    """Projection residual block with every layer wrapped in ``TimeDistributedLayer``.

    The main path is a strided 1x1 conv, a ``kernel_size`` conv and a 1x1
    conv, each followed by batch norm. The shortcut is a strided 1x1 conv
    plus batch norm. Both paths are added and passed through ReLU.
    """
    n1, n2, n3 = filters
    conv_prefix = f"res{stage}{block}_branch"
    bn_prefix = f"bn{stage}{block}_branch"

    # Main path.
    out = TimeDistributedLayer(Conv2d(n1, (1, 1), strides=strides), name=conv_prefix + '2a')(input_tensor)
    out = TimeDistributedLayer(BatchNorm(is_train=False, act="relu"), name=bn_prefix + '2a')(out)

    out = TimeDistributedLayer(Conv2d(n2, (kernel_size, kernel_size), padding='same'), name=conv_prefix + '2b')(out)
    out = TimeDistributedLayer(BatchNorm(is_train=False, act="relu"), name=bn_prefix + '2b')(out)

    out = TimeDistributedLayer(Conv2d(n3, (1, 1)), name=conv_prefix + '2c')(out)
    out = TimeDistributedLayer(BatchNorm(is_train=False), name=bn_prefix + '2c')(out)

    # Projection shortcut.
    shortcut = TimeDistributedLayer(Conv2d(n3, (1, 1), strides=strides), name=conv_prefix + '1')(input_tensor)
    shortcut = TimeDistributedLayer(BatchNorm(is_train=False), name=bn_prefix + '1')(shortcut)

    return Elementwise(tl.add, act="relu")([out, shortcut])
|
||||
|
||||
|
||||
def classifier_layers(x):
    """Apply ResNet stage 5 (time-distributed) followed by a pooling layer.

    NOTE(review): the final layer is named ``'avg_pool'`` but is an
    ``AdaptiveMaxPool2d`` built with ``(7, 7)`` and is not wrapped in
    ``TimeDistributedLayer`` like the other layers — confirm that this
    yields the intended ``num_rois, 1, 1, 2048`` output.
    """
    stage5_filters = [512, 512, 2048]

    # num_rois, 14, 14, 1024 -> num_rois, 7, 7, 2048
    x = conv_block_td(x, 3, stage5_filters, stage=5, block='a', strides=(2, 2))
    # num_rois, 7, 7, 2048 -> num_rois, 7, 7, 2048 (twice)
    for label in ('b', 'c'):
        x = identity_block_td(x, 3, stage5_filters, stage=5, block=label)

    # Intended: num_rois, 7, 7, 2048 -> num_rois, 1, 1, 2048
    pool = AdaptiveMaxPool2d((7, 7), name='avg_pool')
    return pool(x)
|
||||
|
||||
if __name__ == "__main__":
    # Smoke test: build the backbone on a symbolic input, then run it on a
    # random image resized to 600x600 and print the resulting shapes.
    import numpy as np

    symbolic_input = Input(shape=(1, 600, 600, 3))
    model = ResNet50(symbolic_input)

    image = (np.random.rand(1, 224, 224, 3)).astype(np.float32)
    resize = tl.vision.transforms.Resize(size=(600, 600), interpolation='bilinear')
    image = resize(image)

    features = ResNet50(image)
    print(image.shape)
    print(features.shape)
|
Loading…
Reference in New Issue