From e4d05c85a6196760d0ca424956bee8b906818453 Mon Sep 17 00:00:00 2001
From: p32651807 <1297070681@qq.com>
Date: Fri, 22 Oct 2021 14:06:34 +0800
Subject: [PATCH] ADD file via upload

---
 frcnn_training.py | 362 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 362 insertions(+)
 create mode 100644 frcnn_training.py

diff --git a/frcnn_training.py b/frcnn_training.py
new file mode 100644
index 0000000..c708b73
--- /dev/null
+++ b/frcnn_training.py
@@ -0,0 +1,362 @@
+import os
+import random
+from random import shuffle
+import cv2
+import numpy as np
+import scipy.signal
+import tensorflow as tf
+import tensorlayer as tl
+from tensorlayer.layers import *
+from matplotlib import pyplot as plt
+from PIL import Image
+# from tensorflow import keras
+# from tensorflow.keras import backend as K
+# from tensorflow.keras.applications.imagenet_utils import preprocess_input
+from utils.anchors import get_anchors
+
+
+def rand(a=0, b=1):
+    return np.random.rand() * (b - a) + a
+
+
+def cls_loss(ratio=3):
+    def _cls_loss(y_true, y_pred):
+        # ---------------------------------------------------#
+        #   y_true [batch_size, num_anchor, 1]
+        #   y_pred [batch_size, num_anchor, 1]
+        # ---------------------------------------------------#
+        labels = y_true
+        # ---------------------------------------------------#
+        #   -1 means the anchor is ignored, 0 is background,
+        #   1 means the anchor contains an object.
+        # ---------------------------------------------------#
+        anchor_state = y_true
+        classification = y_pred
+
+        # ---------------------------------------------------#
+        #   Gather all samples that are not ignored.
+        # ---------------------------------------------------#
+        indices_for_no_ignore = tf.where(tf.not_equal(anchor_state, -1))
+        labels_for_no_ignore = tf.gather_nd(labels, indices_for_no_ignore)
+        classification_for_no_ignore = tf.gather_nd(classification, indices_for_no_ignore)
+
+        # tl.cost.binary_cross_entropy expects (output, target), so predictions go first
+        cls_loss_for_no_ignore = tl.cost.binary_cross_entropy(classification_for_no_ignore, labels_for_no_ignore)
+        cls_loss_for_no_ignore = tf.reduce_sum(cls_loss_for_no_ignore)
+        # ---------------------------------------------------#
+        #   Normalize by the number of non-ignored anchors.
+        # ---------------------------------------------------#
+        normalizer_no_ignore = tf.where(tf.not_equal(anchor_state, -1))
+        normalizer_no_ignore = tf.cast(tf.shape(normalizer_no_ignore)[0], tf.float32)
+        normalizer_no_ignore = tf.maximum(1.0, normalizer_no_ignore)
+
+        # total loss
+        loss = cls_loss_for_no_ignore / normalizer_no_ignore
+        return loss
+
+    return _cls_loss
+
+
+def smooth_l1(sigma=1.0):
+    sigma_squared = sigma ** 2
+
+    def _smooth_l1(y_true, y_pred):
+        # ---------------------------------------------------#
+        #   y_true [batch_size, num_anchor, 4+1]
+        #   y_pred [batch_size, num_anchor, 4]
+        # ---------------------------------------------------#
+        regression = y_pred
+        regression_target = y_true[:, :, :-1]
+        anchor_state = y_true[:, :, -1]
+
+        # find the positive samples
+        indices = tf.where(tf.equal(anchor_state, 1))
+        regression = tf.gather_nd(regression, indices)
+        regression_target = tf.gather_nd(regression_target, indices)
+
+        # compute the smooth L1 loss
+        regression_diff = regression - regression_target
+        regression_diff = tf.abs(regression_diff)
+        regression_loss = tf.where(
+            tf.less(regression_diff, 1.0 / sigma_squared),
+            0.5 * sigma_squared * tf.pow(regression_diff, 2),
+            regression_diff - 0.5 / sigma_squared
+        )
+
+        # divide the accumulated loss by the number of positive samples
+        normalizer = tf.maximum(1, tf.shape(indices)[0])
+        normalizer = tf.cast(normalizer, dtype=tf.float32)
+        regression_loss = tf.reduce_sum(regression_loss) / normalizer
+        return regression_loss
+
+    return _smooth_l1
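
As a quick sanity check of the RPN regression loss above, the snippet below evaluates smooth_l1() on a dummy batch. It is only an illustration, assuming TensorFlow 2.x eager execution and frcnn_training.py on the import path; the tensor values are made up.

from frcnn_training import smooth_l1
import numpy as np
import tensorflow as tf

# 1 image, 6 anchors: y_true is [batch, num_anchor, 4 + 1], the last channel being the
# anchor state (-1 ignore / 0 background / 1 positive); y_pred is [batch, num_anchor, 4].
y_true = np.zeros((1, 6, 5), dtype=np.float32)
y_true[0, :2, :4] = [[0.1, 0.2, 0.3, 0.4], [0.0, 0.1, 0.2, 0.3]]
y_true[0, :2, 4] = 1           # two positive anchors contribute to the loss
y_true[0, 5, 4] = -1           # ignored anchor, excluded by the state mask
y_pred = np.zeros((1, 6, 4), dtype=np.float32)

rpn_reg_loss = smooth_l1()
print(float(rpn_reg_loss(tf.constant(y_true), tf.constant(y_pred))))
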
+def class_loss_regr(num_classes):
+    epsilon = 1e-4
+
+    def class_loss_regr_fixed_num(y_true, y_pred):
+        x = y_true[:, :, 4 * num_classes:] - y_pred
+        x_abs = tf.abs(x)
+        x_bool = tf.cast(tf.less_equal(x_abs, 1.0), 'float32')
+        loss = 4 * tf.reduce_sum(
+            y_true[:, :, :4 * num_classes] * (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / tf.reduce_sum(
+            epsilon + y_true[:, :, :4 * num_classes])
+        return loss
+
+    return class_loss_regr_fixed_num
+
+
+def class_loss_cls(y_true, y_pred):
+    loss = tf.reduce_mean(tl.cost.cross_entropy_seq(y_true, y_pred))
+    return loss
+
+
+def get_new_img_size(width, height, img_min_side=600):
+    if width <= height:
+        f = float(img_min_side) / width
+        resized_height = int(f * height)
+        resized_width = int(img_min_side)
+    else:
+        f = float(img_min_side) / height
+        resized_width = int(f * width)
+        resized_height = int(img_min_side)
+
+    return resized_width, resized_height
+
+
+def get_img_output_length(width, height):
+    def get_output_length(input_length):
+        # input_length += 6
+        filter_sizes = [7, 3, 1, 1]
+        padding = [3, 1, 0, 0]
+        stride = 2
+        for i in range(4):
+            # input_length = (input_length - filter_size + stride) // stride
+            input_length = (input_length + 2 * padding[i] - filter_sizes[i]) // stride + 1
+        return input_length
+
+    return get_output_length(width), get_output_length(height)
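
For reference, the two helpers above can be exercised directly; the numbers in the comments follow from the code itself (short side scaled to 600, and a 7x7/2 stem followed by three stride-2 stages, i.e. roughly a 16x spatial reduction).

from frcnn_training import get_new_img_size, get_img_output_length

print(get_new_img_size(1000, 600))      # (1000, 600): short side is already 600
print(get_new_img_size(800, 1200))      # (600, 900): both sides scaled by 600/800
print(get_img_output_length(600, 600))  # (38, 38): feature-map size used for the anchors
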
+class Generator(object):
+    def __init__(self, bbox_util, train_lines, num_classes, Batch_size, input_shape=[600, 600], num_regions=256):
+        self.bbox_util = bbox_util
+        self.train_lines = train_lines
+        self.train_batches = len(train_lines)
+        self.num_classes = num_classes
+        self.Batch_size = Batch_size
+        self.input_shape = input_shape
+        self.num_regions = num_regions
+
+    def get_random_data(self, annotation_line, jitter=.3, hue=.1, sat=1.5, val=1.5, random=True):
+        '''Random preprocessing for real-time data augmentation.'''
+        line = annotation_line.split()
+        image = Image.open(line[0])
+        iw, ih = image.size
+        w, h = self.input_shape
+
+        box = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]])
+
+        if not random:
+            # resize image
+            scale = min(w / iw, h / ih)
+            nw = int(iw * scale)
+            nh = int(ih * scale)
+            dx = (w - nw) // 2
+            dy = (h - nh) // 2
+
+            image = image.resize((nw, nh), Image.BICUBIC)
+            new_image = Image.new('RGB', (w, h), (128, 128, 128))
+            new_image.paste(image, (dx, dy))
+            image_data = np.array(new_image, np.float32)
+
+            # correct boxes
+            box_data = np.zeros((len(box), 5))
+            if len(box) > 0:
+                np.random.shuffle(box)
+                box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
+                box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
+                box[:, 0:2][box[:, 0:2] < 0] = 0
+                box[:, 2][box[:, 2] > w] = w
+                box[:, 3][box[:, 3] > h] = h
+                box_w = box[:, 2] - box[:, 0]
+                box_h = box[:, 3] - box[:, 1]
+                box = box[np.logical_and(box_w > 1, box_h > 1)]
+                box_data = np.zeros((len(box), 5))
+                box_data[:len(box)] = box
+
+            return image_data, box_data
+
+        # resize image
+        new_ar = w / h * rand(1 - jitter, 1 + jitter) / rand(1 - jitter, 1 + jitter)
+        scale = rand(.25, 2)
+        if new_ar < 1:
+            nh = int(scale * h)
+            nw = int(nh * new_ar)
+        else:
+            nw = int(scale * w)
+            nh = int(nw / new_ar)
+        image = image.resize((nw, nh), Image.BICUBIC)
+
+        # place image
+        dx = int(rand(0, w - nw))
+        dy = int(rand(0, h - nh))
+        new_image = Image.new('RGB', (w, h), (128, 128, 128))
+        new_image.paste(image, (dx, dy))
+        image = new_image
+
+        # flip image or not
+        flip = rand() < .5
+        if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
+
+        # distort image
+        hue = rand(-hue, hue)
+        sat = rand(1, sat) if rand() < .5 else 1 / rand(1, sat)
+        val = rand(1, val) if rand() < .5 else 1 / rand(1, val)
+        x = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
+        # OpenCV float HSV uses hue in [0, 360], so wrap at 360 rather than 1
+        x[..., 0] += hue * 360
+        x[..., 0][x[..., 0] > 360] -= 360
+        x[..., 0][x[..., 0] < 0] += 360
+        x[..., 1] *= sat
+        x[..., 2] *= val
+        x[x[:, :, 0] > 360, 0] = 360
+        x[:, :, 1:][x[:, :, 1:] > 1] = 1
+        x[x < 0] = 0
+        image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB) * 255
+
+        box_data = np.zeros((len(box), 5))
+        if len(box) > 0:
+            np.random.shuffle(box)
+            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
+            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
+            if flip: box[:, [0, 2]] = w - box[:, [2, 0]]
+            box[:, 0:2][box[:, 0:2] < 0] = 0
+            box[:, 2][box[:, 2] > w] = w
+            box[:, 3][box[:, 3] > h] = h
+            box_w = box[:, 2] - box[:, 0]
+            box_h = box[:, 3] - box[:, 1]
+            box = box[np.logical_and(box_w > 1, box_h > 1)]  # discard invalid boxes
+            box_data = np.zeros((len(box), 5))
+            box_data[:len(box)] = box
+        return image_data, box_data
+
+    def generate(self):
+        while True:
+            shuffle(self.train_lines)
+            lines = self.train_lines
+
+            inputs = []
+            target0 = []
+            target1 = []
+            target2 = []
+            for annotation_line in lines:
+                img, y = self.get_random_data(annotation_line)
+                height, width, _ = np.shape(img)
+
+                if len(y) > 0:
+                    boxes = np.array(y[:, :4], dtype=np.float32)
+                    boxes[:, 0] = boxes[:, 0] / width
+                    boxes[:, 1] = boxes[:, 1] / height
+                    boxes[:, 2] = boxes[:, 2] / width
+                    boxes[:, 3] = boxes[:, 3] / height
+                    y[:, :4] = boxes[:, :4]
+
+                anchors = get_anchors(get_img_output_length(width, height), width, height)
+                # ---------------------------------------------------#
+                #   assignment has shape [num_anchor, 5] and holds two parts:
+                #   :, :4  the regression targets the network should predict
+                #   :, 4   whether the anchor contains an object (background by default)
+                # ---------------------------------------------------#
+                assignment = self.bbox_util.assign_boxes(y, anchors)
+
+                classification = assignment[:, 4]
+                regression = assignment[:, :]
+
+                # ---------------------------------------------------#
+                #   Subsample positive and negative anchors so that at most
+                #   num_regions (256) samples are used for training.
+                # ---------------------------------------------------#
+                mask_pos = classification[:] > 0
+                num_pos = len(classification[mask_pos])
+                if num_pos > self.num_regions / 2:
+                    val_locs = random.sample(range(num_pos), int(num_pos - self.num_regions / 2))
+                    temp_classification = classification[mask_pos]
+                    temp_regression = regression[mask_pos]
+                    temp_classification[val_locs] = -1
+                    temp_regression[val_locs, -1] = -1
+                    classification[mask_pos] = temp_classification
+                    regression[mask_pos] = temp_regression
+
+                mask_neg = classification[:] == 0
+                num_neg = len(classification[mask_neg])
+                mask_pos = classification[:] > 0
+                num_pos = len(classification[mask_pos])
+                if len(classification[mask_neg]) + num_pos > self.num_regions:
+                    val_locs = random.sample(range(num_neg), int(num_neg + num_pos - self.num_regions))
+                    temp_classification = classification[mask_neg]
+                    temp_classification[val_locs] = -1
+                    classification[mask_neg] = temp_classification
+
+                inputs.append(np.array(img))
+                target0.append(np.reshape(classification, [-1, 1]))
+                target1.append(np.reshape(regression, [-1, 5]))
+                target2.append(y)
+
+                if len(inputs) == self.Batch_size:
+                    tmp_inp = np.array(inputs)
+                    tmp_targets = [np.array(target0, np.float32), np.array(target1, np.float32)]
+                    tmp_y = target2
+                    yield tmp_inp / 255., tmp_targets, tmp_y
+                    inputs = []
+                    target0 = []
+                    target1 = []
+                    target2 = []
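
A minimal sketch of how this generator might be driven. The BBoxUtility import, its constructor arguments, and the annotation file layout ("image_path x1,y1,x2,y2,class_id ..." per line) follow the usual faster-rcnn-keras project layout and are assumptions, not part of this patch.

from frcnn_training import Generator
from utils.utils import BBoxUtility          # assumed location of the box-assignment helper

with open('2007_train.txt') as f:            # hypothetical annotation file
    train_lines = f.readlines()

bbox_util = BBoxUtility(overlap_threshold=0.7, ignore_threshold=0.3)   # assumed signature
gen = Generator(bbox_util, train_lines, num_classes=20, Batch_size=2).generate()

images, (rpn_cls, rpn_reg), gt_boxes = next(gen)
print(images.shape)    # (2, 600, 600, 3), pixel values scaled into [0, 1]
print(rpn_cls.shape)   # (2, num_anchors, 1): -1 ignore / 0 background / 1 object
print(rpn_reg.shape)   # (2, num_anchors, 5): regression targets plus anchor state
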
+class LossHistory():
+    def __init__(self, log_dir):
+        import datetime
+        curr_time = datetime.datetime.now()
+        time_str = datetime.datetime.strftime(curr_time, '%Y_%m_%d_%H_%M_%S')
+        self.log_dir = log_dir
+        self.time_str = time_str
+        self.save_path = os.path.join(self.log_dir, "loss_" + str(self.time_str))
+        self.losses = []
+        self.val_loss = []
+
+        os.makedirs(self.save_path)
+
+    def append_loss(self, loss, val_loss):
+        self.losses.append(loss)
+        self.val_loss.append(val_loss)
+        with open(os.path.join(self.save_path, "epoch_loss_" + str(self.time_str) + ".txt"), 'a') as f:
+            f.write(str(loss))
+            f.write("\n")
+        with open(os.path.join(self.save_path, "epoch_val_loss_" + str(self.time_str) + ".txt"), 'a') as f:
+            f.write(str(val_loss))
+            f.write("\n")
+        self.loss_plot()
+
+    def loss_plot(self):
+        iters = range(len(self.losses))
+
+        plt.figure()
+        plt.plot(iters, self.losses, 'red', linewidth=2, label='train loss')
+        plt.plot(iters, self.val_loss, 'coral', linewidth=2, label='val loss')
+        try:
+            if len(self.losses) < 25:
+                num = 5
+            else:
+                num = 15
+
+            plt.plot(iters, scipy.signal.savgol_filter(self.losses, num, 3), 'green', linestyle='--', linewidth=2,
+                     label='smooth train loss')
+            plt.plot(iters, scipy.signal.savgol_filter(self.val_loss, num, 3), '#8B4513', linestyle='--', linewidth=2,
+                     label='smooth val loss')
+        except:
+            pass
+
+        plt.grid(True)
+        plt.xlabel('Epoch')
+        plt.ylabel('Loss')
+        plt.legend(loc="upper right")
+
+        plt.savefig(os.path.join(self.save_path, "epoch_loss_" + str(self.time_str) + ".png"))
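
Finally, a small usage sketch for LossHistory, assuming one aggregated training and validation loss value per epoch; the log directory name and the loop values are illustrative only.

from frcnn_training import LossHistory

loss_history = LossHistory('logs/')          # creates logs/loss_<timestamp>/
for epoch in range(3):
    train_loss, val_loss = 1.0 / (epoch + 1), 1.2 / (epoch + 1)   # placeholder values
    loss_history.append_loss(train_loss, val_loss)
# Each call appends to epoch_loss_<timestamp>.txt / epoch_val_loss_<timestamp>.txt
# and re-draws epoch_loss_<timestamp>.png in the same folder.
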