From e4d05c85a6196760d0ca424956bee8b906818453 Mon Sep 17 00:00:00 2001
From: p32651807 <1297070681@qq.com>
Date: Fri, 22 Oct 2021 14:06:34 +0800
Subject: [PATCH] ADD file via upload

---
 frcnn_training.py | 362 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 362 insertions(+)
 create mode 100644 frcnn_training.py

diff --git a/frcnn_training.py b/frcnn_training.py
new file mode 100644
index 0000000..c708b73
--- /dev/null
+++ b/frcnn_training.py
@@ -0,0 +1,362 @@
+import os
+import random
+from random import shuffle
+import cv2
+import numpy as np
+import scipy.signal
+import tensorflow as tf
+import tensorlayer as tl
+from tensorlayer.layers import *
+from matplotlib import pyplot as plt
+from PIL import Image
+# from tensorflow import keras
+# from tensorflow.keras import backend as K
+# from tensorflow.keras.applications.imagenet_utils import preprocess_input
+from utils.anchors import get_anchors
+
+
+def rand(a=0, b=1):
+    return np.random.rand() * (b - a) + a
+
+
+def cls_loss(ratio=3):
+    def _cls_loss(y_true, y_pred):
+        # ---------------------------------------------------#
+        #   y_true [batch_size, num_anchor, 1]
+        #   y_pred [batch_size, num_anchor, 1]
+        # ---------------------------------------------------#
+        labels = y_true
+        # ---------------------------------------------------#
+        #   -1 means the anchor is ignored, 0 is background,
+        #   1 means the anchor contains an object.
+        # ---------------------------------------------------#
+        anchor_state = y_true
+        classification = y_pred
+
+        # ---------------------------------------------------#
+        #   Gather all samples that are not ignored.
+        # ---------------------------------------------------#
+        indices_for_no_ignore = tf.where(tf.not_equal(anchor_state, -1))
+        labels_for_no_ignore = tf.gather_nd(labels, indices_for_no_ignore)
+        classification_for_no_ignore = tf.gather_nd(classification, indices_for_no_ignore)
+
+        # tl.cost.binary_cross_entropy expects (output, target), so predictions go first
+        cls_loss_for_no_ignore = tl.cost.binary_cross_entropy(classification_for_no_ignore, labels_for_no_ignore)
+        cls_loss_for_no_ignore = tf.reduce_sum(cls_loss_for_no_ignore)
+        # ---------------------------------------------------#
+        #   Normalize by the number of non-ignored anchors.
+        # ---------------------------------------------------#
+        normalizer_no_ignore = tf.where(tf.not_equal(anchor_state, -1))
+        normalizer_no_ignore = tf.cast(tf.shape(normalizer_no_ignore)[0], tf.float32)
+        normalizer_no_ignore = tf.maximum(1.0, normalizer_no_ignore)
+
+        # total loss
+        loss = cls_loss_for_no_ignore / normalizer_no_ignore
+        return loss
+
+    return _cls_loss
+
+
+def smooth_l1(sigma=1.0):
+    sigma_squared = sigma ** 2
+
+    def _smooth_l1(y_true, y_pred):
+        # ---------------------------------------------------#
+        #   y_true [batch_size, num_anchor, 4+1]
+        #   y_pred [batch_size, num_anchor, 4]
+        # ---------------------------------------------------#
+        regression = y_pred
+        regression_target = y_true[:, :, :-1]
+        anchor_state = y_true[:, :, -1]
+
+        # find the positive samples
+        indices = tf.where(tf.equal(anchor_state, 1))
+        regression = tf.gather_nd(regression, indices)
+        regression_target = tf.gather_nd(regression_target, indices)
+
+        # compute the smooth L1 loss
+        regression_diff = regression - regression_target
+        regression_diff = tf.abs(regression_diff)
+        regression_loss = tf.where(
+            tf.less(regression_diff, 1.0 / sigma_squared),
+            0.5 * sigma_squared * tf.pow(regression_diff, 2),
+            regression_diff - 0.5 / sigma_squared
+        )
+
+        # divide the accumulated loss by the number of positive samples
+        normalizer = tf.maximum(1, tf.shape(indices)[0])
+        normalizer = tf.cast(normalizer, dtype=tf.float32)
+        regression_loss = tf.reduce_sum(regression_loss) / normalizer
+        return regression_loss
+
+    return _smooth_l1
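
As a quick sanity check of the RPN regression loss above, the snippet below evaluates smooth_l1() on a dummy batch. It is only an illustration, assuming TensorFlow 2.x eager execution and frcnn_training.py on the import path; the tensor values are made up.

from frcnn_training import smooth_l1
import numpy as np
import tensorflow as tf

# 1 image, 6 anchors: y_true is [batch, num_anchor, 4 + 1], the last channel being the
# anchor state (-1 ignore / 0 background / 1 positive); y_pred is [batch, num_anchor, 4].
y_true = np.zeros((1, 6, 5), dtype=np.float32)
y_true[0, :2, :4] = [[0.1, 0.2, 0.3, 0.4], [0.0, 0.1, 0.2, 0.3]]
y_true[0, :2, 4] = 1           # two positive anchors contribute to the loss
y_true[0, 5, 4] = -1           # ignored anchor, excluded by the state mask
y_pred = np.zeros((1, 6, 4), dtype=np.float32)

rpn_reg_loss = smooth_l1()
print(float(rpn_reg_loss(tf.constant(y_true), tf.constant(y_pred))))
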
+def class_loss_regr(num_classes):
+    epsilon = 1e-4
+
+    def class_loss_regr_fixed_num(y_true, y_pred):
+        x = y_true[:, :, 4 * num_classes:] - y_pred
+        x_abs = tf.abs(x)
+        x_bool = tf.cast(tf.less_equal(x_abs, 1.0), 'float32')
+        loss = 4 * tf.reduce_sum(
+            y_true[:, :, :4 * num_classes] * (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / tf.reduce_sum(
+            epsilon + y_true[:, :, :4 * num_classes])
+        return loss
+
+    return class_loss_regr_fixed_num
+
+
+def class_loss_cls(y_true, y_pred):
+    loss = tf.reduce_mean(tl.cost.cross_entropy_seq(y_true, y_pred))
+    return loss
+
+
+def get_new_img_size(width, height, img_min_side=600):
+    if width <= height:
+        f = float(img_min_side) / width
+        resized_height = int(f * height)
+        resized_width = int(img_min_side)
+    else:
+        f = float(img_min_side) / height
+        resized_width = int(f * width)
+        resized_height = int(img_min_side)
+
+    return resized_width, resized_height
+
+
+def get_img_output_length(width, height):
+    def get_output_length(input_length):
+        # input_length += 6
+        filter_sizes = [7, 3, 1, 1]
+        padding = [3, 1, 0, 0]
+        stride = 2
+        for i in range(4):
+            # input_length = (input_length - filter_size + stride) // stride
+            input_length = (input_length + 2 * padding[i] - filter_sizes[i]) // stride + 1
+        return input_length
+
+    return get_output_length(width), get_output_length(height)
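
For reference, the two helpers above can be exercised directly; the numbers in the comments follow from the code itself (short side scaled to 600, and a 7x7/2 stem followed by three stride-2 stages, i.e. roughly a 16x spatial reduction).

from frcnn_training import get_new_img_size, get_img_output_length

print(get_new_img_size(1000, 600))      # (1000, 600): short side is already 600
print(get_new_img_size(800, 1200))      # (600, 900): both sides scaled by 600/800
print(get_img_output_length(600, 600))  # (38, 38): feature-map size used for the anchors
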
+class Generator(object):
+    def __init__(self, bbox_util, train_lines, num_classes, Batch_size, input_shape=[600, 600], num_regions=256):
+        self.bbox_util = bbox_util
+        self.train_lines = train_lines
+        self.train_batches = len(train_lines)
+        self.num_classes = num_classes
+        self.Batch_size = Batch_size
+        self.input_shape = input_shape
+        self.num_regions = num_regions
+
+    def get_random_data(self, annotation_line, jitter=.3, hue=.1, sat=1.5, val=1.5, random=True):
+        '''Random preprocessing for real-time data augmentation.'''
+        line = annotation_line.split()
+        image = Image.open(line[0])
+        iw, ih = image.size
+        w, h = self.input_shape
+
+        box = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]])
+
+        if not random:
+            # resize image
+            scale = min(w / iw, h / ih)
+            nw = int(iw * scale)
+            nh = int(ih * scale)
+            dx = (w - nw) // 2
+            dy = (h - nh) // 2
+
+            image = image.resize((nw, nh), Image.BICUBIC)
+            new_image = Image.new('RGB', (w, h), (128, 128, 128))
+            new_image.paste(image, (dx, dy))
+            image_data = np.array(new_image, np.float32)
+
+            # correct boxes
+            box_data = np.zeros((len(box), 5))
+            if len(box) > 0:
+                np.random.shuffle(box)
+                box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
+                box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
+                box[:, 0:2][box[:, 0:2] < 0] = 0
+                box[:, 2][box[:, 2] > w] = w
+                box[:, 3][box[:, 3] > h] = h
+                box_w = box[:, 2] - box[:, 0]
+                box_h = box[:, 3] - box[:, 1]
+                box = box[np.logical_and(box_w > 1, box_h > 1)]
+                box_data = np.zeros((len(box), 5))
+                box_data[:len(box)] = box
+
+            return image_data, box_data
+
+        # resize image
+        new_ar = w / h * rand(1 - jitter, 1 + jitter) / rand(1 - jitter, 1 + jitter)
+        scale = rand(.25, 2)
+        if new_ar < 1:
+            nh = int(scale * h)
+            nw = int(nh * new_ar)
+        else:
+            nw = int(scale * w)
+            nh = int(nw / new_ar)
+        image = image.resize((nw, nh), Image.BICUBIC)
+
+        # place image
+        dx = int(rand(0, w - nw))
+        dy = int(rand(0, h - nh))
+        new_image = Image.new('RGB', (w, h), (128, 128, 128))
+        new_image.paste(image, (dx, dy))
+        image = new_image
+
+        # flip image or not
+        flip = rand() < .5
+        if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
+
+        # distort image
+        hue = rand(-hue, hue)
+        sat = rand(1, sat) if rand() < .5 else 1 / rand(1, sat)
+        val = rand(1, val) if rand() < .5 else 1 / rand(1, val)
+        x = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
+        # OpenCV float HSV uses hue in [0, 360], so wrap at 360 rather than 1
+        x[..., 0] += hue * 360
+        x[..., 0][x[..., 0] > 360] -= 360
+        x[..., 0][x[..., 0] < 0] += 360
+        x[..., 1] *= sat
+        x[..., 2] *= val
+        x[x[:, :, 0] > 360, 0] = 360
+        x[:, :, 1:][x[:, :, 1:] > 1] = 1
+        x[x < 0] = 0
+        image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB) * 255
+
+        box_data = np.zeros((len(box), 5))
+        if len(box) > 0:
+            np.random.shuffle(box)
+            box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
+            box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
+            if flip: box[:, [0, 2]] = w - box[:, [2, 0]]
+            box[:, 0:2][box[:, 0:2] < 0] = 0
+            box[:, 2][box[:, 2] > w] = w
+            box[:, 3][box[:, 3] > h] = h
+            box_w = box[:, 2] - box[:, 0]
+            box_h = box[:, 3] - box[:, 1]
+            box = box[np.logical_and(box_w > 1, box_h > 1)]  # discard invalid boxes
+            box_data = np.zeros((len(box), 5))
+            box_data[:len(box)] = box
+        return image_data, box_data
+
+    def generate(self):
+        while True:
+            shuffle(self.train_lines)
+            lines = self.train_lines
+
+            inputs = []
+            target0 = []
+            target1 = []
+            target2 = []
+            for annotation_line in lines:
+                img, y = self.get_random_data(annotation_line)
+                height, width, _ = np.shape(img)
+
+                if len(y) > 0:
+                    boxes = np.array(y[:, :4], dtype=np.float32)
+                    boxes[:, 0] = boxes[:, 0] / width
+                    boxes[:, 1] = boxes[:, 1] / height
+                    boxes[:, 2] = boxes[:, 2] / width
+                    boxes[:, 3] = boxes[:, 3] / height
+                    y[:, :4] = boxes[:, :4]
+
+                anchors = get_anchors(get_img_output_length(width, height), width, height)
+                # ---------------------------------------------------#
+                #   assignment has shape [num_anchor, 5] and holds two parts:
+                #   :, :4  the regression targets the network should predict
+                #   :, 4   whether the anchor contains an object (background by default)
+                # ---------------------------------------------------#
+                assignment = self.bbox_util.assign_boxes(y, anchors)
+
+                classification = assignment[:, 4]
+                regression = assignment[:, :]
+
+                # ---------------------------------------------------#
+                #   Subsample positive and negative anchors so that at most
+                #   num_regions (256) samples are used for training.
+                # ---------------------------------------------------#
+                mask_pos = classification[:] > 0
+                num_pos = len(classification[mask_pos])
+                if num_pos > self.num_regions / 2:
+                    val_locs = random.sample(range(num_pos), int(num_pos - self.num_regions / 2))
+                    temp_classification = classification[mask_pos]
+                    temp_regression = regression[mask_pos]
+                    temp_classification[val_locs] = -1
+                    temp_regression[val_locs, -1] = -1
+                    classification[mask_pos] = temp_classification
+                    regression[mask_pos] = temp_regression
+
+                mask_neg = classification[:] == 0
+                num_neg = len(classification[mask_neg])
+                mask_pos = classification[:] > 0
+                num_pos = len(classification[mask_pos])
+                if len(classification[mask_neg]) + num_pos > self.num_regions:
+                    val_locs = random.sample(range(num_neg), int(num_neg + num_pos - self.num_regions))
+                    temp_classification = classification[mask_neg]
+                    temp_classification[val_locs] = -1
+                    classification[mask_neg] = temp_classification
+
+                inputs.append(np.array(img))
+                target0.append(np.reshape(classification, [-1, 1]))
+                target1.append(np.reshape(regression, [-1, 5]))
+                target2.append(y)
+
+                if len(inputs) == self.Batch_size:
+                    tmp_inp = np.array(inputs)
+                    tmp_targets = [np.array(target0, np.float32), np.array(target1, np.float32)]
+                    tmp_y = target2
+                    yield tmp_inp / 255., tmp_targets, tmp_y
+                    inputs = []
+                    target0 = []
+                    target1 = []
+                    target2 = []
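
A minimal sketch of how this generator might be driven. The BBoxUtility import, its constructor arguments, and the annotation file layout ("image_path x1,y1,x2,y2,class_id ..." per line) follow the usual faster-rcnn-keras project layout and are assumptions, not part of this patch.

from frcnn_training import Generator
from utils.utils import BBoxUtility          # assumed location of the box-assignment helper

with open('2007_train.txt') as f:            # hypothetical annotation file
    train_lines = f.readlines()

bbox_util = BBoxUtility(overlap_threshold=0.7, ignore_threshold=0.3)   # assumed signature
gen = Generator(bbox_util, train_lines, num_classes=20, Batch_size=2).generate()

images, (rpn_cls, rpn_reg), gt_boxes = next(gen)
print(images.shape)    # (2, 600, 600, 3), pixel values scaled into [0, 1]
print(rpn_cls.shape)   # (2, num_anchors, 1): -1 ignore / 0 background / 1 object
print(rpn_reg.shape)   # (2, num_anchors, 5): regression targets plus anchor state
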
+class LossHistory():
+    def __init__(self, log_dir):
+        import datetime
+        curr_time = datetime.datetime.now()
+        time_str = datetime.datetime.strftime(curr_time, '%Y_%m_%d_%H_%M_%S')
+        self.log_dir = log_dir
+        self.time_str = time_str
+        self.save_path = os.path.join(self.log_dir, "loss_" + str(self.time_str))
+        self.losses = []
+        self.val_loss = []
+
+        os.makedirs(self.save_path)
+
+    def append_loss(self, loss, val_loss):
+        self.losses.append(loss)
+        self.val_loss.append(val_loss)
+        with open(os.path.join(self.save_path, "epoch_loss_" + str(self.time_str) + ".txt"), 'a') as f:
+            f.write(str(loss))
+            f.write("\n")
+        with open(os.path.join(self.save_path, "epoch_val_loss_" + str(self.time_str) + ".txt"), 'a') as f:
+            f.write(str(val_loss))
+            f.write("\n")
+        self.loss_plot()
+
+    def loss_plot(self):
+        iters = range(len(self.losses))
+
+        plt.figure()
+        plt.plot(iters, self.losses, 'red', linewidth=2, label='train loss')
+        plt.plot(iters, self.val_loss, 'coral', linewidth=2, label='val loss')
+        try:
+            if len(self.losses) < 25:
+                num = 5
+            else:
+                num = 15
+
+            plt.plot(iters, scipy.signal.savgol_filter(self.losses, num, 3), 'green', linestyle='--', linewidth=2,
+                     label='smooth train loss')
+            plt.plot(iters, scipy.signal.savgol_filter(self.val_loss, num, 3), '#8B4513', linestyle='--', linewidth=2,
+                     label='smooth val loss')
+        except:
+            pass
+
+        plt.grid(True)
+        plt.xlabel('Epoch')
+        plt.ylabel('Loss')
+        plt.legend(loc="upper right")
+
+        plt.savefig(os.path.join(self.save_path, "epoch_loss_" + str(self.time_str) + ".png"))
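
Finally, a small usage sketch for LossHistory, assuming one aggregated training and validation loss value per epoch; the log directory name and the loop values are illustrative only.

from frcnn_training import LossHistory

loss_history = LossHistory('logs/')          # creates logs/loss_<timestamp>/
for epoch in range(3):
    train_loss, val_loss = 1.0 / (epoch + 1), 1.2 / (epoch + 1)   # placeholder values
    loss_history.append_loss(train_loss, val_loss)
# Each call appends to epoch_loss_<timestamp>.txt / epoch_val_loss_<timestamp>.txt
# and re-draws epoch_loss_<timestamp>.png in the same folder.
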