# tensorlayer3/get_dr_txt.py
import os
import numpy as np
import tensorflow as tf
from PIL import Image
from tqdm import tqdm
from frcnn import FRCNN
from nets.frcnn_training import get_new_img_size
from utils.anchors import get_anchors
'''
The confidence threshold here is set low on purpose: computing mAP needs
Recall and Precision values under many different thresholds, so enough
low-score boxes must be kept for the mAP to be accurate (see how mAP is
defined). The Recall/Precision printed during mAP computation are the
values at a threshold of 0.5.
The txt files produced under ./input/detection-results/ therefore contain
more boxes than a plain predict would — that is intentional, so that
Recall/Precision can be evaluated across thresholds.
self.iou here is the IoU used by non-maximum suppression: a low-score box
whose IoU with a higher-score box exceeds self.iou is discarded.
Note that self.iou = 0.5 does NOT mean mAP0.5. To evaluate mAP0.x
(e.g. mAP0.75), set MINOVERLAP in get_map.py instead.
'''
# Turn on memory growth for every visible GPU so TensorFlow allocates
# device memory on demand instead of grabbing it all at startup.
physical_gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for physical_gpu in physical_gpus:
    tf.config.experimental.set_memory_growth(physical_gpu, True)
class mAP_FRCNN(FRCNN):
    # ---------------------------------------------------#
    #   Detect one image and dump its boxes to a txt file
    # ---------------------------------------------------#
    def detect_image(self, image_id, image):
        """Run Faster R-CNN detection on `image` and write one line per box
        ("class score left top right bottom", pixel coords of the original
        image) to ./input/detection-results/<image_id>.txt for mAP evaluation.

        The confidence threshold is forced to 0.01 so enough boxes survive
        to compute Recall/Precision at many thresholds (see module docstring).
        """
        self.confidence = 0.01
        # BUGFIX: use a context manager — the original `f = open(...)` leaked
        # the handle on the early return taken when no boxes are detected.
        with open("./input/detection-results/" + image_id + ".txt", "w") as f:
            # Convert to RGB so grayscale images can be predicted too.
            image = image.convert("RGB")
            image_shape = np.array(np.shape(image)[0:2])
            old_width, old_height = image_shape[1], image_shape[0]
            # Resize the input so its short side becomes 600.
            width, height = get_new_img_size(old_width, old_height)
            image = image.resize([width, height], Image.BICUBIC)
            photo = np.array(image, dtype=np.float64)
            # Add the batch dimension expected by the network.
            photo = np.expand_dims(photo, 0)
            rpn_pred = self.model_rpn_get_pred(photo)
            rpn_pred = [x.numpy() for x in rpn_pred]
            # Decode the region-proposal network output into proposal boxes.
            base_feature_width, base_feature_height = self.get_img_output_length(width, height)
            anchors = get_anchors([base_feature_width, base_feature_height], width, height)
            rpn_results = self.bbox_util.detection_out_rpn(rpn_pred, anchors)
            # Feed the proposals plus the shared feature map into the
            # classifier head for the final prediction.
            base_layer = rpn_pred[2]
            proposal_box = np.array(rpn_results)[:, :, 1:]
            temp_ROIs = np.zeros_like(proposal_box)
            # Swap coordinate order (presumably (y1, x1, y2, x2) ->
            # (x1, y1, x2, y2) for the classifier — confirm against bbox_util).
            temp_ROIs[:, :, [0, 1, 2, 3]] = proposal_box[:, :, [1, 0, 3, 2]]
            classifier_pred = self.model_classifier_get_pred([base_layer, temp_ROIs])
            classifier_pred = [x.numpy() for x in classifier_pred]
            # Decode the classifier output into final scored boxes.
            results = self.bbox_util.detection_out_classifier(classifier_pred, proposal_box, self.config, self.confidence)
            if len(results[0]) == 0:
                # Nothing detected: the (empty) txt file is still created,
                # which the mAP tooling expects.
                return
            results = np.array(results[0])
            boxes = results[:, :4]
            top_conf = results[:, 4]
            top_label_indices = results[:, 5]
            # Scale normalized coordinates back to the original image size.
            boxes[:, [0, 2]] = boxes[:, [0, 2]] * old_width
            boxes[:, [1, 3]] = boxes[:, [1, 3]] * old_height
            for i, c in enumerate(top_label_indices):
                predicted_class = self.class_names[int(c)]
                score = str(top_conf[i])
                left, top, right, bottom = boxes[i]
                f.write("%s %s %s %s %s %s\n" % (
                    predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)), str(int(bottom))))
        return
# ---------------------------------------------------------------------#
#   Script entry: run detection over the VOC2007 test split and write
#   one detection-results txt per image for later mAP evaluation.
# ---------------------------------------------------------------------#
frcnn = mAP_FRCNN()
# Read the test image ids; context manager fixes the original's
# unclosed file handle.
with open('VOCdevkit/VOC2007/ImageSets/Main/test.txt') as id_file:
    image_ids = id_file.read().strip().split()
# exist_ok=True replaces the racy exists()-then-makedirs() pattern.
os.makedirs("./input", exist_ok=True)
os.makedirs("./input/detection-results", exist_ok=True)
os.makedirs("./input/images-optional", exist_ok=True)
for image_id in tqdm(image_ids):
    image_path = "./VOCdevkit/VOC2007/JPEGImages/" + image_id + ".jpg"
    image = Image.open(image_path)
    # image.save("./input/images-optional/" + image_id + ".jpg")
    frcnn.detect_image(image_id, image)
print("Conversion completed!")