tensorflow入门教程(二十四)Object Detection API目标检测(中)

程序员文章站 2024-03-14 08:47:52

...

1、概述

上一讲简单地讲了目标检测的原理以及TensorFlow Object Detection API的安装，这一节继续讲TensorFlow Object Detection API怎么用。

2、COCO数据集介绍

COCO数据集是微软发布的一个可以用来进行图像识别训练的数据集，图像中的目标都经过精确的segmentation进行位置定位。COCO的检测标注共包含80类目标，但类别id并不连续，最大为90（因此后面代码中的类别数量上限取90）。Object Detection API默认提供了5个预训练模型，都是使用COCO数据集训练的，分别为：

SSD + MobileNet
Inception V2 + SSD
ResNet101 + R-FCN
ResNet101 + Faster R-CNN
Inception-ResNet V2 + Faster R-CNN

3、下载模型

这个例子中，我们使用基于COCO上训练的ssd_mobilenet_v1_coco模型对任意图片进行识别。打开以下链接，

https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md

tensorflow入门教程(二十四)Object Detection API目标检测(中)

下载第一个模型。然后，将其解压在object_detection目录下。接下来，写代码。

4、导入模块

首先在my_object_detection目录下新建文件demo1.py。

#encoding:utf-8
import tensorflow as tf
import numpy as np

import os
from matplotlib import pyplot as plt
from PIL import Image
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_utils

5、指定文件路径等

# Directory of the downloaded (and unpacked) model.
MODEL_DIR = 'object_detection/ssd_mobilenet_v1_coco_2018_01_28'

# Model file inside the downloaded model directory.
MODEL_CHECK_FILE = os.path.join(MODEL_DIR, 'frozen_inference_graph.pb')

# Label map that corresponds to the data set.
MODEL_LABEL_MAP = os.path.join('object_detection/data', 'mscoco_label_map.pbtxt')

# Class-count limit handed to the label-map helpers; open
# mscoco_label_map.pbtxt to check the ids it contains.
MODEL_NUM_CLASSES = 90

# Collect the sample image file names (image1.jpg ... image5.jpg) in a list.
PATH_TO_TEST_IMAGES_DIR = 'object_detection/test_images'
TEST_IMAGES_PATHS = [
    os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i))
    for i in range(1, 6)
]

# Size of the displayed figure, in inches.
IMAGE_SIZE = (12, 8)

6、导入模型

# Reset the default graph so the model is imported into a clean graph.
tf.reset_default_graph()

# Read the serialized model and import it into the default graph.
# name='' imports the nodes without adding a name prefix.
_graph = tf.GraphDef()
with tf.gfile.GFile(MODEL_CHECK_FILE, 'rb') as fd:
    _graph.ParseFromString(fd.read())
tf.import_graph_def(_graph, name='')

7、加载COCO数据标签

# Load the COCO labels: the contents of mscoco_label_map.pbtxt are converted
# into a dict of the form
# {1: {'id': 1, 'name': u'person'}...90: {'id': 90, 'name': u'toothbrush'}}
label_map = label_map_util.load_labelmap(MODEL_LABEL_MAP)
# MODEL_NUM_CLASSES is passed as the max_num_classes limit of the conversion.
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=MODEL_NUM_CLASSES)
# category_index is the id -> category mapping used later for visualization.
category_index = label_map_util.create_category_index(categories)

8、在图中计算，并显示结果

def load_image_into_numpy_array(image):
    """Convert an image object into a (height, width, 3) uint8 numpy array.

    The image is converted to RGB first, so inputs that are not already
    3-channel (for example grayscale, palette or RGBA images) no longer make
    the reshape to three channels fail.

    Args:
        image: image object providing ``convert(mode)``, ``size`` as
            ``(width, height)`` and ``getdata()`` (as returned by
            ``PIL.Image.open``).

    Returns:
        numpy array of dtype uint8 with shape (height, width, 3).
    """
    rgb_image = image.convert('RGB')
    (im_width, im_height) = rgb_image.size
    pixels = np.array(rgb_image.getdata())
    # getdata() yields one (r, g, b) entry per pixel, row after row, so the
    # flat pixel list is reshaped height-first.
    return pixels.reshape((im_height, im_width, 3)).astype(np.uint8)

# Run the detection in the graph for every test image and show the result.
detection_graph = tf.get_default_graph()

# The model tensors are the same for every image, so they are looked up once
# here instead of on every loop iteration. They also get their own names so
# the numpy results below do not overwrite them.
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
# Holds all detection boxes.
boxes_tensor = detection_graph.get_tensor_by_name('detection_boxes:0')
# Confidence of each detection result.
scores_tensor = detection_graph.get_tensor_by_name('detection_scores:0')
# Class that corresponds to each box.
classes_tensor = detection_graph.get_tensor_by_name('detection_classes:0')
# Number of detection boxes.
num_detections_tensor = detection_graph.get_tensor_by_name('num_detections:0')

with tf.Session(graph=detection_graph) as sess:
    for image_path in TEST_IMAGES_PATHS:
        print(image_path)
        # Read the image and turn its data into an array; the context manager
        # closes the image file once the pixels have been copied.
        with Image.open(image_path) as image:
            image_np = load_image_into_numpy_array(image)
        # Add one leading dimension (axis=0) before feeding image_tensor.
        image_np_expanded = np.expand_dims(image_np, axis=0)
        # Run the computation.
        (boxes, scores, classes, num_detections) = sess.run(
            [boxes_tensor, scores_tensor, classes_tensor, num_detections_tensor],
            feed_dict={image_tensor: image_np_expanded})
        # Print the detection results.
        print(num_detections)
        print(boxes)
        print(classes)
        print(scores)

        # Build the visualization. The result is read back from image_np
        # below, so the call is expected to draw onto the array in place.
        vis_utils.visualize_boxes_and_labels_on_image_array(
            image_np,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=8
        )
        # Display the annotated image.
        plt.figure(figsize=IMAGE_SIZE)
        plt.imshow(image_np)
        plt.show()

9、完整代码

#encoding:utf-8
import tensorflow as tf
import numpy as np

import os
from matplotlib import pyplot as plt
from PIL import Image
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_utils

# Directory of the downloaded (and unpacked) model.
MODEL_DIR = 'object_detection/ssd_mobilenet_v1_coco_2018_01_28'

# Model file inside the downloaded model directory.
MODEL_CHECK_FILE = os.path.join(MODEL_DIR, 'frozen_inference_graph.pb')

# Label map that corresponds to the data set.
MODEL_LABEL_MAP = os.path.join('object_detection/data', 'mscoco_label_map.pbtxt')

# Class-count limit handed to the label-map helpers; open
# mscoco_label_map.pbtxt to check the ids it contains.
MODEL_NUM_CLASSES = 90

# Collect the sample image file names (image1.jpg ... image5.jpg) in a list.
PATH_TO_TEST_IMAGES_DIR = 'object_detection/test_images'
TEST_IMAGES_PATHS = [
    os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i))
    for i in range(1, 6)
]

# Size of the displayed figure, in inches.
IMAGE_SIZE = (12, 8)

# Reset the default graph so the model is imported into a clean graph.
tf.reset_default_graph()

# Read the serialized model and import it into the default graph.
# name='' imports the nodes without adding a name prefix.
_graph = tf.GraphDef()
with tf.gfile.GFile(MODEL_CHECK_FILE, 'rb') as fd:
    _graph.ParseFromString(fd.read())
tf.import_graph_def(_graph, name='')

# Load the COCO labels: the contents of mscoco_label_map.pbtxt are converted
# into a dict of the form
# {1: {'id': 1, 'name': u'person'}...90: {'id': 90, 'name': u'toothbrush'}}
label_map = label_map_util.load_labelmap(MODEL_LABEL_MAP)
# MODEL_NUM_CLASSES is passed as the max_num_classes limit of the conversion.
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=MODEL_NUM_CLASSES)
# category_index is the id -> category mapping used later for visualization.
category_index = label_map_util.create_category_index(categories)

def load_image_into_numpy_array(image):
    """Convert an image object into a (height, width, 3) uint8 numpy array.

    The image is converted to RGB first, so inputs that are not already
    3-channel (for example grayscale, palette or RGBA images) no longer make
    the reshape to three channels fail.

    Args:
        image: image object providing ``convert(mode)``, ``size`` as
            ``(width, height)`` and ``getdata()`` (as returned by
            ``PIL.Image.open``).

    Returns:
        numpy array of dtype uint8 with shape (height, width, 3).
    """
    rgb_image = image.convert('RGB')
    (im_width, im_height) = rgb_image.size
    pixels = np.array(rgb_image.getdata())
    # getdata() yields one (r, g, b) entry per pixel, row after row, so the
    # flat pixel list is reshaped height-first.
    return pixels.reshape((im_height, im_width, 3)).astype(np.uint8)

# Run the detection in the graph for every test image and show the result.
detection_graph = tf.get_default_graph()

# The model tensors are the same for every image, so they are looked up once
# here instead of on every loop iteration. They also get their own names so
# the numpy results below do not overwrite them.
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
# Holds all detection boxes.
boxes_tensor = detection_graph.get_tensor_by_name('detection_boxes:0')
# Confidence of each detection result.
scores_tensor = detection_graph.get_tensor_by_name('detection_scores:0')
# Class that corresponds to each box.
classes_tensor = detection_graph.get_tensor_by_name('detection_classes:0')
# Number of detection boxes.
num_detections_tensor = detection_graph.get_tensor_by_name('num_detections:0')

with tf.Session(graph=detection_graph) as sess:
    for image_path in TEST_IMAGES_PATHS:
        print(image_path)
        # Read the image and turn its data into an array; the context manager
        # closes the image file once the pixels have been copied.
        with Image.open(image_path) as image:
            image_np = load_image_into_numpy_array(image)
        # Add one leading dimension (axis=0) before feeding image_tensor.
        image_np_expanded = np.expand_dims(image_np, axis=0)
        # Run the computation.
        (boxes, scores, classes, num_detections) = sess.run(
            [boxes_tensor, scores_tensor, classes_tensor, num_detections_tensor],
            feed_dict={image_tensor: image_np_expanded})
        # Print the detection results.
        print(num_detections)
        print(boxes)
        print(classes)
        print(scores)

        # Build the visualization. The result is read back from image_np
        # below, so the call is expected to draw onto the array in place.
        vis_utils.visualize_boxes_and_labels_on_image_array(
            image_np,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=8
        )
        # Display the annotated image.
        plt.figure(figsize=IMAGE_SIZE)
        plt.imshow(image_np)
        plt.show()