TensorFlow 1.0: Object Detection with the Google Object Detection API
1. For installing and configuring the Google Object Detection API, see the following blog post:
https://www.cnblogs.com/kxqblog/p/16049570.html
2. The detection workflow is described step by step below.
Step 1: Build the dataset. The relevant steps are described in the following blog post:
https://www.cnblogs.com/kxqblog/p/16122532.html
Step 2: Convert the images.
Under the API/research/object_detection directory, create a new project folder named SSD_Detect_Project to hold the dataset, model, and configuration files, and inside it create train and test folders for the training and test images, for example:
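A sketch of the project layout assumed in the rest of this post (the models, training, and model_save folders are created in later steps):

SSD_Detect_Project/
├── train/        # training images and their .xml annotations
├── test/         # test images and their .xml annotations
├── models/       # downloaded pre-trained weights (Step 4)
├── training/     # the edited .config file (Step 5)
└── model_save/   # checkpoints and the exported frozen graph (Steps 6 and 7)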
The annotated dataset then needs to be converted into .record files for TensorFlow training. The first script, xml_to_csv.py, converts the XML annotations to CSV:
import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET
def xml_to_csv(path):
    xml_list = []
    for xml_file in glob.glob(path + '/*.xml'):
        tree = ET.parse(xml_file)
        root = tree.getroot()
        for member in root.findall('object'):
            value = (root.find('filename').text,
                     int(root.find('size')[0].text),
                     int(root.find('size')[1].text),
                     member[0].text,
                     int(member[4][0].text),
                     int(member[4][1].text),
                     int(member[4][2].text),
                     int(member[4][3].text)
                     )
            xml_list.append(value)
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_df = pd.DataFrame(xml_list, columns=column_name)
    return xml_df
def main():
    for folder in ['train', 'test']:
        image_path = os.path.join(os.getcwd(), folder)  # directory holding the .xml annotations (SSD_Detect_Project/train or test)
        xml_df = xml_to_csv(image_path)
        xml_df.to_csv(folder + '_labels.csv', index=None)
        print('Successfully converted xml to csv.')

main()
In the SSD_Detect_Project directory, run the following command to generate the CSV files first:
python xml_to_csv.py
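As a quick optional sanity check (a minimal sketch, assuming the CSV files were written to the current directory), the row counts and class names can be inspected with pandas:

import pandas as pd

for name in ['train_labels.csv', 'test_labels.csv']:
    df = pd.read_csv(name)
    print(name, '-', len(df), 'boxes, classes:', sorted(df['class'].unique()))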
Then convert the CSV files into .record files with the following script (generate_tfrecord.py):
"""
Usage:
# From tensorflow/models/
# Create train data:
python generate_tfrecord.py --csv_input=images/train_labels.csv --image_dir=images/train --output_path=train.record
# Create test data:
python generate_tfrecord.py --csv_input=images/test_labels.csv --image_dir=images/test --output_path=test.record
"""
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
import os
import io
import pandas as pd
import tensorflow as tf
from PIL import Image
from object_detection.utils import dataset_util
from collections import namedtuple, OrderedDict
flags = tf.app.flags
flags.DEFINE_string('csv_input', '', 'Path to the CSV input')
flags.DEFINE_string('image_dir', '', 'Path to the image directory')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
FLAGS = flags.FLAGS
# This part needs to be modified to match your own dataset:
# replace the class labels below with your own categories.
def class_text_to_int(row_label):
    if row_label == 'buou':
        return 1
    elif row_label == 'huangquan':
        return 2
    elif row_label == 'jumao':
        return 3
    elif row_label == 'meiduan':
        return 4
    elif row_label == 'lihuamao':
        return 5
    elif row_label == 'xueqiaoquan':
        return 6
    elif row_label == 'labuladuo':
        return 7
    elif row_label == 'taidi':
        return 8
    else:
        return None
def split(df, group):
    data = namedtuple('data', ['filename', 'object'])
    gb = df.groupby(group)
    return [data(filename, gb.get_group(x)) for filename, x in zip(gb.groups.keys(), gb.groups)]
def create_tf_example(group, path):
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class']))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
def main(_):
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    path = os.path.join(os.getcwd(), FLAGS.image_dir)
    examples = pd.read_csv(FLAGS.csv_input)
    grouped = split(examples, 'filename')
    for group in grouped:
        tf_example = create_tf_example(group, path)
        writer.write(tf_example.SerializeToString())

    writer.close()
    output_path = os.path.join(os.getcwd(), FLAGS.output_path)
    print('Successfully created the TFRecords: {}'.format(output_path))


if __name__ == '__main__':
    tf.app.run()
Note that the class labels in the code above must be changed to match your own dataset. Then run the following commands in a terminal from the SSD_Detect_Project directory:
python generate_tfrecord.py --csv_input=./train_labels.csv --image_dir=./train --output_path=train.record
python generate_tfrecord.py --csv_input=./test_labels.csv --image_dir=./test --output_path=test.record
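To confirm the .record files were written correctly, a minimal sketch using the TF 1.x API (tf.python_io.tf_record_iterator) counts the serialized examples in each file:

import tensorflow as tf

for record in ['train.record', 'test.record']:
    count = sum(1 for _ in tf.python_io.tf_record_iterator(record))
    print('{}: {} examples'.format(record, count))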
Step 3: Create the label map file, label_map.pbtxt.
Replace the name entries with your own class names, for example:
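With the eight classes used in generate_tfrecord.py above, label_map.pbtxt would look like this (each id must match the integer returned by class_text_to_int, and ids start at 1):

item {
  id: 1
  name: 'buou'
}
item {
  id: 2
  name: 'huangquan'
}
# ... one item block per class, following the same pattern ...
item {
  id: 8
  name: 'taidi'
}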
Step 4: Download pre-trained weights from the detection model zoo:
(https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf1_detection_zoo.md); pick the weights you want and download them.
Create a models folder under SSD_Detect_Project to store the downloaded pre-trained weights.
In object_detection/samples/configs, find the config file matching the pre-trained model and copy it into the SSD_Detect_Project directory for editing.
Step 5: Modify the configuration file.
Taking the ssd_mobilenet_v1_coco weights as an example, first create a training folder under SSD_Detect_Project and copy samples/configs/ssd_mobilenet_v1_coco.config into it.
In the config file:
Around line 10, change num_classes to the number of classes in your dataset.
Around lines 140-154, adjust the train_config parameters such as batch_size, initial_learning_rate, and step as needed.
Around line 156, set the fine_tune_checkpoint path to the corresponding pre-trained model under models, e.g.:
D:/TensorflowModels/models-r1.13.0/research/object_detection/SSD_Detect_Project/models/ssd_mobilenet_v1_coco_2018_01_28/model.ckpt
Around line 162, change num_steps.
Around line 173, change input_path and label_map_path in train_input_reader to the paths of the train.record and label_map.pbtxt generated earlier.
Around line 187, change input_path and label_map_path in eval_input_reader to the paths of the test.record and label_map.pbtxt generated earlier.
Also change num_examples in eval_config to the number of test images (a sketch of the edited sections is shown below).
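For reference, a sketch of how the edited sections end up looking (paths follow the example above; batch_size, num_steps, and num_examples are example values, and exact line numbers vary between API versions):

model {
  ssd {
    num_classes: 8   # number of classes in your dataset
    # ... rest of the model section unchanged ...
  }
}
train_config: {
  batch_size: 24     # lower this if you run out of GPU memory
  fine_tune_checkpoint: "D:/TensorflowModels/models-r1.13.0/research/object_detection/SSD_Detect_Project/models/ssd_mobilenet_v1_coco_2018_01_28/model.ckpt"
  num_steps: 60000
  # ... optimizer / learning-rate settings tuned as needed ...
}
train_input_reader: {
  tf_record_input_reader {
    input_path: "D:/TensorflowModels/models-r1.13.0/research/object_detection/SSD_Detect_Project/train.record"
  }
  label_map_path: "D:/TensorflowModels/models-r1.13.0/research/object_detection/SSD_Detect_Project/label_map.pbtxt"
}
eval_config: {
  num_examples: 20   # set to the number of images in test/
}
eval_input_reader: {
  tf_record_input_reader {
    input_path: "D:/TensorflowModels/models-r1.13.0/research/object_detection/SSD_Detect_Project/test.record"
  }
  label_map_path: "D:/TensorflowModels/models-r1.13.0/research/object_detection/SSD_Detect_Project/label_map.pbtxt"
}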
Step 6: Train the network.
Run the following command from the API/research/object_detection directory:
# Train via model_main.py
python model_main.py --model_dir=SSD_Detect_Project/model_save/ --pipeline_config_path=SSD_Detect_Project/training/ssd_mobilenet_v1_coco.config --num_train_steps=60000 --num_eval_steps=20 --alsologtostderr
# Alternatively, train with the legacy script
python legacy/train.py --train_dir=SSD_Detect_Project/model_save/ --pipeline_config_path=SSD_Detect_Project/ssd_mobilenet_v1_coco.config --logtostderr
Here model_dir is where the generated checkpoints are saved, and pipeline_config_path is the path to the config file.
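Training progress (loss curves and evaluation results) can be monitored with TensorBoard by pointing it at the model_dir used above:

tensorboard --logdir=SSD_Detect_Project/model_save/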
Problem:
"error: No module named pycocotools"
Solution:
pip install pycocotools
Problem:
NameError: name 'np' is not defined
Solution:
If the error persists after adding the numpy import, delete the object_detection.egg-info folder that sits alongside object_detection and re-run python setup.py install to regenerate the package metadata.
Problem:
UnknownError: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize
Solution:
Add the following line to legacy/train.py (near the top, after importing os):
os.environ['CUDA_VISIBLE_DEVICES'] = "0"
Note: in my tests the first command (model_main.py) sometimes ran without printing the training progress, while the legacy script had no such problem. The command used was:
python legacy/train.py --train_dir=SSD_Detect_Project/model_save/ --pipeline_config_path=SSD_Detect_Project/training/ssd_mobilenet_v1_coco.config --alsologtostderr
At this point, training runs successfully.
Step 7: Freeze the model. Run the following command from the API/research/object_detection directory to freeze the network:
python export_inference_graph.py --input_type image_tensor --pipeline_config_path SSD_Detect_Project/training/ssd_mobilenet_v1_coco.config --trained_checkpoint_prefix SSD_Detect_Project/model_save/model.ckpt --output_directory SSD_Detect_Project/model_save/
Here trained_checkpoint_prefix is the prefix of the checkpoint files, and output_directory is where the frozen .pb file is written.
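Note that the checkpoints written during training are normally named model.ckpt-<step> (e.g. model.ckpt-60000), and trained_checkpoint_prefix should point at one such prefix. A minimal sketch to print the newest one before exporting:

import tensorflow as tf

# Prints something like .../SSD_Detect_Project/model_save/model.ckpt-60000;
# pass that value as --trained_checkpoint_prefix to export_inference_graph.py.
print(tf.train.latest_checkpoint('SSD_Detect_Project/model_save/'))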
Step 8: Test the model. Pick an image to check the model's performance; the test code (testimg.py) is below.
Run it with: python testimg.py
######## Image Object Detection Using Tensorflow-trained Classifier #########
#
# Author: Evan Juras
# Date: 1/15/18
# Description:
# This program uses a TensorFlow-trained classifier to perform object detection.
# It loads the classifier and uses it to perform object detection on an image.
# It draws boxes and scores around the objects of interest in the image.
## Some of the code is copied from Google's example at
## https://github.com/tensorflow/models/blob/master/research/object_detection/object_detection_tutorial.ipynb
## and some is copied from Dat Tran's example at
## https://github.com/datitran/object_detector_app/blob/master/object_detection_app.py
## but I changed it to make it more understandable to me.
# Import packages
import os
import cv2
import numpy as np
import tensorflow as tf
import sys
# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
# Import utilites
from utils import label_map_util
from utils import visualization_utils as vis_util
# Name of the directory containing the object detection module we're using
MODEL_NAME = 'model_save'
IMAGE_NAME = 'buou.jpg'
# Grab path to current working directory
CWD_PATH = os.getcwd()
# Path to frozen detection graph .pb file, which contains the model that is used
# for object detection.
PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,'frozen_inference_graph.pb')
# Path to label map file
PATH_TO_LABELS = os.path.join(CWD_PATH,'label_map.pbtxt')
# Path to image
PATH_TO_IMAGE = os.path.join(CWD_PATH,IMAGE_NAME)
# Number of classes the object detector can identify
NUM_CLASSES = 8
# Load the label map.
# Label maps map indices to category names, so that when our convolution
# network predicts `5`, we know that this corresponds to `king`.
# Here we use internal utility functions, but anything that returns a
# dictionary mapping integers to appropriate string labels would be fine
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
# Load the Tensorflow model into memory.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

    sess = tf.Session(graph=detection_graph)
# Define input and output tensors (i.e. data) for the object detection classifier
# Input tensor is the image
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
# Output tensors are the detection boxes, scores, and classes
# Each box represents a part of the image where a particular object was detected
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
# Each score represents level of confidence for each of the objects.
# The score is shown on the result image, together with the class label.
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
# Number of objects detected
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
# Load image using OpenCV and
# expand image dimensions to have shape: [1, None, None, 3]
# i.e. a single-column array, where each item in the column has the pixel RGB value
image = cv2.imread(PATH_TO_IMAGE)
image_expanded = np.expand_dims(image, axis=0)
# Perform the actual detection by running the model with the image as input
(boxes, scores, classes, num) = sess.run(
    [detection_boxes, detection_scores, detection_classes, num_detections],
    feed_dict={image_tensor: image_expanded})
# Draw the results of the detection (aka 'visualize the results')
vis_util.visualize_boxes_and_labels_on_image_array(
    image,
    np.squeeze(boxes),
    np.squeeze(classes).astype(np.int32),
    np.squeeze(scores),
    category_index,
    use_normalized_coordinates=True,
    line_thickness=8,
    min_score_thresh=0.80)
# All the results have been drawn on image. Now display the image.
cv2.imshow('Object detector', image)
# Press any key to close the image
cv2.waitKey(0)
# Clean up
cv2.destroyAllWindows()
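If the script runs on a machine without a display (e.g. over SSH), cv2.imshow will fail; a simple alternative is to save the annotated image to disk instead (detection_result.jpg is just an example output name):

# Replace the cv2.imshow / cv2.waitKey / cv2.destroyAllWindows lines with:
cv2.imwrite('detection_result.jpg', image)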
Step 9: Evaluate model performance.
First edit object_detection/utils/object_detection_evaluation.py and replace unicode with str. Then delete the object_detection.egg-info folder that sits alongside object_detection and re-run python setup.py install to regenerate the package metadata.
Then run the following command:
python legacy/eval.py --logtostderr --pipeline_config_path=SSD_Detect_Project/training/ssd_mobilenet_v1_coco.config --checkpoint_dir=SSD_Detect_Project/model_save/ --eval_dir=SSD_Detect_Project/eval/
Here pipeline_config_path is the path to the config file used earlier, checkpoint_dir is where the model checkpoints are saved, and eval_dir is where the evaluation output should be written.
I also ran into the following error:
ValueError: Image with id b'000001.jpg' already added
Solution:
The number of test examples set in eval_config in the config file was wrong. Change it to the correct number, delete the config file copied into the eval folder, and re-run the command above,
also adding the following line to eval_config:
metrics_set: "coco_detection_metrics"
After it runs, the COCO detection metrics are printed to the console.