【CV数据集】智慧城市之CCPD车牌数据集

前言

最近查找车牌检测数据集，了解到CCPD数据集，CCPD 是一个开源免费的中国城市车牌识别数据集，非常不错。

具体实现

1. 数据集简介

CCPD2019数据集包含约35.5万张图片（下表合计355013张），图片尺寸为720x1160x3（宽x高x通道），共包含9种类型图片，各类型的图片数量及说明见下表。

类型	图片数量	备注
ccpd_base	199996	正常车牌
ccpd_blur	20611	模糊车牌
ccpd_challenge	50003	比较有挑战的车牌
ccpd_db	10132	光线较亮或较暗车牌
ccpd_fn	20967	距离摄像头较远或较近
ccpd_np	3036	没上牌的新车
ccpd_rotate	10053	水平倾斜20°-50°，垂直倾斜-10°-10°
ccpd_tilt	30216	水平倾斜15°-45°，垂直倾斜15°-45°
ccpd_weather	9999	雨天、雪天或者大雾天的车牌
合计	355013

数据标注格式：

CCPD的标注数据格式较为特别，是通过解析图片名的方式获取具体信息，即图像名就是标注内容。

如图片【025-95_113-154&383_386&473-386&473_177&454_154&383_363&402-0_0_22_27_27_33_16-37-15.jpg】，其文件名的含义如下：

025：车牌区域占整个画面的比例；
95_113： 车牌水平和垂直角度, 水平95°, 竖直113°
154&383_386&473：标注框左上、右下坐标，左上(154, 383), 右下(386, 473)
386&473_177&454_154&383_363&402：标注框四个角点坐标，顺序为右下、左下、左上、右上
0_0_22_27_27_33_16：车牌号码，各索引的映射关系如下：第一个0为省份，对应省份字典provinces中的'皖'；第二个0是该车所在地的地市一级代码，对应地市一级代码字典alphabets中的'A'；后5位为字母和数字，对应车牌号字典ads，如22为Y，27为3，33为9，16为S，最终车牌号码为皖AY339S。

车牌字典

# Province abbreviations (34 entries), indexed by the first plate code.
provinces = [
    "皖", "沪", "津", "渝", "冀", "晋", "蒙", "辽", "吉", "黑",  # 0-9
    "苏", "浙", "京", "闽", "赣", "鲁", "豫", "鄂", "湘", "粤",  # 10-19
    "桂", "琼", "川", "贵", "云", "藏", "陕", "甘", "青", "宁",  # 20-29
    "新", "警", "学", "O",  # 30-33
]
# City / region letters (25 entries), indexed by the second plate code.
alphabets = [
    "A", "B", "C", "D", "E", "F", "G", "H", "J", "K",  # 0-9
    "L", "M", "N", "P", "Q", "R", "S", "T", "U", "V",  # 10-19
    "W", "X", "Y", "Z", "O",  # 20-24
]
# Serial characters (35 entries), indexed by the remaining plate codes.
ads = [
    "A", "B", "C", "D", "E", "F", "G", "H", "J", "K",  # 0-9
    "L", "M", "N", "P", "Q", "R", "S", "T", "U", "V",  # 10-19
    "W", "X", "Y", "Z", "0", "1", "2", "3", "4", "5",  # 20-29
    "6", "7", "8", "9", "O",  # 30-34
]

2. 将CCPD中车牌区域解析为YOLO格式标注（代码中沿用了coco的命名）

# 20240703: ccpd dataset to coco format dataset.
import os
import cv2 as cv
import numpy as np


# Image resolution used to normalise the boxes: 720 px wide, 1160 px high.
imgw, imgh = 720, 1160
imgsz = (imgw, imgh)

# Province abbreviations (34 entries), indexed by the first plate code.
provinces = [
    "皖", "沪", "津", "渝", "冀", "晋", "蒙", "辽", "吉", "黑",  # 0-9
    "苏", "浙", "京", "闽", "赣", "鲁", "豫", "鄂", "湘", "粤",  # 10-19
    "桂", "琼", "川", "贵", "云", "藏", "陕", "甘", "青", "宁",  # 20-29
    "新", "警", "学", "O",  # 30-33
]
# City / region letters (25 entries), indexed by the second plate code.
alphabets = [
    "A", "B", "C", "D", "E", "F", "G", "H", "J", "K",  # 0-9
    "L", "M", "N", "P", "Q", "R", "S", "T", "U", "V",  # 10-19
    "W", "X", "Y", "Z", "O",  # 20-24
]
# Serial characters (35 entries), indexed by the remaining plate codes.
ads = [
    "A", "B", "C", "D", "E", "F", "G", "H", "J", "K",  # 0-9
    "L", "M", "N", "P", "Q", "R", "S", "T", "U", "V",  # 10-19
    "W", "X", "Y", "Z", "0", "1", "2", "3", "4", "5",  # 20-29
    "6", "7", "8", "9", "O",  # 30-34
]

def get_plate_licenses(plate):
    """Decode the index codes taken from a CCPD file name into a plate string.

    Ordinary blue plates have 7 characters; new-energy plates have 8
    (province abbreviation + one-letter region code + 6-character serial).
    On new-energy plates the letter "D" marks a battery-electric vehicle and
    "F" marks other new-energy vehicles (plug-in hybrid, fuel cell, ...).
    Background:
    https://baike.baidu.com/item/%E8%BD%A6%E7%89%8C/8347320?fr=aladdin
    https://www.yoojia.com/ask/4-11906976349117851507.html

    :param plate: sequence of index codes (ints or numeric strings); item 0
        indexes ``provinces``, item 1 indexes ``alphabets`` and every further
        item indexes ``ads``.
    :return: the decoded plate as one string.
    """
    chars = [provinces[int(plate[0])]]
    chars.append(alphabets[int(plate[1])])
    # Everything after the first two codes is looked up in the serial table.
    chars.extend(ads[int(code)] for code in plate[2:])
    # No new-energy ("D"/"F") validation is performed on the decoded string.
    return "".join(chars)

def ccpd2coco(path):
    """Write a label file for every ``.jpg`` image found under ``<path>/CCPD2020``.

    The directory tree is walked recursively; each image file name is handed
    to ``parse_annotation``, which writes the matching ``.txt`` label into
    ``<path>/CCPD2020/green_label``.

    :param path: directory that contains the ``CCPD2020`` folder.
    :return: None
    """
    dataset_path = os.path.join(path, 'CCPD2020')
    labelpath = os.path.join(dataset_path, 'green_label')
    # parse_annotation opens the label file directly, so the directory must exist.
    os.makedirs(labelpath, exist_ok=True)
    # A distinct loop name keeps the ``path`` argument from being shadowed.
    for dirpath, subpaths, files in os.walk(dataset_path):
        for filename in files:
            # Skip anything that is not an image (e.g. the generated .txt
            # labels, which live inside the walked tree).
            if not filename.lower().endswith('.jpg'):
                continue
            print(f'file in path: {dirpath}, subpath: {subpaths}, filename: {filename}')
            # The returned dict can be passed to display(dirpath, ...) to
            # eyeball the parsed box.
            parse_annotation(filename, labelpath)

def display(filepath, annoinfo):
    """Draw the first bounding box of ``annoinfo`` on its image and save a copy.

    :param filepath: directory that contains the image.
    :param annoinfo: dict with a ``'filename'`` entry and a ``'bboxes'`` list
        of ``[x1, y1, x2, y2]`` pixel boxes; only the first box is drawn.
    :return: None. The annotated image is written to ``filename`` relative to
        the current working directory.
    :raises OSError: if the image cannot be loaded.
    """
    filename = annoinfo['filename']
    x1, y1, x2, y2 = annoinfo['bboxes'][0]  # xyxy
    imgfile = os.path.join(filepath, filename)
    img = cv.imread(imgfile)
    # cv.imread signals failure by returning None instead of raising.
    if img is None:
        raise OSError(f'cannot load image: {imgfile}')
    cv.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0))  # (top-left, bottom-right)
    cv.imwrite(filename, img)

def get_bbox(size, box):
    """Convert a pixel ``xyxy`` box into a normalised YOLOv5 ``xywh`` box.

    :param size: ``(width, height)`` of the image.
    :param box: ``(x1, y1, x2, y2)`` corner coordinates.
    :return: tuple ``(xc, yc, w, h)`` - box centre and extent, each divided
        by the matching image dimension.
    """
    left, top, right, bottom = box[0], box[1], box[2], box[3]
    inv_w = 1. / size[0]
    inv_h = 1. / size[1]
    return (
        (left + right) * 0.5 * inv_w,
        (top + bottom) * 0.5 * inv_h,
        (right - left) * inv_w,
        (bottom - top) * inv_h,
    )

def parse_annotation(filename, labelpath):
    """Parse the annotation encoded in a CCPD file name and write its label file.

    The file name is a ``-``-separated record, for example
    ``0014128352490421455-90_90-212&467_271&489-271&489_212&489_212&467_271&467-0_0_3_30_30_25_31_32-79-4.jpg``.
    Fields used here: [0] plate-area ratio (ignored), [1] horizontal and
    vertical tilt angles, [2] bounding box (top-left, bottom-right),
    [3] four corner points (bottom-right, bottom-left, top-left, top-right),
    [4] plate character indices. Later fields are ignored.

    A one-line label ``"<classid> <xc> <yc> <w> <h>"`` (normalised with the
    module-level ``imgsz``) is written to ``<labelpath>/<stem>.txt``.

    :param filename: image file name (no directory part).
    :param labelpath: existing directory that receives the label file.
    :return: dict with keys ``filename``, ``bboxes`` (list of xyxy int lists),
        ``points`` (list of Nx2 arrays), ``labels``, ``plates`` and ``angles``.
    :raises IndexError: if the name has fewer than five ``-``-separated fields.
    """
    fields = filename.split("-")
    angle = fields[1].split("_")  # tilt angles, kept as strings
    box = [int(v) for v in fields[2].replace("&", "_").split("_")]  # xyxy
    corners = [int(v) for v in fields[3].replace("&", "_").split("_")]
    point = np.asarray(corners).reshape(-1, 2)  # one (x, y) row per corner
    plate = get_plate_licenses(fields[4].split("_"))  # plate character indices
    bbox = get_bbox(imgsz, box)  # normalised xywh

    bboxes = [box]  # [xyxy]
    # NOTE(review): YOLO class ids are usually 0-based; confirm that 1 is the
    # intended id for "plate" in the target training config.
    classid = 1  # plate
    annoinfo = {"filename": filename, "bboxes": bboxes, "points": [point],
                "labels": ["plate"] * len(bboxes), "plates": [plate], "angles": [angle]}

    info = f"{classid} {' '.join(f'{x:.6f}' for x in bbox)}\n"
    # Swap only the extension; a plain str.replace('jpg', 'txt') would also
    # rewrite a "jpg" inside the stem and leave other extensions untouched.
    stem = os.path.splitext(filename)[0]
    labelname = os.path.join(labelpath, stem + '.txt')
    # The context manager closes the file even if the write fails.
    with open(labelname, 'w') as labelfile:
        labelfile.write(info)
    return annoinfo


if __name__ == "__main__":
    # The dataset folder is looked up next to this script.
    ccpd2coco(os.path.dirname(os.path.realpath(__file__)))

View Code

随机获取一定比例的数据

# 20240703: ccpd dataset to coco format dataset.
import os
import cv2 as cv
import numpy as np
import random
import shutil


# Image resolution used to normalise the boxes: 720 px wide, 1160 px high.
imgw, imgh = 720, 1160
imgsz = (imgw, imgh)
# Fraction of each sub-dataset that is sampled into the output set.
percent = 0.05

# Province abbreviations (34 entries), indexed by the first plate code.
provinces = [
    "皖", "沪", "津", "渝", "冀", "晋", "蒙", "辽", "吉", "黑",  # 0-9
    "苏", "浙", "京", "闽", "赣", "鲁", "豫", "鄂", "湘", "粤",  # 10-19
    "桂", "琼", "川", "贵", "云", "藏", "陕", "甘", "青", "宁",  # 20-29
    "新", "警", "学", "O",  # 30-33
]
# City / region letters (25 entries), indexed by the second plate code.
alphabets = [
    "A", "B", "C", "D", "E", "F", "G", "H", "J", "K",  # 0-9
    "L", "M", "N", "P", "Q", "R", "S", "T", "U", "V",  # 10-19
    "W", "X", "Y", "Z", "O",  # 20-24
]
# Serial characters (35 entries), indexed by the remaining plate codes.
ads = [
    "A", "B", "C", "D", "E", "F", "G", "H", "J", "K",  # 0-9
    "L", "M", "N", "P", "Q", "R", "S", "T", "U", "V",  # 10-19
    "W", "X", "Y", "Z", "0", "1", "2", "3", "4", "5",  # 20-29
    "6", "7", "8", "9", "O",  # 30-34
]

# CCPD2019 sub-datasets to sample from. 'ccpd_np' is not listed; the full set is:
# ['ccpd_base', 'ccpd_blur', 'ccpd_challenge', 'ccpd_db', 'ccpd_fn', 'ccpd_np',
#  'ccpd_rotate', 'ccpd_tilt', 'ccpd_weather']
ccpd2019 = [
    "ccpd_base",
    "ccpd_blur",
    "ccpd_challenge",
    "ccpd_db",
    "ccpd_fn",
    "ccpd_rotate",
    "ccpd_tilt",
    "ccpd_weather",
]

def get_plate_licenses(plate):
    """Decode the index codes taken from a CCPD file name into a plate string.

    Ordinary blue plates have 7 characters; new-energy plates have 8
    (province abbreviation + one-letter region code + 6-character serial).
    On new-energy plates the letter "D" marks a battery-electric vehicle and
    "F" marks other new-energy vehicles (plug-in hybrid, fuel cell, ...).
    Background:
    https://baike.baidu.com/item/%E8%BD%A6%E7%89%8C/8347320?fr=aladdin
    https://www.yoojia.com/ask/4-11906976349117851507.html

    :param plate: sequence of index codes (ints or numeric strings); item 0
        indexes ``provinces``, item 1 indexes ``alphabets`` and every further
        item indexes ``ads``.
    :return: the decoded plate as one string.
    """
    chars = [provinces[int(plate[0])]]
    chars.append(alphabets[int(plate[1])])
    # Everything after the first two codes is looked up in the serial table.
    chars.extend(ads[int(code)] for code in plate[2:])
    # No new-energy ("D"/"F") validation is performed on the decoded string.
    return "".join(chars)

def ccpd2coco2019(path):
    """Sample a random fraction of every CCPD2019 sub-dataset into ``<path>/plate``.

    For each sub-dataset named in the module-level ``ccpd2019`` list, the
    ``.jpg`` files are shuffled and the first ``int(count * percent)`` of them
    are copied to ``<path>/plate/image``; ``parse_annotation`` writes the
    matching label file to ``<path>/plate/label``.

    :param path: directory that contains the ``CCPD2019`` folder.
    :return: None
    """
    dataset_path = os.path.join(path, 'CCPD2019')
    imagepath = os.path.join(path, 'plate/image')
    labelpath = os.path.join(path, 'plate/label')
    # copyfile/open do not create missing directories.
    os.makedirs(imagepath, exist_ok=True)
    os.makedirs(labelpath, exist_ok=True)
    for typename in ccpd2019:
        print('typename: ', typename)
        subpath = os.path.join(dataset_path, typename)
        # Only image files carry a parsable annotation in their name.
        files = [f for f in os.listdir(subpath) if f.lower().endswith('.jpg')]
        random.shuffle(files)
        print('files: ', len(files))
        # Slicing takes exactly the requested share; the previous
        # ``i > num*percent`` counter let one extra file through.
        keep = int(len(files) * percent)
        for filename in files[:keep]:
            # copy image
            shutil.copyfile(os.path.join(subpath, filename),
                            os.path.join(imagepath, filename))
            # bbox label file.
            parse_annotation(filename, labelpath)

def ccpd2coco2020(path):
    """Sample a random fraction of every CCPD2020 split into ``<path>/plate``.

    For each of the ``test``, ``train`` and ``val`` folders under
    ``<path>/CCPD2020/ccpd_green``, the ``.jpg`` files are shuffled and the
    first ``int(count * percent)`` of them are copied to
    ``<path>/plate/image``; ``parse_annotation`` writes the matching label
    file to ``<path>/plate/label``.

    :param path: directory that contains the ``CCPD2020`` folder.
    :return: None
    """
    green_path = os.path.join(path, 'CCPD2020', 'ccpd_green')
    imagepath = os.path.join(path, 'plate/image')
    labelpath = os.path.join(path, 'plate/label')
    # copyfile/open do not create missing directories.
    os.makedirs(imagepath, exist_ok=True)
    os.makedirs(labelpath, exist_ok=True)
    for typename in ['test', 'train', 'val']:
        subpath = os.path.join(green_path, typename)
        # Only image files carry a parsable annotation in their name.
        files = [f for f in os.listdir(subpath) if f.lower().endswith('.jpg')]
        random.shuffle(files)
        print('files: ', len(files))
        # Slicing takes exactly the requested share; the previous
        # ``i > num*percent`` counter let one extra file through.
        keep = int(len(files) * percent)
        for filename in files[:keep]:
            # copy image
            shutil.copyfile(os.path.join(subpath, filename),
                            os.path.join(imagepath, filename))
            # bbox label file.
            parse_annotation(filename, labelpath)


def ccpd2coco(path):
    """Write a label file for every ``.jpg`` image found under ``<path>/CCPD2020``.

    The directory tree is walked recursively; each image file name is handed
    to ``parse_annotation``, which writes the matching ``.txt`` label into
    ``<path>/CCPD2020/green_label``.

    :param path: directory that contains the ``CCPD2020`` folder.
    :return: None
    """
    dataset_path = os.path.join(path, 'CCPD2020')
    labelpath = os.path.join(dataset_path, 'green_label')
    # parse_annotation opens the label file directly, so the directory must exist.
    os.makedirs(labelpath, exist_ok=True)
    # A distinct loop name keeps the ``path`` argument from being shadowed.
    for dirpath, subpaths, files in os.walk(dataset_path):
        for filename in files:
            # Skip anything that is not an image (e.g. the generated .txt
            # labels, which live inside the walked tree).
            if not filename.lower().endswith('.jpg'):
                continue
            print(f'file in path: {dirpath}, subpath: {subpaths}, filename: {filename}')
            # The returned dict can be passed to display(dirpath, ...) to
            # eyeball the parsed box.
            parse_annotation(filename, labelpath)

def display(filepath, annoinfo):
    """Draw the first bounding box of ``annoinfo`` on its image and save a copy.

    :param filepath: directory that contains the image.
    :param annoinfo: dict with a ``'filename'`` entry and a ``'bboxes'`` list
        of ``[x1, y1, x2, y2]`` pixel boxes; only the first box is drawn.
    :return: None. The annotated image is written to ``filename`` relative to
        the current working directory.
    :raises OSError: if the image cannot be loaded.
    """
    filename = annoinfo['filename']
    x1, y1, x2, y2 = annoinfo['bboxes'][0]  # xyxy
    imgfile = os.path.join(filepath, filename)
    img = cv.imread(imgfile)
    # cv.imread signals failure by returning None instead of raising.
    if img is None:
        raise OSError(f'cannot load image: {imgfile}')
    cv.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0))  # (top-left, bottom-right)
    cv.imwrite(filename, img)

def get_bbox(size, box):
    """Convert a pixel ``xyxy`` box into a normalised YOLOv5 ``xywh`` box.

    :param size: ``(width, height)`` of the image.
    :param box: ``(x1, y1, x2, y2)`` corner coordinates.
    :return: tuple ``(xc, yc, w, h)`` - box centre and extent, each divided
        by the matching image dimension.
    """
    left, top, right, bottom = box[0], box[1], box[2], box[3]
    inv_w = 1. / size[0]
    inv_h = 1. / size[1]
    return (
        (left + right) * 0.5 * inv_w,
        (top + bottom) * 0.5 * inv_h,
        (right - left) * inv_w,
        (bottom - top) * inv_h,
    )

def parse_annotation(filename, labelpath):
    """Parse the annotation encoded in a CCPD file name and write its label file.

    The file name is a ``-``-separated record, for example
    ``0014128352490421455-90_90-212&467_271&489-271&489_212&489_212&467_271&467-0_0_3_30_30_25_31_32-79-4.jpg``.
    Fields used here: [0] plate-area ratio (ignored), [1] horizontal and
    vertical tilt angles, [2] bounding box (top-left, bottom-right),
    [3] four corner points (bottom-right, bottom-left, top-left, top-right),
    [4] plate character indices. Later fields are ignored.

    A one-line label ``"<classid> <xc> <yc> <w> <h>"`` (normalised with the
    module-level ``imgsz``) is written to ``<labelpath>/<stem>.txt``.

    :param filename: image file name (no directory part).
    :param labelpath: existing directory that receives the label file.
    :return: dict with keys ``filename``, ``bboxes`` (list of xyxy int lists),
        ``points`` (list of Nx2 arrays), ``labels``, ``plates`` and ``angles``.
    :raises IndexError: if the name has fewer than five ``-``-separated fields.
    """
    fields = filename.split("-")
    angle = fields[1].split("_")  # tilt angles, kept as strings
    box = [int(v) for v in fields[2].replace("&", "_").split("_")]  # xyxy
    corners = [int(v) for v in fields[3].replace("&", "_").split("_")]
    point = np.asarray(corners).reshape(-1, 2)  # one (x, y) row per corner
    plate = get_plate_licenses(fields[4].split("_"))  # plate character indices
    bbox = get_bbox(imgsz, box)  # normalised xywh

    bboxes = [box]  # [xyxy]
    # NOTE(review): YOLO class ids are usually 0-based; confirm that 1 is the
    # intended id for "plate" in the target training config.
    classid = 1  # plate
    annoinfo = {"filename": filename, "bboxes": bboxes, "points": [point],
                "labels": ["plate"] * len(bboxes), "plates": [plate], "angles": [angle]}

    info = f"{classid} {' '.join(f'{x:.6f}' for x in bbox)}\n"
    # Swap only the extension; a plain str.replace('jpg', 'txt') would also
    # rewrite a "jpg" inside the stem and leave other extensions untouched.
    stem = os.path.splitext(filename)[0]
    labelname = os.path.join(labelpath, stem + '.txt')
    # The context manager closes the file even if the write fails.
    with open(labelname, 'w') as labelfile:
        labelfile.write(info)
    return annoinfo


if __name__ == "__main__":
    # Both dataset folders are looked up next to this script.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    for convert in (ccpd2coco2020, ccpd2coco2019):
        convert(script_dir)

View Code

3. 数据集下载

CCPD2019：官方原始数据，主要是蓝牌数据，约35.5万张
【下载地址】
https://pan.baidu.com/s/1i5AOjAbtkwb17Zy-NQGqkw
提取码：hm0u
CCPD2020：官方原始数据，主要是新能源绿牌数据，约1万
【下载地址】
https://pan.baidu.com/s/1JSpc9BZXFlPkXxRK4qUCyw
提取码：ol3j
【数据集官方地址】
https://github.com/detectRecog/CCPD.git

数据集目录

./
├── CCPD2019
│   ├── ccpd_base
│   ├── ccpd_blur
│   ├── ccpd_challenge
│   ├── ccpd_db
│   ├── ccpd_fn
│   ├── ccpd_np
│   ├── ccpd_rotate
│   ├── ccpd_tilt
│   ├── ccpd_weather
│   ├── LICENSE
│   ├── README.md
│   └── splits
├── CCPD2020
│   ├── ccpd_green

参考

1. 【开源数据集】智慧城市之CCPD车牌数据集；

2. 【开源数据集】智慧城市之CCPD车牌数据集_ccpd数据集-CSDN博客；

3. GitHub - detectRecog/CCPD: [ECCV 2018] CCPD: a diverse and well-annotated dataset for license plate ；

4. Zhenbo_Xu_Towards_End-to-End_License_ECCV_2018_paper；

5. CCPD车牌检测识别数据集_ccpd数据集全称-CSDN博客；

完

posted on 2024-07-04 18:34 鹅要长大阅读(300) 评论(0) 编辑收藏举报

刷新页面返回顶部

鹅要长大

【CV数据集】智慧城市之CCPD车牌数据集

公告

导航