【CV数据集】智慧城市之CCPD车牌数据集
前言
最近查找车牌检测数据集,了解到CCPD数据集,CCPD 是一个开源免费的中国城市车牌识别数据集,非常不错。
具体实现
1. 数据集简介
CCPD2019数据集包含约35.5万张图片(下表合计355013张),图片尺寸为720x1160x3(宽x高x通道),共包含9种类型图片,每种类型的数量及说明参考下表。
类型
|
图片数量
|
备注
|
ccpd_base
|
199996
|
正常车牌
|
ccpd_blur
|
20611
|
模糊车牌
|
ccpd_challenge
|
50003
|
比较有挑战的车牌
|
ccpd_db
|
10132
|
光线较亮或较暗车牌
|
ccpd_fn
|
20967
|
距离摄像头较远或较近
|
ccpd_np
|
3036
|
没上牌的新车
|
ccpd_rotate
|
10053
|
水平倾斜20°-50°,垂直倾斜-10°-10°
|
ccpd_tilt
|
30216
|
水平倾斜15°~45°,垂直倾斜15°~45°
|
ccpd_weather
|
9999
|
雨天、雪天或者大雾天的车牌
|
合计
|
355013
|
|
数据标注格式:
CCPD的标注数据格式较为特别,是通过解析图片名的方式获取具体信息,即图像名就是标注内容。
如图片【025-95_113-154&383_386&473-386&473_177&454_154&383_363&402-0_0_22_27_27_33_16-37-15.jpg】,其文件名的含义如下:
文件名以"-"分隔为7个字段:
025:车牌区域占整个画面的比例;
95_113:车牌水平和垂直角度,水平95°,竖直113°;
154&383_386&473:标注框左上、右下坐标,左上(154, 383),右下(386, 473);
386&473_177&454_154&383_363&402:标注框四个角点坐标,顺序为右下、左下、左上、右上;
0_0_22_27_27_33_16:车牌号码,映射关系如下:第一个0为省份,对应省份字典provinces中的'皖';第二个0是该车所在地的地市一级代码,对应地市一级代码字典alphabets的'A';后5位为字母和数字,查看车牌号ads字典,如22为Y,27为3,33为9,16为S,最终车牌号码为皖AY339S;
37:亮度;
15:模糊度。
车牌字典
# Lookup tables used to decode the plate field of a CCPD file name.
# Each number in that field is an index into one of these lists.

# 34 entries: province abbreviation (first character of the plate).
provinces = ["皖", "沪", "津", "渝", "冀", "晋", "蒙", "辽", "吉", "黑",
             "苏", "浙", "京", "闽", "赣", "鲁", "豫", "鄂", "湘", "粤",
             "桂", "琼", "川", "贵", "云", "藏", "陕", "甘", "青", "宁",
             "新", "警", "学", "O"]

# 25 entries: prefecture-level code (second character of the plate).
alphabets = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K',
             'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
             'W', 'X', 'Y', 'Z', 'O']

# 35 entries: letters and digits used for the remaining plate characters.
ads = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K',
       'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
       'W', 'X', 'Y', 'Z', '0', '1', '2', '3', '4', '5',
       '6', '7', '8', '9', 'O']
2. 将CCPD中车牌区域解析为YOLO格式的txt标注(代码中沿用了coco的命名)
# 20240703: ccpd dataset to coco format dataset.
# The labels written here are one txt file per image holding
# "<classid> <xc> <yc> <w> <h>" with coordinates normalised by the image size.
import os

import numpy as np

try:
    import cv2 as cv
except ImportError:
    # OpenCV is only used by the optional display() debug helper, so the
    # conversion itself must keep working without it.
    cv = None

# Image width / height used to normalise the boxes.
imgw = 720
imgh = 1160
imgsz = imgw, imgh

# 34 entries: province abbreviation (first plate character).
provinces = ["皖", "沪", "津", "渝", "冀", "晋", "蒙", "辽", "吉", "黑",
             "苏", "浙", "京", "闽", "赣", "鲁", "豫", "鄂", "湘", "粤",
             "桂", "琼", "川", "贵", "云", "藏", "陕", "甘", "青", "宁",
             "新", "警", "学", "O"]
# 25 entries: prefecture-level code (second plate character).
alphabets = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K',
             'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
             'W', 'X', 'Y', 'Z', 'O']
# 35 entries: letters and digits for the remaining plate characters.
ads = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K',
       'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
       'W', 'X', 'Y', 'Z', '0', '1', '2', '3', '4', '5',
       '6', '7', '8', '9', 'O']


def get_plate_licenses(plate):
    """Decode the index list of a CCPD file name into the plate string.

    Regular blue plates have 7 characters; new-energy plates have 8:
    https://baike.baidu.com/item/%E8%BD%A6%E7%89%8C/8347320?fr=aladdin
    https://www.yoojia.com/ask/4-11906976349117851507.html
    A new-energy plate consists of the province abbreviation (1 Chinese
    character), the local administrative code (1 letter) and a 6-character
    serial; the letter "D" marks a pure electric vehicle and "F" a
    non-pure-electric one (plug-in hybrid, fuel cell, ...).

    :param plate: sequence of index strings, e.g. ``["0", "0", "22", ...]``.
    :return: the decoded plate, e.g. ``"皖AY339S"``.
    :raises ValueError: if an entry is not an integer.
    :raises IndexError: if an index is outside its lookup table.
    """
    chars = [provinces[int(plate[0])], alphabets[int(plate[1])]]
    chars += [ads[int(p)] for p in plate[2:]]
    return "".join(chars)


def ccpd2coco(path):
    """Write a txt label for every CCPD image found below ``path/CCPD2020``.

    Labels go to ``path/CCPD2020/green_label``. Files that are not ``.jpg``
    images, or whose name does not follow the CCPD naming scheme, are
    skipped instead of aborting the whole run.

    :param path: directory that contains the ``CCPD2020`` folder.
    """
    dataset_path = os.path.join(path, 'CCPD2020')
    labelpath = os.path.join(dataset_path, 'green_label')
    # A separate loop variable keeps the ``path`` argument intact.
    for dirpath, subpaths, files in os.walk(dataset_path):
        for filename in files:
            if not filename.lower().endswith('.jpg'):
                continue
            print(f'file in path: {dirpath}, subpath: {subpaths}, filename: {filename}')
            try:
                parse_annotation(filename, labelpath)
            except (ValueError, IndexError) as err:
                print(f'skip {filename}: {err}')


def display(filepath, annoinfo):
    """Draw the first bounding box on the image and save the result.

    The annotated image is written under its own file name relative to the
    current working directory.

    :param filepath: directory that contains the image.
    :param annoinfo: dict returned by :func:`parse_annotation`.
    :raises ImportError: if OpenCV is not installed.
    :raises FileNotFoundError: if the image cannot be read.
    """
    if cv is None:
        raise ImportError('display() requires OpenCV (cv2)')
    filename = annoinfo['filename']
    x1, y1, x2, y2 = annoinfo['bboxes'][0]  # [xyxy]
    imgfile = os.path.join(filepath, filename)
    img = cv.imread(imgfile)
    if img is None:
        # cv.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(imgfile)
    cv.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0))  # (leftup, rightdown)
    cv.imwrite(filename, img)


def get_bbox(size, box):
    """Convert an xyxy pixel box to a normalised YOLOv5 xywh box.

    :param size: ``(width, height)`` of the image.
    :param box: ``[x1, y1, x2, y2]`` in pixels.
    :return: ``(xc, yc, w, h)``, each divided by the image width / height.
    """
    dw = 1. / size[0]
    dh = 1. / size[1]
    xc = (box[0] + box[2]) * 0.5 * dw
    yc = (box[1] + box[3]) * 0.5 * dh
    w = (box[2] - box[0]) * dw
    h = (box[3] - box[1]) * dh
    return xc, yc, w, h


def parse_annotation(filename, labelpath, classid=1):
    """Parse a CCPD file name and write the matching txt label file.

    Example file name:
    0014128352490421455-90_90-212&467_271&489-271&489_212&489_212&467_271&467-0_0_3_30_30_25_31_32-79-4.jpg

    :param filename: CCPD image file name (the annotation is the name itself).
    :param labelpath: directory for the label file; created if missing.
    :param classid: value written in the first label column. Defaults to 1
        as in the original script.
        NOTE(review): YOLO class indices are usually zero-based; confirm
        that 1 matches the training configuration.
    :return: dict with the keys filename, bboxes, points, labels, plates
        and angles.
    :raises ValueError: if the name has fewer than five '-' separated fields
        or a field is not numeric.
    """
    annotations = filename.split("-")
    if len(annotations) < 5:
        raise ValueError(f'not a CCPD file name: {filename}')
    # annotations[0] is the ratio of the plate area to the whole image (unused).
    angle = annotations[1].split("_")  # horizontal and vertical tilt angle
    box = annotations[2].replace("&", "_").split("_")  # left-top and right-bottom corner
    point = annotations[3].replace("&", "_").split("_")  # four corners: right-bottom, left-bottom, left-top, right-top
    plate = get_plate_licenses(annotations[4].split("_"))  # decoded plate number
    box = [int(b) for b in box]  # xyxy
    bbox = get_bbox(imgsz, box)  # xywh
    point = np.asarray([int(b) for b in point]).reshape(-1, 2)
    bboxes = [box]  # [xyxy]
    annoinfo = {"filename": filename, "bboxes": bboxes, "points": [point],
                "labels": ["plate"] * len(bboxes), "plates": [plate],
                "angles": [angle]}
    # write coco info.
    info = f"{classid} {' '.join(f'{x:.6f}' for x in bbox)}\n"
    # splitext swaps only the extension; str.replace('jpg', 'txt') would also
    # hit a 'jpg' elsewhere in the name and miss an upper-case '.JPG'.
    labelname = os.path.join(labelpath, os.path.splitext(filename)[0] + '.txt')
    os.makedirs(labelpath, exist_ok=True)
    with open(labelname, 'w') as labelfile:
        labelfile.write(info)
    return annoinfo


if __name__ == "__main__":
    rootpath = os.path.dirname(os.path.realpath(__file__))
    ccpd2coco(rootpath)
随机获取一定比例的数据
# 20240703: ccpd dataset to coco format dataset.
# Copies a random fraction of the CCPD images to plate/image and writes one
# txt label per copied image ("<classid> <xc> <yc> <w> <h>", normalised by
# the image size) to plate/label.
import math
import os
import random
import shutil

import numpy as np

try:
    import cv2 as cv
except ImportError:
    # OpenCV is only used by the optional display() debug helper, so the
    # conversion itself must keep working without it.
    cv = None

# Image width / height used to normalise the boxes.
imgw = 720
imgh = 1160
imgsz = imgw, imgh
# Fraction of every sub-folder that is sampled.
percent = 0.05

# 34 entries: province abbreviation (first plate character).
provinces = ["皖", "沪", "津", "渝", "冀", "晋", "蒙", "辽", "吉", "黑",
             "苏", "浙", "京", "闽", "赣", "鲁", "豫", "鄂", "湘", "粤",
             "桂", "琼", "川", "贵", "云", "藏", "陕", "甘", "青", "宁",
             "新", "警", "学", "O"]
# 25 entries: prefecture-level code (second plate character).
alphabets = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K',
             'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
             'W', 'X', 'Y', 'Z', 'O']
# 35 entries: letters and digits for the remaining plate characters.
ads = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K',
       'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
       'W', 'X', 'Y', 'Z', '0', '1', '2', '3', '4', '5',
       '6', '7', '8', '9', 'O']

# ccpd2019all = ['ccpd_base', 'ccpd_blur', 'ccpd_challenge', 'ccpd_db', 'ccpd_fn', 'ccpd_np', 'ccpd_rotate', 'ccpd_tilt', 'ccpd_weather']
# 'ccpd_np' is left out of the list that is actually processed.
ccpd2019 = ['ccpd_base', 'ccpd_blur', 'ccpd_challenge', 'ccpd_db', 'ccpd_fn',
            'ccpd_rotate', 'ccpd_tilt', 'ccpd_weather']


def get_plate_licenses(plate):
    """Decode the index list of a CCPD file name into the plate string.

    Regular blue plates have 7 characters; new-energy plates have 8:
    https://baike.baidu.com/item/%E8%BD%A6%E7%89%8C/8347320?fr=aladdin
    https://www.yoojia.com/ask/4-11906976349117851507.html
    A new-energy plate consists of the province abbreviation (1 Chinese
    character), the local administrative code (1 letter) and a 6-character
    serial; the letter "D" marks a pure electric vehicle and "F" a
    non-pure-electric one (plug-in hybrid, fuel cell, ...).

    :param plate: sequence of index strings, e.g. ``["0", "0", "22", ...]``.
    :return: the decoded plate, e.g. ``"皖AY339S"``.
    :raises ValueError: if an entry is not an integer.
    :raises IndexError: if an index is outside its lookup table.
    """
    chars = [provinces[int(plate[0])], alphabets[int(plate[1])]]
    chars += [ads[int(p)] for p in plate[2:]]
    return "".join(chars)


def _sample_folder(subpath, imagepath, labelpath):
    """Copy a random ``percent`` share of one folder and write its labels."""
    if not os.path.isdir(subpath):
        print(f'skip missing folder: {subpath}')
        return
    files = [f for f in os.listdir(subpath) if f.lower().endswith('.jpg')]
    num = len(files)
    print('files: ', num)
    # ceil keeps at least one image for small folders; the old
    # "i > num*percent" test always took one image too many when
    # num*percent was a whole number. round() guards against float noise
    # such as 7.000000000000001.
    count = min(num, math.ceil(round(num * percent, 9)))
    os.makedirs(imagepath, exist_ok=True)
    for filename in random.sample(files, count):
        # Parse first so an image with a malformed name is never copied
        # without a label.
        try:
            parse_annotation(filename, labelpath)
        except (ValueError, IndexError) as err:
            print(f'skip {filename}: {err}')
            continue
        shutil.copyfile(os.path.join(subpath, filename),
                        os.path.join(imagepath, filename))


def ccpd2coco2019(path):
    """Sample every folder listed in ``ccpd2019`` below ``path/CCPD2019``.

    Images are copied to ``path/plate/image`` and labels are written to
    ``path/plate/label``.

    :param path: directory that contains the ``CCPD2019`` folder.
    """
    dataset_path = os.path.join(path, 'CCPD2019')
    imagepath = os.path.join(path, 'plate', 'image')
    labelpath = os.path.join(path, 'plate', 'label')
    for typename in ccpd2019:
        print('typename: ', typename)
        _sample_folder(os.path.join(dataset_path, typename), imagepath, labelpath)


def ccpd2coco2020(path):
    """Sample the test / train / val folders of ``path/CCPD2020/ccpd_green``.

    Images are copied to ``path/plate/image`` and labels are written to
    ``path/plate/label``.

    :param path: directory that contains the ``CCPD2020`` folder.
    """
    green_path = os.path.join(path, 'CCPD2020', 'ccpd_green')
    imagepath = os.path.join(path, 'plate', 'image')
    labelpath = os.path.join(path, 'plate', 'label')
    for typename in ['test', 'train', 'val']:
        _sample_folder(os.path.join(green_path, typename), imagepath, labelpath)


def ccpd2coco(path):
    """Write a txt label for every CCPD image found below ``path/CCPD2020``.

    Labels go to ``path/CCPD2020/green_label``. Files that are not ``.jpg``
    images, or whose name does not follow the CCPD naming scheme, are
    skipped instead of aborting the whole run.

    :param path: directory that contains the ``CCPD2020`` folder.
    """
    dataset_path = os.path.join(path, 'CCPD2020')
    labelpath = os.path.join(dataset_path, 'green_label')
    # A separate loop variable keeps the ``path`` argument intact.
    for dirpath, subpaths, files in os.walk(dataset_path):
        for filename in files:
            if not filename.lower().endswith('.jpg'):
                continue
            print(f'file in path: {dirpath}, subpath: {subpaths}, filename: {filename}')
            try:
                parse_annotation(filename, labelpath)
            except (ValueError, IndexError) as err:
                print(f'skip {filename}: {err}')


def display(filepath, annoinfo):
    """Draw the first bounding box on the image and save the result.

    The annotated image is written under its own file name relative to the
    current working directory.

    :param filepath: directory that contains the image.
    :param annoinfo: dict returned by :func:`parse_annotation`.
    :raises ImportError: if OpenCV is not installed.
    :raises FileNotFoundError: if the image cannot be read.
    """
    if cv is None:
        raise ImportError('display() requires OpenCV (cv2)')
    filename = annoinfo['filename']
    x1, y1, x2, y2 = annoinfo['bboxes'][0]  # [xyxy]
    imgfile = os.path.join(filepath, filename)
    img = cv.imread(imgfile)
    if img is None:
        # cv.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(imgfile)
    cv.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0))  # (leftup, rightdown)
    cv.imwrite(filename, img)


def get_bbox(size, box):
    """Convert an xyxy pixel box to a normalised YOLOv5 xywh box.

    :param size: ``(width, height)`` of the image.
    :param box: ``[x1, y1, x2, y2]`` in pixels.
    :return: ``(xc, yc, w, h)``, each divided by the image width / height.
    """
    dw = 1. / size[0]
    dh = 1. / size[1]
    xc = (box[0] + box[2]) * 0.5 * dw
    yc = (box[1] + box[3]) * 0.5 * dh
    w = (box[2] - box[0]) * dw
    h = (box[3] - box[1]) * dh
    return xc, yc, w, h


def parse_annotation(filename, labelpath, classid=1):
    """Parse a CCPD file name and write the matching txt label file.

    Example file name:
    0014128352490421455-90_90-212&467_271&489-271&489_212&489_212&467_271&467-0_0_3_30_30_25_31_32-79-4.jpg

    :param filename: CCPD image file name (the annotation is the name itself).
    :param labelpath: directory for the label file; created if missing.
    :param classid: value written in the first label column. Defaults to 1
        as in the original script.
        NOTE(review): YOLO class indices are usually zero-based; confirm
        that 1 matches the training configuration.
    :return: dict with the keys filename, bboxes, points, labels, plates
        and angles.
    :raises ValueError: if the name has fewer than five '-' separated fields
        or a field is not numeric.
    """
    annotations = filename.split("-")
    if len(annotations) < 5:
        raise ValueError(f'not a CCPD file name: {filename}')
    # annotations[0] is the ratio of the plate area to the whole image (unused).
    angle = annotations[1].split("_")  # horizontal and vertical tilt angle
    box = annotations[2].replace("&", "_").split("_")  # left-top and right-bottom corner
    point = annotations[3].replace("&", "_").split("_")  # four corners: right-bottom, left-bottom, left-top, right-top
    plate = get_plate_licenses(annotations[4].split("_"))  # decoded plate number
    box = [int(b) for b in box]  # xyxy
    bbox = get_bbox(imgsz, box)  # xywh
    point = np.asarray([int(b) for b in point]).reshape(-1, 2)
    bboxes = [box]  # [xyxy]
    annoinfo = {"filename": filename, "bboxes": bboxes, "points": [point],
                "labels": ["plate"] * len(bboxes), "plates": [plate],
                "angles": [angle]}
    # write coco info.
    info = f"{classid} {' '.join(f'{x:.6f}' for x in bbox)}\n"
    # splitext swaps only the extension; str.replace('jpg', 'txt') would also
    # hit a 'jpg' elsewhere in the name and miss an upper-case '.JPG'.
    labelname = os.path.join(labelpath, os.path.splitext(filename)[0] + '.txt')
    os.makedirs(labelpath, exist_ok=True)
    with open(labelname, 'w') as labelfile:
        labelfile.write(info)
    return annoinfo


if __name__ == "__main__":
    rootpath = os.path.dirname(os.path.realpath(__file__))
    ccpd2coco2020(rootpath)
    ccpd2coco2019(rootpath)
3. 数据集下载
CCPD2019:官方原始数据,主要是蓝牌数据,约35.5万张
【下载地址】 https://pan.baidu.com/s/1i5AOjAbtkwb17Zy-NQGqkw 提取码:hm0u
CCPD2020:官方原始数据,主要是新能源绿牌数据,约1万张
【下载地址】 https://pan.baidu.com/s/1JSpc9BZXFlPkXxRK4qUCyw 提取码:ol3j
【数据集官方地址】 https://github.com/detectRecog/CCPD.git
数据集目录
./
├── CCPD2019
│   ├── ccpd_base
│   ├── ccpd_blur
│   ├── ccpd_challenge
│   ├── ccpd_db
│   ├── ccpd_fn
│   ├── ccpd_np
│   ├── ccpd_rotate
│   ├── ccpd_tilt
│   ├── ccpd_weather
│   ├── LICENSE
│   ├── README.md
│   └── splits
├── CCPD2020
│   ├── ccpd_green
参考
各美其美,美美与共,不和他人作比较,不对他人有期待,不批判他人,不钻牛角尖。
心正意诚,做自己该做的事情,做自己喜欢做的事情,安静做一枚有思想的技术媛。
版权声明,转载请注明出处:https://www.cnblogs.com/happyamyhope/
心正意诚,做自己该做的事情,做自己喜欢做的事情,安静做一枚有思想的技术媛。
版权声明,转载请注明出处:https://www.cnblogs.com/happyamyhope/