[转]Python之基于十六进制判断文件类型
核心代码:

#!/usr/bin/env python# -*- coding: utf-8 -*-# @Author : sukimport structfrom io import BytesIO# 支持文件类型# 用16进制字符串的目的是可以知道文件头是多少字节# 各种文件头的长度不一样,少则2字符,长则8字符def typeList(types): type_dict = {'jpg': ['FFD8FFE000104A464946'], 'png': ['89504E470D0A1A0A0000'], 'gif': ['47494638396126026F01'], 'tif': ['49492A00227105008037'], 'bmp': ['424D8E1B030000000000'], 'dwg': ['41433130313500000000'], 'html': ['3C21444F435459504520'], 'htm': ['3C21646F637479706520'], 'css': ['48544D4C207B0D0A0942'], 'js': ['696B2E71623D696B2E71'], 'rtf': ['7B5C727466315C616E73'], 'psd': ['38425053000100000000'], 'eml': ['46726F6D3A203D3F6762'], 'wps': ['D0CF11E0A1B11AE10000'], 'mdb': ['5374616E64617264204A'], 'ps': '[252150532D41646F6265]', 'pdf': ['255044462D312E'], 'rmvb': ['2E524D46000000120001'], 'flv': ['464C5601050000000900'], 'mp4': ['00000020667479706D70'], 'mp3': ['49443303000000002176'], 'mpg': ['000001BA210001000180'], 'wmv': ['3026B2758E66CF11A6D9'], 'wav': ['52494646E27807005741'], 'avi': ['52494646D07D60074156'], 'mid': ['4D546864000000060001'], 'zip': ['504B0304140000000800', '504B0304140000080800', '504B03040A0000080000'], 'rar': ['526172211A0700CF9073'], 'ini': ['235468697320636F6E66'], 'jar': ['504B03040A0000000000'], 'exe': ['4D5A9000030000000400'], 'jsp': ['3C25402070616765206C'], 'mf': ['4D616E69666573742D56'], 'xml': ['3C3F786D6C2076657273'], 'sql': ['494E5345525420494E54'], 'java': ['7061636B616765207765'], 'bat': ['406563686F206F66660D'], 'gz': ['1F8B0800000000000000'], 'properties': ['6C6F67346A2E726F6F74'], 'class': ['CAFEBABE0000002E0041'], 'chm': ['49545346030000006000'], 'mxp': ['04000000010000001300'], 'docx': ['504B0304140006000800', '504B03040A0000000000'], 'torrent': ['6431303A637265617465'], 'mov': ['6D6F6F76'], 'wpd': ['FF575043'], 'dbx': ['CFAD12FEC5FD746F'], 'pst': ['2142444E'], 'qdf': ['AC9EBD8F'], 'pwl': ['E3828596'], 'ram': ['2E7261FD'] } ret = {} for k_hex, v_prefix in type_dict.items(): if k_hex in types: ret[k_hex] = v_prefix return ret# 字节码转16进制字符串def bytes2hex(bytes): num = len(bytes) hexstr = u"" for i in range(num): t = u"%x" % bytes[i] if len(t) % 2: hexstr += u"0" hexstr += t return hexstr.upper()# 获取文件类型def file_type(filename): binfile = open(filename, 'rb') # 必需二制字读取 tl = typeList(types=["jpg", "zip", "docx"]) ftype = None for type_name, hcode_list in tl.items(): flag = False for hcode in hcode_list: numOfBytes = int(len(hcode) / 2) # 需要读多少字节 binfile.seek(0) # 每次读取都要回到文件头,不然会一直往后读取 hbytes = struct.unpack_from("B" * numOfBytes, binfile.read(numOfBytes)) # 一个 "B"表示一个字节 f_hcode = bytes2hex(hbytes) # 如果判断不出来,打印出这个值,往字典增加即可 # print("上传数据流hex", s_hcode, '=', "代码字典hex", hcode) # 如果判断不出来,打印出这个值,往字典增加即可 if f_hcode == hcode: flag = True break if flag: ftype = type_name break binfile.close() return ftype# 获取字节流类型def stream_type(stream, types): """ :param stream:流数据 :param types:需要判断文件类型,格式:["jpg","jpn"] :return: """ tl = typeList(types=types) ftype = None for type_name, hcode_list in tl.items(): flag = False for hcode in hcode_list: numOfBytes = int(len(hcode) / 2) # 需要读多少字节 hbytes = struct.unpack_from("B" * numOfBytes, stream[0:numOfBytes]) # 一个 "B"表示一个字节 s_hcode = bytes2hex(hbytes) # print("上传数据流hex", s_hcode, '=', "代码字典hex", hcode) # 如果判断不出来,打印出这个值,往字典增加即可 if s_hcode == hcode: flag = True break if flag: ftype = type_name break return ftypedef stream_split(stream, count=3): """ 主要处理流是分段获取的数据 :param stream: 块流 :param count: 取多少段合成来判断类型,默认三段 :return: """ block_stream = BytesIO() temp = 1 for block in stream: block_stream.write(block) if temp == count: break temp += 1 return block_stream.getvalue()
type_dict字典,根据自己上传的文件,来填写,数据来自互联网。
基于Flask的上传示例
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
|
@index .route( '/upload' , methods = [ 'GET' , 'POST' ]) def upload(): if request.method = = 'GET' : return render_template( 'upload.html' ) upload_obj = request.files.get( 'code_file' ) if not upload_obj: return '没有选择文件上传' ret = stream_type(stream_split(upload_obj.stream), [ "jpg" , "png" , "pdf" ]) if not ret: return '上传失败,文件类型不匹配,类型必须 "jpg" or "png" or "pdf"' file_name = upload_obj.filename upload_obj.save(os.path.join( 'files' , file_name)) return '上传文件成功' |
upload.html
1
2
3
4
5
6
7
8
|
{% extends 'layout.html' %} {% block content %} < h1 >上传代码</ h1 > < form action="" method="post" enctype="multipart/form-data"> < input type="file" name="code_file"> < input type="submit" value="上传"></ input > </ form > {% endblock %} |
开始上传文件:
上传不在列表中的文件类型
上传在列表中的文件类型
---------------------
作者:小粉优化大师
来源:CNBLOGS
原文:https://www.cnblogs.com/ygbh/p/11918876.html
版权声明:本文为作者原创文章,转载请附上博文链接!
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· AI与.NET技术实操系列:基于图像分类模型对图像进行分类
· go语言实现终端里的倒计时
· 如何编写易于单元测试的代码
· 10年+ .NET Coder 心语,封装的思维:从隐藏、稳定开始理解其本质意义
· .NET Core 中如何实现缓存的预热?
· 分享一个免费、快速、无限量使用的满血 DeepSeek R1 模型,支持深度思考和联网搜索!
· 基于 Docker 搭建 FRP 内网穿透开源项目(很简单哒)
· ollama系列01:轻松3步本地部署deepseek,普通电脑可用
· 25岁的心里话
· 按钮权限的设计及实现