在没有 .proto 文件的情况下解码(decode)protobuf 数据:Java 与 Python 两种实现
package com.example.demo.services; import com.google.common.base.Charsets; import com.google.protobuf.ByteString; import com.google.protobuf.CodedInputStream; import com.google.protobuf.InvalidProtocolBufferException; import com.google.protobuf.WireFormat; import java.io.ByteArrayOutputStream; import java.io.FileInputStream; import java.io.IOException; public class DouyinRead { public static interface Listener{ default void onComing(String nickname, String douyinNumber, String liveId){} default void onSpeaking(String nickname, String douyinNumber){} } private static int i = 0; public static void main(String[] args) throws IOException { FileInputStream fis = new FileInputStream("F:\\dy抓包\\74_.txt"); byte[] buf = new byte[2048]; ByteArrayOutputStream baos = new ByteArrayOutputStream(); int len = 0; while((len = fis.read(buf)) > 0){ baos.write(buf, 0, len); } String s = decodeProto(baos.toByteArray(), false, new String[]{"", ""}, new Listener() { @Override public void onComing(String nickname, String douyinNumber, String liveId) { System.out.println(nickname + "." + douyinNumber + "." 
+ liveId + "来了"); //Listener.super.onComing(nickname, douyinNumber, liveId); } }); System.out.println(s); } //main public static String decodeProto(byte[] data, boolean singleLine, String[] messageType, Listener listener) throws IOException { return decodeProto(ByteString.copyFrom(data), 0, singleLine, messageType, listener); } public static String decodeProto(ByteString data, int depth, boolean singleLine, String[] messageType, Listener listener) throws IOException { final CodedInputStream input = CodedInputStream.newInstance(data.asReadOnlyByteBuffer()); return decodeProtoInput(input, depth, singleLine, messageType, listener); } /** * zyl's自定义流程 * @param number 数量 * @param depth 深度 * @param str str * @param msgType msg类型 * @param listener */ private static void customProcess(int number, int depth, String str, String[] msgType, Listener listener){ if(number == 2 && depth == 1){ System.out.println(str); } if(number == 12 && depth == 3){ System.out.println(str); } //System.out.println(number+":" + depth + ":" + str); if(number == 1 && depth == 1){ msgType[0] = str + i++; //WebcastChatMessage // System.out.println("msgType: " + str); } /*if(number == 1 && depth == 2){ msgType[1] = str; } */ if(msgType[0].startsWith("WebcastChatMessage")) { //System.out.println("msgType:" + msgType[0] + ",number:" + number + ",depth:" + depth + ",str:" + str); //昵称 if(number == 3 && depth == 3){ System.out.print(str); } //账号 if(number == 38 && depth == 3){ System.out.print("("+str+")"); } //弹幕 if(number==3 && depth == 2){ System.out.println(":" + str); } } //进入房间消息 if(msgType[0].startsWith("WebcastMemberMessage")){ if(number == 3 && depth == 3){ msgType[1] = str; } if(number == 1 && depth == 4 && str.equals("live_room_enter_toast")){ msgType[0] = "live_room_enter_toast"; } } if(msgType[0].equals("live_room_enter_toast")){ //昵称 if(number == 3 && depth == 7){ //System.out.print(str); } //账号 if(number == 38 && depth == 7){ listener.onComing("", str, msgType[1]); //System.out.print("(" + 
str + ")"); } if(number == 68 && depth == 7){ //System.out.println("来了"); msgType[0] = "none"; } } } private static String decodeProtoInput(CodedInputStream input, int depth, boolean singleLine, String[] msgType, Listener listener) throws IOException { StringBuilder s = new StringBuilder("{ "); boolean foundFields = false; while (true) { final int tag = input.readTag(); int type = WireFormat.getTagWireType(tag); if (tag == 0 || type == WireFormat.WIRETYPE_END_GROUP) { break; } foundFields = true; protoNewline(depth, s, singleLine); final int number = WireFormat.getTagFieldNumber(tag); s.append(number).append(".").append(depth).append(": "); switch (type) { case WireFormat.WIRETYPE_VARINT: long lng = input.readInt64(); customProcess(number, depth, String.valueOf(lng), msgType, listener); s.append(lng); break; case WireFormat.WIRETYPE_FIXED64: s.append(Double.longBitsToDouble(input.readFixed64())); break; case WireFormat.WIRETYPE_LENGTH_DELIMITED: ByteString data = input.readBytes(); try { String submessage = decodeProto(data, depth + 1, singleLine, msgType, listener); if (data.size() < 30) { boolean probablyString = true; String str = new String(data.toByteArray(), Charsets.UTF_8); for (char c : str.toCharArray()) { if (c < '\n') { probablyString = false; break; } } customProcess(number, depth, str, msgType, listener); if (probablyString) { s.append("\"").append(str).append("\" "); } } s.append(submessage); } catch (IOException e) { String str = new String(data.toByteArray()); customProcess(number, depth, str, msgType, listener); s.append('"').append(str).append('"'); } break; case WireFormat.WIRETYPE_START_GROUP: s.append(decodeProtoInput(input, depth + 1, singleLine, msgType, listener)); break; case WireFormat.WIRETYPE_FIXED32: s.append(Float.intBitsToFloat(input.readFixed32())); break; default: throw new InvalidProtocolBufferException("Invalid wire type"); } } if (foundFields) { protoNewline(depth - 1, s, singleLine); } return s.append('}').toString(); } private 
static void protoNewline(int depth, StringBuilder s, boolean noNewline) { if (noNewline) { s.append(" "); return; } s.append('\n'); for (int i = 0; i <= depth; i++) { s.append("\t"); } } }
maven依赖:
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava-base</artifactId>
<version>r03</version>
</dependency>
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-javalite</artifactId>
<version>3.8.0-rc-1</version>
</dependency>
python:
原始 git 源码应该是基于 Python 2.7+ 编写的:https://github.com/nevermoe/protobuf_decoder
下面是修改为适配 Python 3.5+ 的 parse.py(解码只需要这一个文件就够了,其他文件其实没用)
# -*- coding: utf-8 -*-
"""Schema-less protobuf wire-format decoder and re-encoder (Python 3).

Decoded messages are plain dicts keyed by '<field>:<ordinal>:<kind>' where
<kind> is one of 'Varint', '64-bit', '32-bit', 'embedded message', 'string',
'repeated' or 'bytes'. The same dict layout is accepted by ReEncode.
"""
import sys
import codecs
import struct
import json
import traceback

# Human-readable trace of everything ParseData has decoded so far.
# It is module-level state and is never cleared by this module.
strings = []


def RetrieveInt(data, start, end):
    """Read one base-128 varint from data[start:end].

    Returns (num, newStart, True) on success or (None, None, False) when the
    varint runs past `end`.
    """
    num = 0
    shift = 0
    pos = start
    while True:
        if pos >= end:
            return (None, None, False)
        oneByte = data[pos]
        # Varints are little-endian groups of 7 payload bits.
        num |= (oneByte & 0x7F) << shift
        shift += 7
        pos += 1
        if oneByte & 0x80 == 0x0:
            break
    return (num, pos, True)


def GetDynamicWireFormat(data, start, end):
    """Read a field tag.

    Returns (newStart, wire_type, field_number), or (None, None, None) when
    the tag varint is truncated.
    """
    (tag, newStart, success) = RetrieveInt(data, start, end)
    if not success:
        return (None, None, None)
    # The low 3 bits of a tag are the wire type, the rest is the field number.
    return (newStart, tag & 0x7, tag >> 3)


def ParseRepeatedField(data, start, end, message, depth = 0):
    """Append every varint found in data[start:end] to the list `message`.

    Returns False if the range does not end on a varint boundary.
    """
    while start < end:
        (num, start, success) = RetrieveInt(data, start, end)
        if not success:
            return False
        message.append(num)
    return True


def ParseData(data, start, end, messages, depth = 0):
    """Decode the fields in data[start:end] into the dict `messages`.

    `data` must be a bytes-like object whose items are ints. Returns True on
    success and False as soon as the range cannot be parsed as a message.
    """
    global strings
    ordinary = 0  # position of the field inside this message; part of the key
    while start < end:
        (start, wire_type, field_number) = GetDynamicWireFormat(data, start, end)
        if start is None:
            return False

        if wire_type == 0x00:  # Varint
            (num, start, success) = RetrieveInt(data, start, end)
            if not success:
                return False

            if depth != 0:
                strings.append('\t'*depth)
            strings.append("(%d) Varint: %d\n" % (field_number, num))
            messages['%02d:%02d:Varint' % (field_number, ordinary)] = num
            ordinary = ordinary + 1

        elif wire_type == 0x01:  # 64-bit
            if start + 8 > end:
                return False
            num = int.from_bytes(data[start:start+8], 'little')
            start = start + 8
            try:
                # Reinterpret the bits as a double. Values with the top bit set
                # do not fit a signed 'q' and are kept as integers.
                floatNum = struct.unpack('<d', struct.pack('<q', num))[0]
            except struct.error:
                floatNum = None

            if depth != 0:
                strings.append('\t'*depth)
            if floatNum is not None:
                strings.append("(%d) 64-bit: 0x%x / %f\n" % (field_number, num, floatNum))
                messages['%02d:%02d:64-bit' % (field_number, ordinary)] = floatNum
            else:
                strings.append("(%d) 64-bit: 0x%x\n" % (field_number, num))
                messages['%02d:%02d:64-bit' % (field_number, ordinary)] = num
            ordinary = ordinary + 1

        elif wire_type == 0x02:  # Length-delimited
            curStrIndex = len(strings)
            (stringLen, start, success) = RetrieveInt(data, start, end)
            if not success:
                return False

            embeddedKey = '%02d:%02d:embedded message' % (field_number, ordinary)
            if depth != 0:
                strings.append('\t'*depth)
            strings.append("(%d) embedded message:\n" % field_number)
            messages[embeddedKey] = {}
            if start + stringLen > end:
                del strings[curStrIndex + 1:]  # pop failed result
                messages.pop(embeddedKey, None)
                return False

            payload = data[start:start+stringLen]
            # Try the payload as a nested message first, then as UTF-8 text,
            # then as packed varints, and finally keep it as raw bytes.
            ret = ParseData(data, start, start+stringLen, messages[embeddedKey], depth+1)
            if not ret:
                del strings[curStrIndex + 1:]  # pop failed result
                messages.pop(embeddedKey, None)
                if depth != 0:
                    strings.append('\t'*depth)
                strings.append("(%d) repeated:\n" % field_number)
                try:
                    text = payload.decode('utf-8')
                    strings.append("(%d) string: %s\n" % (field_number, text))
                    messages['%02d:%02d:string' % (field_number, ordinary)] = text
                except UnicodeDecodeError:
                    repeatedKey = '%02d:%02d:repeated' % (field_number, ordinary)
                    if depth != 0:
                        strings.append('\t'*depth)
                    strings.append("(%d) repeated:\n" % field_number)
                    messages[repeatedKey] = []
                    ret = ParseRepeatedField(data, start, start+stringLen, messages[repeatedKey], depth+1)
                    if not ret:
                        del strings[curStrIndex + 1:]  # pop failed result
                        messages.pop(repeatedKey, None)
                        hexStr = ':'.join('0x%x' % x for x in payload)
                        strings.append("(%d) bytes: %s\n" % (field_number, hexStr))
                        messages['%02d:%02d:bytes' % (field_number, ordinary)] = hexStr

            ordinary = ordinary + 1
            start = start + stringLen

        elif wire_type == 0x05:  # 32-bit
            if start + 4 > end:
                return False
            num = int.from_bytes(data[start:start+4], 'little')
            start = start + 4
            try:
                # Same reinterpretation as the 64-bit case, for single floats.
                floatNum = struct.unpack('<f', struct.pack('<i', num))[0]
            except struct.error:
                floatNum = None

            if depth != 0:
                strings.append('\t'*depth)
            if floatNum is not None:
                strings.append("(%d) 32-bit: 0x%x / %f\n" % (field_number, num, floatNum))
                messages['%02d:%02d:32-bit' % (field_number, ordinary)] = floatNum
            else:
                strings.append("(%d) 32-bit: 0x%x\n" % (field_number, num))
                messages['%02d:%02d:32-bit' % (field_number, ordinary)] = num
            ordinary = ordinary + 1

        else:
            # Wire types 3, 4, 6 and 7 are not handled.
            return False

    return True


def ParseProto(fileName):
    """Read `fileName` as binary and return its decoded message dict.

    The result of ParseData is ignored, so a partially decoded dict is
    returned when the file is not fully parseable.
    """
    with open(fileName, "rb") as f:
        data = f.read()
    messages = {}
    ParseData(data, 0, len(data), messages)
    return messages


def GenValueList(value):
    """Return the varint encoding of a non-negative int as a list of byte values."""
    valueList = []
    WriteValue(value, valueList)
    return valueList


def WriteValue(value, output):
    """Append the varint encoding of `value` to `output`; return bytes written.

    Negative values write nothing and return 0.
    """
    byteWritten = 0
    while value >= 0:
        oneByte = (value & 0x7F)
        value = (value >> 0x7)
        if value > 0:
            oneByte |= 0x80  # continuation bit: more groups follow
        output.append(oneByte)
        byteWritten += 1
        if value == 0:
            break
    return byteWritten


def WriteVarint(field_number, value, output):
    """Write a varint field (tag + value); return bytes written."""
    byteWritten = WriteValue((field_number << 3) | 0x00, output)
    byteWritten += WriteValue(value, output)
    return byteWritten


def Write64bitFloat(field_number, value, output):
    """Write a 64-bit field holding a double; return bytes written."""
    byteWritten = WriteValue((field_number << 3) | 0x01, output)
    packed = struct.pack('<d', value)
    output.extend(packed)  # iterating bytes yields ints on Python 3
    return byteWritten + len(packed)


def Write64bit(field_number, value, output):
    """Write a 64-bit field holding an integer, little-endian; return bytes written."""
    byteWritten = WriteValue((field_number << 3) | 0x01, output)
    for _ in range(0, 8):
        output.append(value & 0xFF)
        value = (value >> 8)
        byteWritten += 1
    return byteWritten


def Write32bitFloat(field_number, value, output):
    """Write a 32-bit field holding a float; return bytes written."""
    byteWritten = WriteValue((field_number << 3) | 0x05, output)
    packed = struct.pack('<f', value)
    output.extend(packed)
    return byteWritten + len(packed)


def Write32bit(field_number, value, output):
    """Write a 32-bit field holding an integer, little-endian; return bytes written."""
    byteWritten = WriteValue((field_number << 3) | 0x05, output)
    for _ in range(0, 4):
        output.append(value & 0xFF)
        value = (value >> 8)
        byteWritten += 1
    return byteWritten


def WriteRepeatedField(message, output):
    """Write every int of `message` as a varint; return bytes written."""
    byteWritten = 0
    for v in message:
        byteWritten += WriteValue(v, output)
    return byteWritten


def Decode(binary):
    """Decode a bytes object; return the message dict, or False if it is not parseable."""
    messages = {}
    ret = ParseData(binary, 0, len(binary), messages)
    if not ret:
        return False
    return messages


def _WriteLengthPrefixed(field_number, writeBody, output):
    """Write tag, then the body via writeBody(output), then splice the length in between."""
    byteWritten = WriteValue((field_number << 3) | 0x02, output)
    index = len(output)
    bodyLen = writeBody(output)
    # The length is only known after the body is written, so insert it before the body.
    lengthBytes = GenValueList(bodyLen)
    output[index:index] = lengthBytes
    return byteWritten + bodyLen + len(lengthBytes)


def ReEncode(messages, output):
    """Serialise a decoded message dict into the list `output`; return bytes written.

    Fields are emitted in the order of the ordinal stored in their key.
    """
    byteWritten = 0
    for key in sorted(messages.keys(), key=lambda x: int(x.split(':')[1])):
        keyList = key.split(':')
        field_number = int(keyList[0])
        wire_type = keyList[2]
        value = messages[key]

        if wire_type == 'Varint':
            byteWritten += WriteVarint(field_number, value, output)
        elif wire_type == '32-bit':
            if type(value) == float:
                byteWritten += Write32bitFloat(field_number, value, output)
            else:
                byteWritten += Write32bit(field_number, value, output)
        elif wire_type == '64-bit':
            if type(value) == float:
                byteWritten += Write64bitFloat(field_number, value, output)
            else:
                byteWritten += Write64bit(field_number, value, output)
        elif wire_type == 'embedded message':
            byteWritten += _WriteLengthPrefixed(
                field_number, lambda out, v=value: ReEncode(v, out), output)
        elif wire_type == 'repeated':
            byteWritten += _WriteLengthPrefixed(
                field_number, lambda out, v=value: WriteRepeatedField(v, out), output)
        elif wire_type == 'string':
            byteWritten += WriteValue((field_number << 3) | 0x02, output)
            bytesStr = list(value.encode('utf-8'))
            byteWritten += WriteValue(len(bytesStr), output)
            output.extend(bytesStr)
            byteWritten += len(bytesStr)
        elif wire_type == 'bytes':
            byteWritten += WriteValue((field_number << 3) | 0x02, output)
            bytesStr = [int(byte, 16) for byte in value.split(':')]
            byteWritten += WriteValue(len(bytesStr), output)
            output.extend(bytesStr)
            byteWritten += len(bytesStr)

    return byteWritten


def SaveModification(messages, fileName):
    """Re-encode `messages` and write the bytes to `fileName`."""
    output = list()
    ReEncode(messages, output)
    with open(fileName, 'wb') as f:
        f.write(bytearray(output))


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("usage: parse.py dec | enc | <protobuf file>", file=sys.stderr)
        sys.exit(1)

    if sys.argv[1] == "dec":
        messages = ParseProto('tmp.pb')
        # json.dump has no `encoding` argument on Python 3; the file object handles encoding.
        with codecs.open('tmp.json', 'w', 'utf-8') as f:
            json.dump(messages, f, indent=4, sort_keys=True, ensure_ascii=False)

    elif sys.argv[1] == "enc":
        with codecs.open('tmp.json', 'r', 'utf-8') as f:
            messages = json.load(f)
        SaveModification(messages, "tmp.pb")

    else:
        messages = ParseProto(sys.argv[1])
        print(json.dumps(messages, indent=4, sort_keys=True, ensure_ascii=False))

        # modify any field you like
        #messages['01:00:embedded message']['01:00:string'] = "あなた"

        # dump and reload the 'messages' json objects to ensure it being utf-8 encoded
        with codecs.open('tmp.json', 'w', 'utf-8') as f:
            json.dump(messages, f, indent=4, sort_keys=True, ensure_ascii=False)
        with codecs.open('tmp.json', 'r', 'utf-8') as f:
            messages = json.load(f)

        # the modification is saved in file named "modified"
        SaveModification(messages, "modified")
调用示例:
def pxprint(dict, indent=0):
    """Pretty-print a decoded message dict, indenting nested dicts one level deeper.

    Values that the terminal cannot encode are replaced by the placeholder
    "error-v" instead of aborting the whole printout.
    """
    # The parameter shadows the builtin `dict`, so nested messages are detected via
    # the Mapping ABC (imported locally because this snippet has no import block).
    from collections.abc import Mapping

    spaces = " "
    pad = spaces * (indent + 1)
    for k, v in dict.items():
        if isinstance(v, Mapping):
            print(pad + f'"{k}": {{')
            pxprint(v, indent + 1)
            print(pad + '}')
        else:
            try:
                print(pad + f'"{k}":"{v}"')
            except UnicodeEncodeError as e:
                # Some terminals cannot print certain multi-byte characters and raise this
                # error, so it is handled separately. If the value is stored in a database,
                # the database must be able to hold it too; for MySQL, set the collation of
                # both the database and the table to utf8mb4_general_ci.
                print(pad + f'"{k}":"error-v"')


def main():
    """Decode the sample capture file and print it."""
    # NOTE(review): `pbparser` is not imported in this snippet; presumably it is the
    # parse.py module shown above, saved as pbparser.py. Confirm and import it.
    dict = pbparser.ParseProto(r"F:\dy抓包\74_.txt")
    pxprint(dict)


if __name__ == "__main__":
    main()
解析结果样例:
完!