protobuf 无proto 解码 decode 语言 java python

package com.example.demo.services;

import com.google.common.base.Charsets;
import com.google.protobuf.ByteString;
import com.google.protobuf.CodedInputStream;
import com.google.protobuf.InvalidProtocolBufferException;
import com.google.protobuf.WireFormat;

import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.IOException;

/**
 * Schema-less protobuf dumper: walks the raw wire format of a payload without a
 * {@code .proto} definition, renders it as nested text, and feeds every varint
 * and short length-delimited value to {@link #customProcess} so that selected
 * fields can be printed or reported through a {@link Listener}.
 */
public class DouyinRead {

    /**
     * Callbacks fired while a payload is being decoded. Both methods default to
     * no-ops so implementors override only what they need.
     */
    public static interface Listener{
        /**
         * Called from {@code customProcess} when field 38 is seen at depth 7 while
         * the current message type is {@code "live_room_enter_toast"}. This class
         * always passes an empty nickname.
         */
        default void onComing(String nickname, String douyinNumber, String liveId){}

        /** Declared for implementors; not invoked anywhere in this class. */
        default void onSpeaking(String nickname, String douyinNumber){}
    }

    /** Running counter appended to each message-type name stored in {@code msgType[0]}. */
    private static int i = 0;

    /**
     * Demo entry point: reads a captured payload from a hard-coded file, decodes
     * it and prints the rendered structure.
     *
     * @throws IOException if the file cannot be read or the payload is malformed
     */
    public static void main(String[] args) throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        // try-with-resources: the stream is closed even when read() throws.
        try (FileInputStream fis = new FileInputStream("F:\\dy抓包\\74_.txt")) {
            byte[] buf = new byte[2048];
            int len;
            while((len = fis.read(buf)) > 0){
                baos.write(buf, 0, len);
            }
        }
        String s = decodeProto(baos.toByteArray(), false, new String[]{"", ""}, new Listener() {
            @Override
            public void onComing(String nickname, String douyinNumber, String liveId) {
                System.out.println(nickname + "." + douyinNumber + "." + liveId + "来了");
            }
        });
        System.out.println(s);
    }

    /**
     * Decodes a raw protobuf payload starting at depth 0.
     *
     * @param data        serialized message bytes
     * @param singleLine  {@code true} to separate fields with spaces instead of newlines and tabs
     * @param messageType two-element scratch array shared across the whole decode;
     *                    slot 0 holds the current message type, slot 1 a value captured
     *                    from member messages (passed to the listener as {@code liveId})
     * @param listener    receiver of decoded events; may be {@code null}
     * @return textual rendering of the message
     * @throws IOException if the bytes are not valid protobuf wire format
     */
    public static String decodeProto(byte[] data, boolean singleLine, String[] messageType, Listener listener) throws IOException {
        return decodeProto(ByteString.copyFrom(data), 0, singleLine, messageType, listener);
    }

    /**
     * Decodes {@code data} as a message nested at the given {@code depth}.
     *
     * @throws IOException if the bytes are not valid protobuf wire format
     */
    public static String decodeProto(ByteString data, int depth, boolean singleLine, String[] messageType, Listener listener) throws IOException {
        final CodedInputStream input = CodedInputStream.newInstance(data.asReadOnlyByteBuffer());
        return decodeProtoInput(input, depth, singleLine, messageType, listener);
    }

    /**
     * Application-specific hook invoked for each decoded scalar/string value.
     * Matching is done purely on (field number, depth) pairs plus the state kept
     * in {@code msgType}; the field meanings noted below come from the original
     * author's comments and are not verifiable from this file.
     *
     * @param number   protobuf field number of the value
     * @param depth    nesting depth at which the value was found
     * @param str      the value rendered as text
     * @param msgType  mutable state: [0] current message type, [1] value captured
     *                 from field 3 at depth 3 of a member message
     * @param listener receiver of "user entered" events; ignored when {@code null}
     */
    private static void customProcess(int number, int depth, String str, String[] msgType, Listener listener){
        if(number == 2 && depth == 1){
            System.out.println(str);
        }
        if(number == 12 && depth == 3){
            System.out.println(str);
        }
        if(number == 1 && depth == 1){
            // Field 1 at depth 1 carries the message type name (e.g. WebcastChatMessage);
            // the counter suffix makes each occurrence distinct.
            msgType[0] = str + i++;
        }

        if(msgType[0].startsWith("WebcastChatMessage")) {
            // nickname
            if(number == 3 && depth == 3){
                System.out.print(str);
            }
            // account id
            if(number == 38 && depth == 3){
                System.out.print("("+str+")");
            }
            // chat text
            if(number==3 && depth == 2){
                System.out.println(":" + str);
            }
        }

        // room-entry message
        if(msgType[0].startsWith("WebcastMemberMessage")){
            if(number == 3 && depth == 3){
                msgType[1] = str;
            }

            if(number == 1 && depth == 4 && str.equals("live_room_enter_toast")){
                msgType[0] = "live_room_enter_toast";
            }
        }
        if(msgType[0].equals("live_room_enter_toast")){
            // Field 3 at depth 7 (nickname per the original notes) is intentionally ignored.
            // account id
            if(number == 38 && depth == 7){
                // Guard against callers that pass no listener instead of failing with an NPE.
                if (listener != null) {
                    listener.onComing("", str, msgType[1]);
                }
            }
            if(number == 68 && depth == 7){
                // End of the entry toast: reset the state so later fields are not matched.
                msgType[0] = "none";
            }
        }

    }

    /**
     * Reads fields from {@code input} until end of input or an end-group tag and
     * renders them as {@code number.depth: value} entries inside braces.
     *
     * <p>Length-delimited fields are first decoded as a nested message; if that
     * fails with an {@link IOException} the bytes are rendered as a quoted string.
     * Payloads shorter than 30 bytes that did parse as a message are additionally
     * shown as a quoted string when they contain no character below {@code '\n'}.
     * Fixed64/fixed32 values are rendered as double/float and are not passed to
     * {@code customProcess}.
     *
     * @throws IOException on malformed input or an unknown wire type
     */
    private static String decodeProtoInput(CodedInputStream input, int depth, boolean singleLine, String[] msgType, Listener listener) throws IOException {
        StringBuilder s = new StringBuilder("{ ");
        boolean foundFields = false;
        while (true) {
            final int tag = input.readTag();
            int type = WireFormat.getTagWireType(tag);
            if (tag == 0 || type == WireFormat.WIRETYPE_END_GROUP) {
                break;
            }
            foundFields = true;
            protoNewline(depth, s, singleLine);

            final int number = WireFormat.getTagFieldNumber(tag);
            s.append(number).append(".").append(depth).append(": ");

            switch (type) {
                case WireFormat.WIRETYPE_VARINT:
                    long lng = input.readInt64();
                    customProcess(number, depth, String.valueOf(lng), msgType, listener);
                    s.append(lng);
                    break;
                case WireFormat.WIRETYPE_FIXED64:
                    s.append(Double.longBitsToDouble(input.readFixed64()));
                    break;
                case WireFormat.WIRETYPE_LENGTH_DELIMITED:
                    ByteString data = input.readBytes();
                    try {
                        // Speculatively treat the payload as a nested message.
                        String submessage = decodeProto(data, depth + 1, singleLine, msgType, listener);
                        if (data.size() < 30) {
                            boolean probablyString = true;
                            String str = new String(data.toByteArray(), Charsets.UTF_8);
                            for (char c : str.toCharArray()) {
                                if (c < '\n') {
                                    probablyString = false;
                                    break;
                                }
                            }
                            customProcess(number, depth, str, msgType, listener);
                            if (probablyString) {
                                s.append("\"").append(str).append("\" ");
                            }
                        }
                        s.append(submessage);
                    } catch (IOException e) {
                        // Not a valid message: fall back to text. Decode as UTF-8 explicitly,
                        // matching the branch above, instead of the platform default charset.
                        String str = new String(data.toByteArray(), Charsets.UTF_8);
                        customProcess(number, depth, str, msgType, listener);
                        s.append('"').append(str).append('"');
                    }
                    break;
                case WireFormat.WIRETYPE_START_GROUP:
                    s.append(decodeProtoInput(input, depth + 1, singleLine, msgType, listener));
                    break;
                case WireFormat.WIRETYPE_FIXED32:
                    s.append(Float.intBitsToFloat(input.readFixed32()));
                    break;
                default:
                    throw new InvalidProtocolBufferException("Invalid wire type");
            }

        }
        if (foundFields) {
            // Put the closing brace on its own line, one level shallower than the fields.
            protoNewline(depth - 1, s, singleLine);
        }
        return s.append('}').toString();
    }

    /**
     * Appends a field separator: a single space in single-line mode, otherwise a
     * newline followed by {@code depth + 1} tab characters.
     */
    private static void protoNewline(int depth, StringBuilder s, boolean noNewline) {
        if (noNewline) {
            s.append(" ");
            return;
        }
        s.append('\n');
        for (int i = 0; i <= depth; i++) {
            s.append("\t");
        }
    }
}
JAVA版

maven依赖:

<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava-base</artifactId>
<version>r03</version>
</dependency>

<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-javalite</artifactId>
<version>3.8.0-rc-1</version>
</dependency>

python:

GitHub 上的原始源码应该是基于 Python 2.7+ 的:https://github.com/nevermoe/protobuf_decoder
下面是修改为适配 Python 3.5+ 的 parse.py(解码只需要这一个文件,其他文件用不到)

# -*- coding: utf-8 -*-
import sys
import codecs
import struct
import json
import traceback

strings = []

def GetDynamicWireFormat(data, start, end):
    """Decode the field tag that begins at ``data[start]``.

    Returns ``(next_offset, wire_type, field_number)``. When a multi-byte tag
    reaches ``end`` before its terminating byte, returns ``(None, None, None)``.
    """
    lead = data[start]
    # The wire type always lives in the low three bits of the first tag byte.
    wire_type = lead & 0x7
    if not (lead & 0x80):
        # No continuation bit: the whole tag fits in this single byte.
        return (start + 1, wire_type, lead >> 3)

    # Multi-byte tag: accumulate 7-bit groups, least significant group first.
    raw_tag = 0
    shift = 0
    cursor = start
    while True:
        if cursor >= end:
            return (None, None, None)
        current = data[cursor]
        raw_tag |= (current & 0x7F) << shift
        shift += 7
        cursor += 1
        if (current & 0x80) == 0:
            break

    # Drop the three wire-type bits to obtain the field number.
    return (cursor, wire_type, raw_tag >> 3)



#return (num, newStart, success)
def RetrieveInt(data, start, end):
    """Read one base-128 varint from ``data`` beginning at ``start``.

    Returns ``(value, next_offset, True)`` on success, or
    ``(None, None, False)`` if ``end`` is reached before the terminating byte.
    """
    value = 0
    shift = 0
    cursor = start
    while True:
        if cursor >= end:
            return (None, None, False)
        current = data[cursor]
        cursor += 1
        # Each byte contributes 7 payload bits, least significant group first.
        value |= (current & 0x7F) << shift
        shift += 7
        if (current & 0x80) == 0:
            return (value, cursor, True)


def ParseRepeatedField(data, start, end, message, depth = 0):
    """Decode ``data[start:end]`` as back-to-back varints appended to ``message``.

    Returns True when the range is consumed exactly, False as soon as a varint
    is truncated (values decoded before the failure stay in ``message``).
    ``depth`` is accepted but unused.
    """
    cursor = start
    while cursor < end:
        value, cursor, ok = RetrieveInt(data, cursor, end)
        if not ok:
            return False
        message.append(value)
    return True

def ParseData(data, start, end, messages, depth = 0):
    """Decode the protobuf fields in ``data[start:end]`` into ``messages``.

    Each field is stored under the key ``'<field>:<ordinal>:<kind>'`` (both
    numbers zero-padded to two digits), where ``ordinal`` is the position of
    the field within the current message and ``kind`` is one of ``Varint``,
    ``64-bit``, ``32-bit``, ``embedded message``, ``string``, ``repeated`` or
    ``bytes``. A human-readable trace is appended to the module-level
    ``strings`` list, indented with one tab per ``depth``.

    Length-delimited payloads are tried, in order, as a nested message, a
    UTF-8 string, a run of varints, and finally raw bytes (hex, ``:``-joined).

    Fixed 64/32-bit values are stored as the little-endian IEEE-754 double /
    float they encode.

    Returns True if the whole range was consumed, False on malformed input.
    """
    global strings
    indent = '\t' * depth
    ordinary = 0
    while start < end:
        (start, wire_type, field_number) = GetDynamicWireFormat(data, start, end)
        if start is None:
            return False

        if wire_type == 0x00:  # Varint
            (num, start, success) = RetrieveInt(data, start, end)
            if not success:
                return False

            if depth != 0:
                strings.append(indent)
            strings.append("(%d) Varint: %d\n" % (field_number, num))
            messages['%02d:%02d:Varint' % (field_number, ordinary)] = num
            ordinary = ordinary + 1

        elif wire_type == 0x01:  # 64-bit
            if start + 8 > end:
                return False
            raw = bytes(data[start:start + 8])
            start = start + 8
            # Protobuf fixed-width values are little-endian. Unpacking the raw
            # bytes directly also handles bit patterns with the top bit set
            # (negative doubles), which a signed 'q' pack rejects.
            num = int.from_bytes(raw, 'little')
            floatNum = struct.unpack('<d', raw)[0]

            if depth != 0:
                strings.append(indent)
            strings.append("(%d) 64-bit: 0x%x / %f\n" % (field_number, num, floatNum))
            messages['%02d:%02d:64-bit' % (field_number, ordinary)] = floatNum
            ordinary = ordinary + 1

        elif wire_type == 0x02:  # Length-delimited
            curStrIndex = len(strings)
            (stringLen, start, success) = RetrieveInt(data, start, end)
            if not success:
                return False
            if start + stringLen > end:
                # Declared length overruns the enclosing range.
                return False

            keyPrefix = '%02d:%02d:' % (field_number, ordinary)
            payload = bytes(data[start:start + stringLen])

            if depth != 0:
                strings.append(indent)
            strings.append("(%d) embedded message:\n" % field_number)
            child = {}
            messages[keyPrefix + 'embedded message'] = child

            if not ParseData(data, start, start + stringLen, child, depth + 1):
                # Not a nested message: drop everything logged for this field
                # (indent, label and any partial child output) and start over.
                del strings[curStrIndex:]
                del messages[keyPrefix + 'embedded message']
                if depth != 0:
                    strings.append(indent)

                try:
                    text = payload.decode('utf-8')
                except UnicodeDecodeError:
                    text = None

                if text is not None:
                    strings.append("(%d) string: %s\n" % (field_number, text))
                    messages[keyPrefix + 'string'] = text
                else:
                    repeated = []
                    if ParseRepeatedField(data, start, start + stringLen, repeated, depth + 1):
                        strings.append("(%d) repeated:\n" % field_number)
                        messages[keyPrefix + 'repeated'] = repeated
                    else:
                        # Last resort: keep the payload verbatim as hex bytes.
                        hexStr = ':'.join('0x%x' % x for x in payload)
                        strings.append("(%d) bytes: %s\n" % (field_number, hexStr))
                        messages[keyPrefix + 'bytes'] = hexStr

            ordinary = ordinary + 1
            start = start + stringLen

        elif wire_type == 0x05:  # 32-bit
            if start + 4 > end:
                return False
            raw = bytes(data[start:start + 4])
            start = start + 4
            # Same reasoning as the 64-bit case: little-endian, sign-agnostic.
            num = int.from_bytes(raw, 'little')
            floatNum = struct.unpack('<f', raw)[0]

            if depth != 0:
                strings.append(indent)
            strings.append("(%d) 32-bit: 0x%x / %f\n" % (field_number, num, floatNum))
            messages['%02d:%02d:32-bit' % (field_number, ordinary)] = floatNum
            ordinary = ordinary + 1

        else:
            # Groups (wire types 3/4) and invalid types are not supported.
            return False

    return True

def ParseProto(fileName):
    """Read ``fileName`` as binary and decode it as a protobuf message.

    Returns the dictionary filled in by ``ParseData``. The success flag of
    ``ParseData`` is not checked, so a malformed file yields whatever fields
    were decoded before the failure.
    """
    # Context manager so the handle is released even if read() raises.
    with open(fileName, "rb") as f:
        data = f.read()

    messages = {}
    ParseData(data, 0, len(data), messages)

    return messages

def GenValueList(value):
    """Return the base-128 varint encoding of ``value`` as a list of byte values.

    A negative ``value`` produces an empty list.
    """
    encoded = []
    if value < 0:
        return encoded
    while True:
        low_bits = value & 0x7F
        value >>= 7
        if value > 0:
            # More groups follow: set the continuation bit.
            low_bits |= 0x80
        encoded.append(low_bits)
        if value == 0:
            return encoded


def WriteValue(value, output):
    """Append the base-128 varint encoding of ``value`` to ``output``.

    Returns the number of bytes appended; a negative ``value`` appends nothing
    and returns 0.
    """
    written = 0
    if value < 0:
        return written
    while True:
        low_bits = value & 0x7F
        value >>= 7
        if value > 0:
            # More groups follow: set the continuation bit.
            low_bits |= 0x80
        output.append(low_bits)
        written += 1
        if value == 0:
            return written

def WriteVarint(field_number, value, output):
    """Append a varint field (tag followed by value) to ``output``.

    Negative values are written as their 64-bit two's-complement varint, which
    is how protobuf encodes negative int32/int64. Without this the tag would be
    emitted with no payload and the resulting message would be corrupt.

    Returns the number of bytes appended.
    """
    wireFormat = (field_number << 3) | 0x00
    byteWritten = WriteValue(wireFormat, output)
    if value < 0:
        value &= 0xFFFFFFFFFFFFFFFF
    byteWritten += WriteValue(value, output)
    return byteWritten

def Write64bitFloat(field_number, value, output):
    """Append a fixed64 field holding ``value`` as an IEEE-754 double.

    Returns the number of bytes appended (tag plus 8 payload bytes).
    """
    wireFormat = (field_number << 3) | 0x01
    byteWritten = WriteValue(wireFormat, output)

    # struct.pack returns bytes; on Python 3 these iterate as ints, so no hex
    # round-trip is needed (bytes.encode('hex') exists only on Python 2).
    # '<' pins the little-endian order required by the protobuf wire format.
    packed = struct.pack('<d', value)
    output.extend(packed)
    byteWritten += len(packed)

    return byteWritten

def Write64bit(field_number, value, output):
    """Append a fixed64 field holding the integer ``value``, low byte first.

    Returns the number of bytes appended (tag plus 8 payload bytes).
    """
    tag = (field_number << 3) | 0x01
    written = WriteValue(tag, output)

    # Emit the eight bytes from least to most significant.
    output.extend((value >> (8 * k)) & 0xFF for k in range(8))

    return written + 8

def Write32bitFloat(field_number, value, output):
    """Append a fixed32 field holding ``value`` as an IEEE-754 single float.

    Returns the number of bytes appended (tag plus 4 payload bytes).
    """
    wireFormat = (field_number << 3) | 0x05
    byteWritten = WriteValue(wireFormat, output)

    # struct.pack returns bytes; on Python 3 these iterate as ints, so no hex
    # round-trip is needed (bytes.encode('hex') exists only on Python 2).
    # '<' pins the little-endian order required by the protobuf wire format.
    packed = struct.pack('<f', value)
    output.extend(packed)
    byteWritten += len(packed)

    return byteWritten

def Write32bit(field_number, value, output):
    """Append a fixed32 field holding the integer ``value``, low byte first.

    Returns the number of bytes appended (tag plus 4 payload bytes).
    """
    tag = (field_number << 3) | 0x05
    written = WriteValue(tag, output)

    # Emit the four bytes from least to most significant.
    output.extend((value >> (8 * k)) & 0xFF for k in range(4))

    return written + 4

def WriteRepeatedField(message, output):
    """Append every integer in ``message`` to ``output`` as a varint.

    Returns the total number of bytes appended.
    """
    return sum(WriteValue(item, output) for item in message)


def Decode(binary):
    """Decode an in-memory protobuf payload.

    Returns the decoded dictionary, or False when ``ParseData`` reports that
    the payload is malformed.
    """
    decoded = {}
    if ParseData(binary, 0, len(binary), decoded) == False:
        return False
    return decoded


def ReEncode(messages, output):
    """Serialize a dictionary produced by ``ParseData`` back to wire format.

    Keys have the form ``'<field>:<ordinal>:<kind>'``; fields are emitted in
    ascending ``ordinal`` order so the original field order is reproduced.
    Bytes are appended to the list ``output``.

    Returns the number of bytes appended for this message. Keys with an
    unrecognised ``kind`` are skipped.
    """
    byteWritten = 0
    # dict.iterkeys() is Python 2 only; keys() works on every version.
    for key in sorted(messages.keys(), key=lambda x: int(x.split(':')[1])):
        keyList = key.split(':')
        field_number = int(keyList[0])
        wire_type = keyList[2]
        value = messages[key]

        if wire_type == 'Varint':
            byteWritten += WriteVarint(field_number, value, output)
        elif wire_type == '32-bit':
            if isinstance(value, float):
                byteWritten += Write32bitFloat(field_number, value, output)
            else:
                byteWritten += Write32bit(field_number, value, output)
        elif wire_type == '64-bit':
            if isinstance(value, float):
                byteWritten += Write64bitFloat(field_number, value, output)
            else:
                byteWritten += Write64bit(field_number, value, output)
        elif wire_type in ('embedded message', 'repeated'):
            wireFormat = (field_number << 3) | 0x02
            byteWritten += WriteValue(wireFormat, output)
            # The payload length is only known after encoding it, so write the
            # payload first and splice the length prefix in front afterwards.
            index = len(output)
            if wire_type == 'embedded message':
                payloadLen = ReEncode(value, output)
            else:
                payloadLen = WriteRepeatedField(value, output)
            lengthPrefix = GenValueList(payloadLen)
            output[index:index] = lengthPrefix
            byteWritten += payloadLen + len(lengthPrefix)
        elif wire_type in ('string', 'bytes'):
            wireFormat = (field_number << 3) | 0x02
            byteWritten += WriteValue(wireFormat, output)

            if wire_type == 'string':
                # Iterating bytes yields ints on Python 3; no hex detour needed.
                payload = list(value.encode('utf-8'))
            else:
                # 'bytes' values are ':'-joined hex literals such as '0x1f:0x8b'.
                payload = [int(byte, 16) for byte in value.split(':')] if value else []

            byteWritten += WriteValue(len(payload), output)
            output.extend(payload)
            byteWritten += len(payload)

    return byteWritten
    

def SaveModification(messages, fileName):
    """Re-encode ``messages`` and write the resulting bytes to ``fileName``."""
    output = list()
    ReEncode(messages, output)
    # Context manager so the file is closed even if the write fails.
    with open(fileName, 'wb') as f:
        f.write(bytearray(output))
    

if __name__ == "__main__":
    # Usage:
    #   parse.py dec      decode tmp.pb   -> tmp.json
    #   parse.py enc      encode tmp.json -> tmp.pb
    #   parse.py <file>   decode <file>, print it, round-trip it through
    #                     tmp.json and write the re-encoded bytes to "modified"
    if len(sys.argv) < 2:
        sys.exit("usage: %s dec|enc|<protobuf file>" % sys.argv[0])

    # Note: the json functions take no 'encoding' argument on Python 3; passing
    # one raises TypeError, so the text encoding is handled by codecs.open.
    if sys.argv[1] == "dec":
        messages = ParseProto('tmp.pb')

        with codecs.open('tmp.json', 'w', 'utf-8') as f:
            json.dump(messages, f, indent=4, sort_keys=True, ensure_ascii=False)

    elif sys.argv[1] == "enc":
        with codecs.open('tmp.json', 'r', 'utf-8') as f:
            messages = json.load(f)

        SaveModification(messages, "tmp.pb")

    else:
        messages = ParseProto(sys.argv[1])

        print(json.dumps(messages, indent=4, sort_keys=True, ensure_ascii=False))

        # modify any field you like
        #messages['01:00:embedded message']['01:00:string'] = "あなた"

        # Dump and reload the 'messages' object through UTF-8 JSON so that it
        # goes through the same path as a hand-edited tmp.json.
        with codecs.open('tmp.json', 'w', 'utf-8') as f:
            json.dump(messages, f, indent=4, sort_keys=True, ensure_ascii=False)
        with codecs.open('tmp.json', 'r', 'utf-8') as f:
            messages = json.load(f)

        # the modification is saved in file named "modified"
        SaveModification(messages, "modified")

 

调用示例:

def pxprint(dict, indent=0):
    """Pretty-print a (possibly nested) mapping, four spaces per nesting level.

    Nested mappings are printed as ``"key": {`` ... ``}``; every other value as
    ``"key":"value"``. If the terminal cannot encode a value, a placeholder is
    printed in its place.
    """
    spaces = "    "
    pad = spaces * (indent + 1)
    for key, val in dict.items():
        if not isinstance(val, Dict):
            try:
                print(pad + f'"{key}":"{val}"')
            except UnicodeEncodeError as e:
                # Some consoles cannot print multi-byte characters and raise this
                # error, so it is handled separately. If the data is stored in a
                # database (e.g. MySQL), both the database and the table need an
                # encoding such as utf8mb4_general_ci.
                print(pad + f'"{key}":"error-v"')
            continue

        print(pad + f'"{key}": {{')
        pxprint(val, indent + 1)
        print(pad + '}')

def main():
    """Decode the sample capture file and pretty-print the result."""
    parsed = pbparser.ParseProto(r"F:\dy抓包\74_.txt")
    pxprint(parsed)

main()
调用示例

 

解析结果样例:

 

 

完!

posted @ 2021-12-08 10:53  流失的痕迹  阅读(701)  评论(0)  编辑  收藏  举报