http数据包解析碰到gzip压缩格式的解压
在做HTTP数据包监控时,碰到了gzip压缩格式的数据。在网友发布的一些技术文章的基础上,经过一段时间的研究、调试,终于解析成功。现将核心代码公布于此,希望能够和大家一起学习交流。
注:以下代码需要依赖zlib开源库,可以到网上搜索下载。
/**
 * HTTP gzip decompress.
 *
 * Inflates a gzip- or zlib-wrapped HTTP body. If the data carries neither
 * header (zlib reports Z_DATA_ERROR before producing any output), the input
 * is retried once as a raw deflate stream.
 *
 * @param zdata   Compressed input bytes.
 * @param nzdata  Number of bytes available at zdata.
 * @param data    Output buffer receiving the decompressed bytes.
 * @param ndata   In: capacity of data in bytes. Out: number of bytes written.
 *                Only updated on success.
 * @return 0 on success, -1 on failure. Success includes the cases where the
 *         output buffer filled up or the input ran out before the end of the
 *         stream; the caller then receives the partial output.
 */
int CNNHttp::httpgzdecompress(const PVOID zdata, DWORD nzdata, PVOID data, DWORD *ndata)
{
    if (!ndata || (!zdata && nzdata) || (!data && *ndata))
    {
        return -1;
    }

    const int WBITS_AUTO_GZIP_ZLIB = 47; /* 15 + 32: auto-detect gzip or zlib header */
    const int WBITS_RAW_DEFLATE = -15;   /* negative: raw deflate, no header */

    const DWORD cbOutMax = *ndata;
    bool bRawDeflate = false;

    for (;;)
    {
        z_stream d_stream = {0}; /* decompression stream */
        d_stream.zalloc = (alloc_func)0;
        d_stream.zfree = (free_func)0;
        d_stream.opaque = (voidpf)0;
        d_stream.next_in = (Bytef *)zdata;
        d_stream.avail_in = 0;
        d_stream.next_out = (Bytef *)data;

        if (inflateInit2(&d_stream, bRawDeflate ? WBITS_RAW_DEFLATE : WBITS_AUTO_GZIP_ZLIB) != Z_OK)
        {
            return -1;
        }

        int err = Z_OK;
        while (d_stream.total_out < cbOutMax && d_stream.total_in < nzdata)
        {
            /* Hand zlib everything that is left; it stops when either side runs out. */
            d_stream.avail_in = (uInt)(nzdata - d_stream.total_in);
            d_stream.avail_out = (uInt)(cbOutMax - d_stream.total_out);
            err = inflate(&d_stream, Z_NO_FLUSH);
            if (err != Z_OK)
            {
                break; /* Z_STREAM_END or an error */
            }
        }

        const DWORD cbOut = (DWORD)d_stream.total_out;
        /* Always release the inflate state, on error paths as well. */
        const bool bEndOk = (inflateEnd(&d_stream) == Z_OK);

        if (err == Z_OK || err == Z_STREAM_END)
        {
            if (!bEndOk)
            {
                return -1;
            }
            *ndata = cbOut;
            return 0;
        }

        if (err == Z_DATA_ERROR && !bRawDeflate && cbOut == 0)
        {
            /* No valid gzip/zlib header: restart from the beginning as raw deflate. */
            bRawDeflate = true;
            continue;
        }

        return -1;
    }
}
---------------------------------------------------------------------------------- |
|
#14 得分:0回复于: 2012-12-18 11:52:37
class CBuffer
{ std::vector<BYTE> vecBuf; public: void Reset(DWORD dwNewSize = 0) { vecBuf.resize(dwNewSize); } void Reset(PVOID pData, DWORD nLen) { vecBuf.clear(); Append(pData, nLen); } bool IsEmpty() const { return vecBuf.empty(); } void Append(PVOID pData, DWORD nLen) { vecBuf.insert(vecBuf.end(), (PBYTE)pData, (PBYTE)pData + nLen); } PBYTE GetData() { if (vecBuf.empty()) { return NULL; } return &vecBuf[0]; } DWORD GetDataLength() { return vecBuf.size(); } BYTE& operator[](DWORD _Pos) { return vecBuf[_Pos]; } }; //返回true表示此次回应的所有ChunkData数据接收结束 bool OnRecvGzipData(CBuffer& cBuf, CBuffer& cBufLeft, CBuffer& cBufTmp, bool bIsChunked, char *szGzipData, int nLen) { if (!bIsChunked) { cBuf.Append((PBYTE)szGzipData, nLen); return false; } cBufLeft.Append(szGzipData, nLen); szGzipData = (char*)cBufLeft.GetData(); nLen = (int)cBufLeft.GetDataLength(); while (nLen) { int nChunkSize = strtoul(szGzipData, NULL, 16); if (nChunkSize == 0) { return true; } char* pos = strstr(szGzipData, "\r\n"); if (!pos) { goto ToExit; } pos += strlen("\r\n"); int len = (pos - szGzipData) + nChunkSize + strlen("\r\n"); if (len > nLen) { goto ToExit; } cBuf.Append((PBYTE)pos, nChunkSize); szGzipData += len; nLen -= len; } cBufLeft.Reset(); return false; ToExit: cBufTmp.Reset(szGzipData, (DWORD)nLen); cBufLeft.Reset(cBufTmp.GetData(), cBufTmp.GetDataLength()); return false; } void OnRecvData(HANDLE hand, int iRet, char* buf) { //NNLOG_TRACE_FUN(); class CGzipDataPackBuf { public: CBuffer vecByteGzipDataBuf; CBuffer vecByteGzipDataDecodeBuf; CBuffer vecByteBufLeft; bool begin_gzip_text_html; bool bIsChunked; bool bIsUtf8; DWORD dwGetTickCount; CGzipDataPackBuf() { Reset(); } void Reset() { dwGetTickCount = ::GetTickCount(); begin_gzip_text_html = false; bIsChunked = false; bIsUtf8 = false; } }; typedef std::map<HANDLE, CGzipDataPackBuf> MapCGzipDataPackBuf_T; static MapCGzipDataPackBuf_T s_MapCGzipDataPackBuf; static CCriticalSection s_csMapCGzipDataPackBuf; if (!buf || (0 >= iRet)) { return; } 
NNLOG_DEBUG(_T("len:%u, data:%S"), iRet, buf); //CWinFile::Write(CWinModule::WinGetModuleFileName() + _T(".") _T(__FUNCTION__), buf, (DWORD)iRet); const DWORD MAX_GzipDataBuf = 1024*1024; /* Content-Type: text/html; charset=utf-8 Content-Language: zh-CN content="text/html;charset=gb2312" Content-Type: text/html;charset=gbk */ char *tstr = NULL; bool bRecvChunkGzipDataComplete = false; CGzipDataPackBuf* pCGzipDataPackBuf = NULL; if ((15 <= iRet) && (0 == StrCmpNIA(buf, "HTTP/1.1 200 OK", 15)) && StrStrIA(buf, "Content-Type: text/html") && StrStrIA(buf, "Content-Encoding: gzip") && strstr(buf, "\r\n\r\n")//此处未考虑http头信息分包接收的情况 ) { //NNLOG_TRACE_ACTION_SCOPE(HTTP_200_OK); NN_WIN_SCOPED_LOCK(s_csMapCGzipDataPackBuf); pCGzipDataPackBuf = &s_MapCGzipDataPackBuf[hand]; } else { //NNLOG_TRACE_ACTION_SCOPE(Find pCGzipDataPackBuf); NN_WIN_SCOPED_LOCK(s_csMapCGzipDataPackBuf); MapCGzipDataPackBuf_T::iterator it = s_MapCGzipDataPackBuf.find(hand); if (s_MapCGzipDataPackBuf.end() == it) { return; } pCGzipDataPackBuf = &it->second; } char* pos = NULL; if (!pCGzipDataPackBuf->begin_gzip_text_html && (pos = strstr(buf, "\r\n\r\n")) ) { //NNLOG_TRACE_ACTION_SCOPE(check http data); pos[0] = 0; if (!StrStrIA(buf, "Content-Type: text/html") || !StrStrIA(buf, "Content-Encoding: gzip")) { //此处未考虑http头信息分包接收的情况 NNLOG_ASSERT(0);//上面已作初步判断,这里一般不太可能发生 WIN_SCOPED_LOCK(s_csMapCGzipDataPackBuf); s_MapCGzipDataPackBuf.erase(hand); return; } pCGzipDataPackBuf->begin_gzip_text_html = true; pCGzipDataPackBuf->bIsUtf8 = NULL != StrStrIA(buf, "charset=utf-8"); pCGzipDataPackBuf->bIsChunked = NULL != StrStrIA(buf, "Transfer-Encoding: chunked"); pos[0] = '\r';//还原 pos += strlen("\r\n\r\n"); iRet -= (pos - buf); buf = pos; bRecvChunkGzipDataComplete = OnRecvGzipData(pCGzipDataPackBuf->vecByteGzipDataBuf, pCGzipDataPackBuf->vecByteBufLeft, pCGzipDataPackBuf->vecByteGzipDataDecodeBuf, pCGzipDataPackBuf->bIsChunked, buf, iRet); //if (pCGzipDataPackBuf->bIsChunked) //{ // 
CWinFile::Write(CWinModule::WinGetModuleFileName() + _T(".") _T(__FUNCTION__), (PVOID)buf, (DWORD)iRet); //} } else if (pCGzipDataPackBuf->begin_gzip_text_html) { //NNLOG_TRACE_ACTION_SCOPE(append gzip data); bRecvChunkGzipDataComplete = OnRecvGzipData(pCGzipDataPackBuf->vecByteGzipDataBuf, pCGzipDataPackBuf->vecByteBufLeft, pCGzipDataPackBuf->vecByteGzipDataDecodeBuf, pCGzipDataPackBuf->bIsChunked, buf, iRet); //if (pCGzipDataPackBuf->bIsChunked) //{ // CWinFile::Write(CWinModule::WinGetModuleFileName() + _T(".") _T(__FUNCTION__), (PVOID)buf, (DWORD)iRet); //} } if (!pCGzipDataPackBuf->vecByteGzipDataBuf.IsEmpty() && (!pCGzipDataPackBuf->bIsChunked || bRecvChunkGzipDataComplete) || (MAX_GzipDataBuf < pCGzipDataPackBuf->vecByteGzipDataBuf.GetDataLength()) ) { //NNLOG_TRACE_ACTION_SCOPE(try parse gzip); DWORD Length = MAX_GzipDataBuf*2; pCGzipDataPackBuf->vecByteGzipDataDecodeBuf.Reset(Length); --Length; int iRetDec = CNNHttp::httpgzdecompress(pCGzipDataPackBuf->vecByteGzipDataBuf.GetData(), pCGzipDataPackBuf->vecByteGzipDataBuf.GetDataLength(), pCGzipDataPackBuf->vecByteGzipDataDecodeBuf.GetData(), &Length); if (0 == iRetDec) { //<input type=hidden name=tn value="77071064_1_pg"> pCGzipDataPackBuf->vecByteGzipDataDecodeBuf[Length] = '\0'; CString gzipData; if (pCGzipDataPackBuf->bIsUtf8) { gzipData = CA2CT((const char*)pCGzipDataPackBuf->vecByteGzipDataDecodeBuf.GetData(), CP_UTF8); } else { gzipData = CA2CT((const char*)pCGzipDataPackBuf->vecByteGzipDataDecodeBuf.GetData(), CP_ACP); //NNLOG_DEBUG(_T("gzip len:%u, data:%S"), Length, vecByteGzipDataDecodeBuf.GetData()); } if (!gzipData.IsEmpty()) { if (mc.GetdwHttpGzipPackMaxShowLen() < (DWORD)gzipData.GetLength()) { gzipData = gzipData.Left(mc.GetdwHttpGzipPackMaxShowLen() / 2) + _T("...") + gzipData.Right(mc.GetdwHttpGzipPackMaxShowLen() / 2); } NNLOG_DEBUG(_T("gzip len:%u, data:%s"), gzipData.GetLength(), gzipData.GetString()); } //pCGzipDataPackBuf->vecByteGzipDataBuf.Reset(); } if (((0 == iRetDec) && 
!pCGzipDataPackBuf->bIsChunked) || bRecvChunkGzipDataComplete || (MAX_GzipDataBuf < pCGzipDataPackBuf->vecByteGzipDataBuf.GetDataLength())) { WIN_SCOPED_LOCK(s_csMapCGzipDataPackBuf); s_MapCGzipDataPackBuf.erase(hand); } } { DWORD dwGetTickCount = ::GetTickCount(); WIN_SCOPED_LOCK(s_csMapCGzipDataPackBuf); NNLOG_DEBUG(_T("s_MapCGzipDataPackBuf.size():%u"), s_MapCGzipDataPackBuf.size()); BOOST_FOREACH(MapCGzipDataPackBuf_T::value_type& v, s_MapCGzipDataPackBuf) { CGzipDataPackBuf& gdpb(v.second); if (dwGetTickCount - gdpb.dwGetTickCount > 1000 * 60) { s_MapCGzipDataPackBuf.erase(v.first); break;//下次再处理其它的 } } } } 在各层hook临控的代码中调用上面函数: DEFINE_MY_WINAPI_RET(int, recv)( IN SOCKET s, __out_bcount_part(len, return) __out_data_source(NETWORK) char FAR * buf, IN int len, IN int flags ) { LOG_TRACE_FUN(); int iRet = recv_(s, buf, len, flags); OnRecvData((HANDLE)s, iRet, buf); return iRet; } DEFINE_MY_WINAPI_RET(int, WSARecv)( IN SOCKET s, __in_ecount(dwBufferCount) __out_data_source(NETWORK) LPWSABUF lpBuffers, IN DWORD dwBufferCount, __out_opt LPDWORD lpNumberOfBytesRecvd, IN OUT LPDWORD lpFlags, __in_opt LPWSAOVERLAPPED lpOverlapped, __in_opt LPWSAOVERLAPPED_COMPLETION_ROUTINE lpCompletionRoutine ) { int iRet = WSARecv_(s, lpBuffers, dwBufferCount, lpNumberOfBytesRecvd, lpFlags, lpOverlapped, lpCompletionRoutine); if ((0 == iRet) && !(lpNumberOfBytesRecvd && (0 == *lpNumberOfBytesRecvd))/* || (WSA_IO_PENDING == ::GetLastError())*/) { LOG_TRACE_FUN(); for (DWORD i = 0; i < dwBufferCount; ++i) { OnRecvData((HANDLE)s, (lpNumberOfBytesRecvd && (1 == dwBufferCount)) ? 
*lpNumberOfBytesRecvd : (int)lpBuffers[i].len, lpBuffers[i].buf); } } return iRet; } NTSTATUS MYNTAPI(NtDeviceIoControlFile)(HANDLE FileHandle, HANDLE Event, PIO_APC_ROUTINE ApcRoutine, PVOID ApcContext, PIO_STATUS_BLOCK IoStatusBlock, ULONG IoControlCode, PVOID InputBuffer, ULONG InputBufferLength, PVOID OutputBuffer, ULONG OutputBufferLength ) { PAFD_WSABUF lpBuffers = NULL; PAFD_INFO AfdInfo = (PAFD_INFO)InputBuffer; if (((AFD_RECV == IoControlCode) || (IoControlCode == AFD_SEND)) && AfdInfo && AfdInfo->BufferArray) { lpBuffers = AfdInfo->BufferArray; } NTSTATUS st = NtDeviceIoControlFile_(FileHandle, Event, ApcRoutine, ApcContext, IoStatusBlock, IoControlCode, InputBuffer, InputBufferLength, OutputBuffer, OutputBufferLength); if (AFD_RECV == IoControlCode) { if (NT_SUCCESS(st) && lpBuffers && lpBuffers->buf) { LOG_TRACE_FUN(); OnRecvData(FileHandle, IoStatusBlock->Information, lpBuffers->buf); } } return st; } |
hadoop大数据相关
posted on 2016-01-04 10:41 浪子回头jin不换 阅读(2008) 评论(0) 编辑 收藏 举报