(转)复合文档格式分析

标 题: 【原创】复合文档格式分析
作 者: kanghtta
时 间: 2009-12-27,01:25:11
链 接: http://bbs.pediy.com/showthread.php?t=103868

参加了看雪10周年活动。见了很多牛牛,但苦于近来工作忙,经常潜水中,,不能白拿kanxue的u盘,也来回报下,重新活动下。。
如今,复合文档格式被广泛使用,虽然格式早就被人研究明白,也有相应的文档。
但网上大多都是些文字介绍,因此,想写个格式分析的东西来加深对复合文档的理解,
此小程序有如下功能:
1:打印输出文档头 512字节
2:分析msat、sat、ssat(short sat)
3:分析各个stream 使用的sat 和ssat的扇区号,
可在windows 和linux 下使用,更多功能我会逐步扩充好加上来。。。
写此程序的目的只是为了巩固下对复合文档的学习,具体的格式就不写了
,附件里是复合文档格式,英文好的看,

有用得着的朋友注意下,由于只是解析了下,没有建立红黑树,也没有考虑msat大于109扇区的情况,需要的可以在此基础上扩充,应该不难。。。
代码:
#include <math.h>
#include <tchar.h>

#include <algorithm>
#include <cassert>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <list>
#include <string>
#include <vector>

#include "ComDoc.h"

using namespace std;

/* Global parsing state. These are mutable variables, not constants: they start at 0 and are filled in while the header is processed. */

/* Size in bytes of a regular sector; assigned by DumpDocHeader from the header's _uSectorShift. */
int SectorSize = 0;
/* Size in bytes of a short sector; assigned by DumpDocHeader from the header's _uMiniSectorShift. */
int miniSectorSize = 0;
/* Copied by main from the header's _ulMiniSectorCutoff. */
ULONG MaxMiniStreamSize
=0; /** Size threshold for short streams: ProcessDirEntry follows a non-root entry whose size is below this value through the short-sector table (ssat), and every other entry through the regular sat. */
/* Forward declarations of the helpers defined after main */

/* Prints the fields of the document header and sets SectorSize / miniSectorSize from its shift fields. */
bool DumpDocHeader(PDocHeader pHeader);
/* Seeks inStream to iReadOffest (counted from the start of the file) and reads size bytes into buf. */
bool IfReadFile(ifstream &inStream,unsigned char * buf,unsigned int iReadOffest,size_t size);
/* Converts a sector id into a byte offset in the file: sid * SectorSize + 512. */
int GetOffestFremSid(SECT sid);
/* Prints the sector chain of one directory entry (slist = sat, sslist = ssat) and appends that chain to FatOfDirEntry; returns false when the entry's _cb is 0. */
bool ProcessDirEntry(PDirectoryEntry pDirEntry,vector<int> & slist,vector<int> & sslist,vector<vector<int> >& FatOfDirEntry,int );

int main(int argc,char *argv[])
{
cout
<<"Enter the name of the input file :\n";
string inputFileName;
getline(cin,inputFileName);

BYTE
* lpHeaderBuf = new BYTE[512];

memset(lpHeaderBuf,
0,512);

ifstream inStream;
inStream.open(inputFileName.data(),ios::binary
|ios::in);
assert(inStream.is_open());
inStream.read((
char*)lpHeaderBuf,512);
PDocHeader pHeaderSec
= (PDocHeader)lpHeaderBuf;

cout
<<"open the "<<inputFileName<<" file is successful\n "<<endl;
DumpDocHeader(pHeaderSec);
MaxMiniStreamSize
= pHeaderSec->_ulMiniSectorCutoff;
/**********************************************************************/
/* 处理msat */
/************************************************************************/
unsigned
long iMastSize = 0;
vector
<int> vMastList; /** 用于存储sat链表*/
if ((pHeaderSec->_sectDifStart == ENDOFCHAIN)&&(pHeaderSec->_csectDif == 0))
{
for(iMastSize = 1;iMastSize <= 109 ;iMastSize ++)
{
if ((( pHeaderSec->_sectFat[iMastSize-1] ) == 0xFFFFFFFF))
{
break;
}
else
{

BYTE
*SecBuf = new BYTE[SectorSize];
IfReadFile(inStream,SecBuf,GetOffestFremSid((pHeaderSec
->_sectFat[iMastSize - 1] )),SectorSize);
int * pListOfMast = (int *)SecBuf;
int i = 0;
while( pListOfMast[i]!= FREESECT )
{
vMastList.push_back(pListOfMast[i]);
//cout<<vMastList.front()<<" \t"<<vMastList.back()<<'\t';

cout
<<"MAST["<<i<<"] == ";
cout
<<vMastList[i]<<'\t';
if (vMastList[vMastList.size()-1] == ENDOFCHAIN)
{
cout
<<endl;
}
i
++;
}

delete []SecBuf;
}

}
}
else
{
/** Mast 大于 109个扇区的情况 */
}

/************************************************************************/
/* 处理ssat */
/************************************************************************/

/*
* 读取存取ssat短链分配表占用的扇区链,
*/
vector
<int> vSsatFat;
vSsatFat.push_back(pHeaderSec
->_sectMiniFatStart);
int index = vSsatFat[0];
while(vMastList[index] != ENDOFCHAIN )
{
index
= vMastList[index];
vSsatFat.push_back(index);

}

vSsatFat.push_back(vMastList[index]);
int i = 0; /** 循环计数器*/
for (i; i < vSsatFat.size();i++)
{
cout
<<"S-FAT["<<i<<"] == ";
cout
<<hex<<vSsatFat[i]<<'\t';
}
cout
<<endl;

/*
* 读取ssat链,即用于记录short stream 的链
* pHeaderSec->_csectMiniFat 指出short stream 占几个扇区
*/
vector
<int> vSsatList;

for (i = 1; i <= pHeaderSec->_csectMiniFat; i++)
{

BYTE
*SecBuf = new BYTE[SectorSize];
IfReadFile(inStream,SecBuf,GetOffestFremSid(vSsatFat[i
-1]),SectorSize);
int * pListOfMast = (int *)SecBuf;
int i = 0;
while( pListOfMast[i]!= FREESECT )
{
vSsatList.push_back(pListOfMast[i]);
//cout<<vMastList.front()<<" \t"<<vMastList.back()<<'\t';

cout
<<"SSAT["<<i<<"] == ";
cout
<<vSsatList[i]<<'\t';
if (vSsatList[vSsatList.size()-1] == ENDOFCHAIN)
{
cout
<<endl;
}
i
++;
}

delete []SecBuf;

}

/************************************************************************/
/* Process Directory */
/************************************************************************/

/*
* 读取存取directory 的扇区sid链
*/
vector
<int> vDirFat;
vDirFat.push_back(pHeaderSec
->_sectDirStart);
index
= vDirFat[0];
while(vMastList[index] != ENDOFCHAIN )
{
index
= vMastList[index];
vDirFat.push_back(index);

}

vDirFat.push_back(vMastList[index]);

for (i =0; i < vDirFat.size();i++)
{
cout
<<"Director-FAT["<<i<<"] == ";
cout
<<hex<<vDirFat[i]<<'\t';
}
cout
<<endl;
cout
<<vDirFat.size()<<endl;

/*
*分析并处理directory目录
*/
vector
<DirectoryEntry> lDirList;
for (i=0; i<(vDirFat.size()-1) ;i++)
{
BYTE
*SecBuf = new BYTE[SectorSize];
IfReadFile(inStream,SecBuf,GetOffestFremSid(vDirFat[i]),SectorSize);
PDirectoryEntry pDirEntry
= PDirectoryEntry(SecBuf);
for (int j = 0;j<4;j++)
{
DirectoryEntry tempDirEntry
= pDirEntry[j];
lDirList.push_back(tempDirEntry);

}



delete []SecBuf;
}

vector
< vector<int> > vFatOfDirEntry;
/*
*processing DirectoryEntry list
*/
for(i = 0;i<lDirList.size();i++)
{
ProcessDirEntry(
&lDirList[i],vMastList,vSsatList,vFatOfDirEntry,i) ;
}



delete []lpHeaderBuf;

inStream.close();
return 0;
}

bool ProcessDirEntry(PDirectoryEntry pDirEntry,vector<int> & slist,vector<int> & sslist,vector<vector<int> >& vFatOfDirEntry,int i)
{
string DirName;
unsigned
int index;
vector
<int> FatOfDirEntry;
if (pDirEntry->_cb == 0)
{
return false;
}
else
{
char buf[256] ={0,0};
wcstombs(buf,(wchar_t
*)pDirEntry->_ab,(size_t)pDirEntry->_cb);
DirName
= buf;

/************************************************************************/
/* 是否是短流 */
/************************************************************************/
if((pDirEntry->_ulSize < MaxMiniStreamSize)&&(pDirEntry->_mse != STGTY_ROOT))
{
FatOfDirEntry.push_back(pDirEntry
->_sectStart);
index
= pDirEntry->_sectStart;
while(sslist[index] != ENDOFCHAIN)
{
index
=sslist[index];
FatOfDirEntry.push_back(index);
}
FatOfDirEntry.push_back(sslist[index]);

}
else
{
FatOfDirEntry.push_back(pDirEntry
->_sectStart);
index
= pDirEntry->_sectStart;
while(slist[index] != ENDOFCHAIN)
{
index
=slist[index];
FatOfDirEntry.push_back(index);
}
FatOfDirEntry.push_back(slist[index]);
}
cout
<<DirName<<'\t';
for (int j =0; j < FatOfDirEntry.size();j++)
{
cout
<<" \" \" <<DirName<<-FAT["<<j<<"] == ";
cout
<<hex<<FatOfDirEntry[j]<<'\t';
}
cout
<<endl;
vFatOfDirEntry.push_back(FatOfDirEntry);
FatOfDirEntry.clear();

}

return true;

}



/*
 * Reads `size` bytes starting at byte offset `iReadOffest` (counted from the
 * beginning of the file) into `buf`.
 *
 * Returns true only when all `size` bytes were read. Returns false when `buf`
 * is null, the seek fails or the file ends early. Any error state left on the
 * stream by an earlier call is cleared first, so a failed read does not make
 * every later read fail as well.
 */
bool IfReadFile(std::ifstream &inStream,unsigned char * buf,unsigned int iReadOffest,size_t size)
{
    if (!buf)
    {
        return false;
    }

    inStream.clear();
    inStream.seekg(static_cast<std::streamoff>(iReadOffest), std::ios::beg);
    if (inStream.fail())
    {
        return false;
    }

    inStream.read(reinterpret_cast<char *>(buf), static_cast<std::streamsize>(size));
    return static_cast<size_t>(inStream.gcount()) == size;
}

/*
 * Byte offset of sector `sid` inside the file: the 512-byte header comes
 * first, followed by the sectors, each SectorSize bytes long.
 */
int GetOffestFremSid(SECT sid)
{
    const int headerBytes = 512;
    return headerBytes + sid * SectorSize;
}


/*
 * Returns 2^shift, or 0 when the result would not fit into a positive int.
 */
static int PowerOfTwoSize(unsigned int shift)
{
    return (shift <= 30) ? static_cast<int>(1u << shift) : 0;
}

/*
 * Prints the fields of the document header to cout and stores the sector and
 * short-sector sizes in the globals SectorSize and miniSectorSize.
 *
 * Returns false when pHeader is null or when one of the two size shifts is
 * too large to be represented (the affected size is then stored as 0);
 * returns true otherwise. cout is left in decimal mode.
 */
bool DumpDocHeader(PDocHeader pHeader)
{
    if (!pHeader)
    {
        return false;
    }

    /* Signature bytes, printed in hexadecimal. */
    cout<<"\t The comdoc flag is\t{ ";
    for (int sigIdx = 0; sigIdx < 8; sigIdx++)
    {
        cout<<hex<<static_cast<int>(pHeader->_abSig[sigIdx])<<" ";
        if (sigIdx != 7)
        {
            cout<<',';
        }
    }
    cout<<'}'<<endl;

    if (pHeader->_uByteOrder == 0xFFFE)
    {
        cout<<"\t The file Byte order is Little-Endian"<<endl;
    }
    cout.setf(ios::dec,ios::basefield);  /* everything below is printed in decimal */

    /* Both sizes are stored in the header as powers of two. */
    SectorSize = PowerOfTwoSize(static_cast<unsigned int>(pHeader->_uSectorShift));
    cout<<"\t Size of a Sector in the compound document file is "<<SectorSize<<endl;
    miniSectorSize = PowerOfTwoSize(static_cast<unsigned int>(pHeader->_uMiniSectorShift));
    cout<<"\t Size of a short-sector in the short-stream container stream is "<<miniSectorSize<<endl;

    cout<<"\t Total number of sectors used for the sector allocation table is "<<static_cast<DWORD>(pHeader->_csectFat)<<endl;
    cout<<"\t SecID of first sector of the directory stream is "<<static_cast<ULONG>(pHeader->_sectDirStart)<<endl;
    cout<<"\t Minimum size of a standard stream is "<<static_cast<ULONG>(pHeader->_ulMiniSectorCutoff)<<endl;
    cout<<"\t SecID of first sector of the short-sector allocation table is "<<static_cast<ULONG>(pHeader->_sectMiniFatStart)<<endl;
    cout<<"\t Total number of sectors used for the short-sector allocation table is "<<static_cast<ULONG>(pHeader->_csectMiniFat)<<endl;
    cout<<"\t SecID of first sector of the master sector allocation table is "<<pHeader->_sectDifStart<<endl;
    cout<<"\t Total number of sectors used for the master sector allocation table is "<<static_cast<ULONG>(pHeader->_csectDif)<<endl;

    /* SecIDs stored in the header itself; the list ends at the first unused (all-ones) slot. */
    cout<<"\t First part of the master sector allocation table containing 109 SecIDs is "<<endl;
    cout<<"\t {\t";
    for (int slot = 0; slot < 109; slot++)
    {
        if (pHeader->_sectFat[slot] == 0xFFFFFFFF)
        {
            break;
        }
        cout<<pHeader->_sectFat[slot]<<'\t';
        if ((slot + 1) % 20 == 0)
        {
            cout<<endl;  /* wrap the listing after every 20 ids */
        }
    }
    cout<<'}'<<endl;

    return (SectorSize > 0) && (miniSectorSize > 0);
}

posted @ 2011-03-17 14:24  MayFirst  阅读(661)  评论(0编辑  收藏  举报