一个多线程通过 HTTP 下载文件的类(Linux下的实现)
多线程下载文件这个话题已经是老汤了。
在HTTP协议1.1中,允许通过增加一个HTTP Header“Range”来指定下载文件的区间。
所以一般的做法都是:
首先获取到文件大小(通过Content-Length)
再开一个线程池进行分块下载。
而在具体怎么实现的流程上,还是有差别的。
1. 标准的做法是:首先用一个线程发送HTTP GET指令,服务器会返回Content-Length,并能够根据协议判断出服务器是否支持Range。如果支持Range,则调配其它线程对后续部分分块下载。第一个线程继续下载第1块。
2. 还有一种做法是:首先发送HTTP HEAD指令,通过返回的Content-Length进行分块,再调配线程进行下载。
这里提供一个类,属于第2种。
为了提高IO性能,类中可以使用内存映射文件方式进行操作。
#ifndef CHTTPFILEDOWNLOADER_H_
#define CHTTPFILEDOWNLOADER_H_
#include "Generic.h"
// Downloads one HTTP URL into a local file.
// downloadUrlToFile() starts a background "leader" thread that runs
// downloadProcess(); waitForCompletion() blocks until that thread posts the
// completion semaphore and then reports the result.
class CHttpFileDownloader {
public:
// Initialises the completion semaphore.
CHttpFileDownloader();
// Destroys the completion semaphore.
virtual ~CHttpFileDownloader();
// Stores the URL and the target file name, then starts the leader thread.
// Returns false when the thread could not be created; a true return only
// means the download has been started, not that it succeeded.
bool downloadUrlToFile( const char * lpszUrl, const char * lpszFile);
// Waits on the completion semaphore and returns the value of m_bSuccess.
bool waitForCompletion(void);
private:
// Path of the file the download is written to.
string m_strLocalFile;
// Handle of the leader thread created by downloadUrlToFile().
pthread_t m_lLeaderThread;
// Server address resolved by downloadProcess(); downloadBlock() connects to it.
struct sockaddr_in m_stServerAddr;
// Part of the URL starting at the first '/' after the host.
char m_szResourceURI[1024];
// Host name without the ":port" suffix (the name that is resolved).
char m_szDomain[1024];
// "host[:port]" exactly as written into the Host request header.
char m_szHost[1024];
// The URL passed to downloadUrlToFile().
char m_szUrl[1024];
// Initialised to MAX_DOWNLOAD_THREAD; waited on before each block is
// dispatched and posted by each worker when it finishes.
sem_t m_stDownSem;
// Held while m_nDownloadThreadCnt is incremented or decremented.
pthread_mutex_t m_stDownloadThreadMutex;
// Number of worker threads started and not yet finished.
int m_nDownloadThreadCnt;
// Set when a block, thread creation or the final msync fails.
bool m_bFailed;
// Posted once by the leader thread when downloadProcess() has returned.
sem_t m_stCompleteSem;
// Result of downloadProcess(), written by the leader thread.
bool m_bSuccess;
// pthread entry point of the leader thread; param is the downloader object.
static void * leaderThread(void* param);
// pthread entry point of a worker thread; param is a malloc'ed tagDownloadTask.
static void * downloadThread(void* param);
// Sends the HEAD request, creates and maps the file, dispatches the blocks.
bool downloadProcess(void);
// Downloads nRangeSize bytes starting at file offset nRangeStart into pMemory.
void downloadBlock(unsigned char * pMemory, int nRangeStart, int nRangeSize);
// Sends nSize bytes from pBuf; true only when everything was sent.
bool sendBuffer( int nSocket, char * pBuf, int nSize);
// Sends the current contents of oStream; true only when everything was sent.
bool sendStringStream(int nSocket, stringstream & oStream);
// Receives into oStream until "\r\n\r\n" is seen or recv stops; returns the byte count.
int recvStringStream(int nSocket, stringstream & oStream);
// Splits the header part of a response into lines (without "\r\n").
std::vector<string> parseResponse(string strResponse);
// True when the status line carries a 2xx status code.
bool isHttpStatusSuccess(string & strHttpResponse);
// Returns the value of the first header line whose name matches lpszHeader
// (case-insensitive), or an empty string.
string getHeaderValueByName(const char * lpszHeader, std::vector<string> & vItems);
};
#endif /* CHTTPFILEDOWNLOADER_H_ */
/*
* CHttpFileDownloader.cpp
*
* Created on: 2008-12-15
* Author: root
*/
#include "Generic.h"
#include "CMainApp.h"
#include "CHttpFileDownloader.h"
// Size of one download block handed to a worker thread: 512 KiB.
// The value is parenthesised so the macro expands correctly inside larger
// expressions such as "x / DOWNLOAD_BLOCK_SIZE".
#define DOWNLOAD_BLOCK_SIZE (1024*512)
// Initial count of the download semaphore, i.e. the maximum number of worker
// threads running at the same time.
#define MAX_DOWNLOAD_THREAD 5
// Work item passed to CHttpFileDownloader::downloadThread. It is allocated
// with malloc() by downloadProcess() and released with free() by the worker.
typedef struct _tagDownloadTask
{
// Downloader that owns this task.
CHttpFileDownloader * pThis;
// Address inside the memory-mapped file where this block is written.
unsigned char * pStart;
// Number of bytes in this block.
int nSize;
// Offset of this block's first byte within the remote file.
int nRangeStart;
}tagDownloadTask, *LPDownloadTask;
/**
 * Puts the downloader into a well-defined idle state.
 *
 * All flags, counters and buffers are cleared, so no member is ever read
 * while it still holds an indeterminate value. The completion semaphore
 * starts at 0 and is posted by the leader thread when a download ends.
 */
CHttpFileDownloader::CHttpFileDownloader()
    : m_lLeaderThread()
    , m_nDownloadThreadCnt(0)
    , m_bFailed(false)
    , m_bSuccess(false)
{
    memset( &m_stServerAddr, 0, sizeof(m_stServerAddr));
    memset( m_szResourceURI, 0, sizeof(m_szResourceURI));
    memset( m_szDomain, 0, sizeof(m_szDomain));
    memset( m_szHost, 0, sizeof(m_szHost));
    memset( m_szUrl, 0, sizeof(m_szUrl));
    sem_init( &m_stCompleteSem, 0, 0);
}
// Releases the completion semaphore created by the constructor.
// NOTE(review): the destructor does not wait for the leader thread, which
// posts this semaphore when it finishes; presumably callers are expected to
// call waitForCompletion() before destroying the object - confirm.
CHttpFileDownloader::~CHttpFileDownloader() {
sem_destroy( &m_stCompleteSem );
}
bool CHttpFileDownloader::sendStringStream(int nSocket, stringstream & oStream)
{
int nSize = oStream.str().length() * sizeof(char);
char * pBuffer = new char[nSize];
memcpy( pBuffer, oStream.str().c_str(), nSize);
int nSent = 0;
while(nSent < nSize)
{
int nRet = send( nSocket, (char*)(pBuffer + nSent), nSize - nSent, 0);
if( nRet == -1 )
break;
nSent += nRet;
}
delete [] pBuffer;
return (nSent == nSize);
}
/**
 * Sends exactly nSize bytes from pBuf, looping over partial sends.
 *
 * @param nSocket connected socket descriptor.
 * @param pBuf    first byte to send.
 * @param nSize   number of bytes to send.
 * @return true when all nSize bytes were sent, false otherwise.
 */
bool CHttpFileDownloader::sendBuffer( int nSocket, char * pBuf, int nSize)
{
    if( nSize > 0 && pBuf == NULL )
        return false;

    int nSent = 0;
    while( nSent < nSize )
    {
        // MSG_NOSIGNAL: a peer that already closed the connection must make
        // this function return false instead of raising SIGPIPE.
        ssize_t nRet = send( nSocket, pBuf + nSent, nSize - nSent, MSG_NOSIGNAL);
        if( nRet < 0 && errno == EINTR )
            continue;   // interrupted by a signal before any byte was sent
        if( nRet <= 0 )
            break;      // hard error, or no progress: give up instead of spinning
        nSent += static_cast<int>(nRet);
    }
    return (nSent == nSize);
}
int CHttpFileDownloader::recvStringStream(int nSocket, stringstream & oStream)
{
int nReceived = 0;
while(true)
{
char szBuf[1025] = {0};
int nRet = recv( nSocket, szBuf, 1024, 0);
if( nRet == 0 )
break;
if( nRet < 0 )
break;
oStream << szBuf;
nReceived += nRet;
if( oStream.str().find("\r\n\r\n") != string::npos )
break;
}
return nReceived;
}
/**
 * Splits the header section of an HTTP response into its lines.
 *
 * Only the text in front of the first "\r\n\r\n" is looked at. Each returned
 * element is one line without its trailing "\r\n"; element 0 is the status
 * line. The result is empty when the response has no "\r\n\r\n" at all.
 *
 * @param strResponse raw response text.
 * @return header lines in their original order.
 */
std::vector<string> CHttpFileDownloader::parseResponse(string strResponse)
{
    std::vector<string> vLines;

    const string::size_type nHeaderEnd = strResponse.find("\r\n\r\n");
    if( nHeaderEnd == string::npos )
        return vLines;

    for( string::size_type nLineStart = 0; nLineStart < nHeaderEnd; )
    {
        const string::size_type nLineEnd = strResponse.find("\r\n", nLineStart);
        if( nLineEnd > nHeaderEnd )
            break;
        vLines.push_back( string( strResponse, nLineStart, nLineEnd - nLineStart));
        nLineStart = nLineEnd + 2;  // skip the "\r\n" itself
    }
    return vLines;
}
/**
 * Tells whether an HTTP status line reports success.
 *
 * The text between the first and the last space of the line is converted
 * with atoi(); when both are the same space, everything after it is used.
 *
 * @param strHttpResponse status line, e.g. "HTTP/1.1 200 OK".
 * @return true when the status code is in the range 200..299.
 */
bool CHttpFileDownloader::isHttpStatusSuccess(string & strHttpResponse)
{
    const string::size_type nFirstSpace = strHttpResponse.find(' ');
    if( nFirstSpace == string::npos )
        return false;

    // A first space exists, so a last one exists as well.
    const string::size_type nLastSpace = strHttpResponse.rfind(' ');

    const string strCode( strHttpResponse, nFirstSpace + 1, nLastSpace - nFirstSpace - 1);
    const int nCode = atoi( strCode.c_str());
    return !(nCode < 200 || nCode >= 300);
}
/**
 * Looks up a header by name in the lines produced by parseResponse().
 *
 * The name is compared case-insensitively. The value is returned with its
 * original letter case, with spaces and tabs around it removed.
 *
 * @param lpszHeader header name without the colon, e.g. "Content-Length".
 * @param vItems     header lines.
 * @return value of the first matching header; empty when there is none.
 */
string CHttpFileDownloader::getHeaderValueByName(const char * lpszHeader, std::vector<string> & vItems)
{
    if( lpszHeader == NULL )
        return string();

    string strPrefix = lpszHeader;
    strPrefix.append(":");

    for( std::vector<string>::const_iterator iter = vItems.begin(); iter != vItems.end(); ++iter)
    {
        const string & strItem = *iter;
        if( strItem.length() < strPrefix.length() )
            continue;

        // Compare only the name part, ignoring case. The cast to unsigned
        // char keeps tolower() defined for bytes above 0x7F.
        bool bMatch = true;
        for( string::size_type i = 0; i < strPrefix.length(); ++i)
        {
            if( std::tolower(static_cast<unsigned char>(strItem[i])) !=
                std::tolower(static_cast<unsigned char>(strPrefix[i])) )
            {
                bMatch = false;
                break;
            }
        }
        if( !bMatch )
            continue;

        // Take the value from the untouched line so its case is preserved.
        const string strValue = strItem.substr( strPrefix.length());
        const string::size_type nFirst = strValue.find_first_not_of(" \t");
        if( nFirst == string::npos )
            return string();
        const string::size_type nLast = strValue.find_last_not_of(" \t");
        return strValue.substr( nFirst, nLast - nFirst + 1);
    }
    return string();
}
bool CHttpFileDownloader::downloadUrlToFile( const char * lpszUrl, const char * lpszFile)
{
memset( m_szUrl, 0, 1024);
memcpy( m_szUrl, lpszUrl, strlen(lpszUrl));
m_strLocalFile = lpszFile;
// create thread
int nErr = pthread_create( &m_lLeaderThread
, NULL
, &leaderThread
, this
);
if( nErr != 0 )
{
CMainApp::getSingleton()->log("Error: pthread_create download leader thread failed. Return=%d, Message=%s"
, nErr
, strerror(nErr)
);
return false;
}
return true;
}
/**
 * Blocks until the leader thread has posted the completion semaphore.
 *
 * @return the download result recorded by the leader thread; false when
 *         waiting on the semaphore fails for a reason other than a signal.
 */
bool CHttpFileDownloader::waitForCompletion(void)
{
    // sem_wait() returns -1/EINTR when a signal handler interrupts it. That
    // is not completion, so wait again instead of returning a stale result.
    while( sem_wait(&m_stCompleteSem) == -1 )
    {
        if( errno != EINTR )
            return false;
    }
    return m_bSuccess;
}
/**
 * Entry point of the leader thread: runs the download, records the result
 * and wakes up waitForCompletion().
 *
 * @param param the CHttpFileDownloader that started the thread.
 * @return always NULL.
 */
void * CHttpFileDownloader::leaderThread(void* param)
{
    CHttpFileDownloader * pThis = static_cast<CHttpFileDownloader*>(param);
    CMainApp::getSingleton()->log("Info: download file \"%s\" start..."
        , pThis->m_szUrl
        );

    const bool bSuccess = pThis->downloadProcess();

    // Log before signalling: once the semaphore is posted the waiting thread
    // may destroy the object, so pThis must not be touched afterwards.
    CMainApp::getSingleton()->log("Info: download file \"%s\" %s..."
        , pThis->m_szUrl
        , bSuccess ? "success" : "failed"
        );

    pThis->m_bSuccess = bSuccess;
    sem_post( &pThis->m_stCompleteSem );    // last access to *pThis
    return NULL;
}
bool CHttpFileDownloader::downloadProcess(void)
{
// parse the url and port
string strUrl = m_szUrl;
std::transform( strUrl.begin(), strUrl.end(), strUrl.begin(), (int (*)(int))std::tolower);
size_t uFind = strUrl.find("http://");
if( uFind != 0 )
{
CMainApp::getSingleton()->log("Error: Invalid URL:%s"
, m_szUrl
);
return false;
}
int nLen = string("http://").length();
uFind = strUrl.find('/', nLen);
if( uFind > strUrl.length() )
{
CMainApp::getSingleton()->log("Error: Invalid URL:%s"
, m_szUrl
);
return false;
}
strUrl = m_szUrl;
string strHost = strUrl.substr( nLen, uFind - nLen);
string strResourceURI = strUrl.substr(uFind);
string strDomain = strHost;
uint uPort = 80;
uFind = strHost.find(':');
if( uFind < strHost.length() )
{
strDomain = strHost.substr( 0, uFind);
uPort = atoi( strHost.substr(uFind+1).c_str() );
}
struct hostent * pHostent = gethostbyname(strDomain.c_str());
if( pHostent == NULL )
{
CMainApp::getSingleton()->log("Error: failed to resolve the IP address for the URL:%s"
, m_szUrl
);
return false;
}
memset( &m_stServerAddr, 0, sizeof(m_stServerAddr));
m_stServerAddr.sin_family = AF_INET;
m_stServerAddr.sin_port = htons((short)uPort);
memcpy( (char*)&m_stServerAddr.sin_addr.s_addr, pHostent->h_addr_list[0], pHostent->h_length);
int sock = socket( AF_INET, SOCK_STREAM, 0);
if( sock == -1 )
{
CMainApp::getSingleton()->log("Error: socket failed. error=%s"
, strerror(errno)
);
return false;
}
memset( m_szResourceURI, 0, 1024);
memcpy( m_szResourceURI, strResourceURI.c_str(), strlen(strResourceURI.c_str()));
memset( m_szHost, 0, 1024);
memcpy( m_szHost, strHost.c_str(), strlen(strHost.c_str()));
memset( m_szDomain, 0, 1024);
memcpy( m_szDomain, strDomain.c_str(), strlen(strDomain.c_str()));
// populate the HTTP HEAD request
stringstream strHttp;
strHttp << "HEAD " << m_szResourceURI << " HTTP/1.1\r\n";
strHttp << "User-Agent: Mozilla/4.0 (compatible; MSIE 5.00; Windows 98)\r\n";
strHttp << "Host:" << m_szHost << "\r\n";
strHttp << "Cache-Control: no-cache\r\n";
strHttp << "Pragma: no-cache\r\n";
strHttp << "Connection: Keep-Alive\r\n";
strHttp << "Accept: */*\r\n";
strHttp << "\r\n";
int nRet = connect( sock
, (struct sockaddr *)&m_stServerAddr
, sizeof(struct sockaddr)
);
if( nRet == -1 )
{
CMainApp::getSingleton()->log("Error: failed to connect to URL:%s"
, m_szUrl
);
return false;
}
struct timeval tv = {0};
tv.tv_sec = 15;
if (setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (char *)&tv, sizeof(tv)))
{
CMainApp::getSingleton()->log("Error: setsockopt failed(1). error=%s"
, strerror(errno)
);
return false;
}
if( !sendStringStream( sock, strHttp) )
{
CMainApp::getSingleton()->log("Error: failed to send the HTTP HEAD request to URL:%s"
, m_szUrl
);
return false;
}
stringstream strResponse;
recvStringStream( sock, strResponse);
shutdown(sock, SHUT_RDWR);
close(sock);
// parse the response
std::vector<string> vItems = parseResponse(strResponse.str());
if( vItems.size() == 0 )
{
CMainApp::getSingleton()->log("Error: the HTTP HEAD response contains nothing. URL:%s"
, m_szUrl
);
return false;
}
if( !isHttpStatusSuccess(vItems[0]) )
{
CMainApp::getSingleton()->log("Error: %s. URL:%s"
, vItems[0].c_str()
, m_szUrl
);
return false;
}
string strContentLen = getHeaderValueByName( "Content-Length", vItems);
if( strContentLen.length() == 0 )
{
CMainApp::getSingleton()->log("Error: Invalid Content-Length in HTTP HEAD response. URL:%s"
, m_szUrl
);
return false;
}
int nContentLength = atoi(strContentLen.c_str());
// create file
std::ofstream outStream;
outStream.open( m_strLocalFile.c_str(), ios_base::out | ios_base::binary | ios_base::trunc);
outStream.seekp(nContentLength-1);
outStream.put('\0');
outStream.close();
int fd = open( m_strLocalFile.c_str(), O_RDWR);
if( fd == -1 )
{
CMainApp::getSingleton()->log("Error: can not create file \"%s\". %s"
, m_strLocalFile.c_str()
, strerror(errno)
);
return false;
}
unsigned char * pMemory = (unsigned char *)mmap(NULL, nContentLength, PROT_WRITE, MAP_SHARED | MAP_POPULATE | MAP_NONBLOCK, fd, 0);
close(fd);
if( pMemory == MAP_FAILED )
{
CMainApp::getSingleton()->log("Error: failed to map the file \"%s\" into memory; size=%d; error=%s"
, m_strLocalFile.c_str()
, nContentLength
, strerror(errno)
);
return false;
}
mlock(pMemory, nContentLength);
// create thread
sem_init( &m_stDownSem, 0, MAX_DOWNLOAD_THREAD);
pthread_mutex_init( &m_stDownloadThreadMutex, NULL);
m_bFailed = false;
int nDownloadLength = 0;
m_nDownloadThreadCnt = 0;
while(true)
{
sem_wait(&m_stDownSem);
if( nDownloadLength >= nContentLength ||
m_bFailed )
{
if( m_nDownloadThreadCnt == 0 )
break;
else
continue;
}
LPDownloadTask pTask = (LPDownloadTask)malloc(sizeof(tagDownloadTask));
pTask->pStart = (unsigned char*)(pMemory + nDownloadLength);
pTask->nSize = ((DOWNLOAD_BLOCK_SIZE + nDownloadLength) > nContentLength)
? (nContentLength - nDownloadLength) : DOWNLOAD_BLOCK_SIZE;
pTask->nRangeStart = nDownloadLength;
pTask->pThis = this;
nDownloadLength += pTask->nSize;
// create thread
pthread_t lThread;
int nErr = pthread_create( &lThread
, NULL
, &downloadThread
, pTask
);
if( nErr != 0 )
{
CMainApp::getSingleton()->log("Error: pthread_create download thread failed. Error=%d, Message=%s"
, nErr
, strerror(nErr)
);
m_bFailed = true;
}
else
{
pthread_mutex_lock(&m_stDownloadThreadMutex);
m_nDownloadThreadCnt ++;
pthread_mutex_unlock(&m_stDownloadThreadMutex);
}
}
pthread_mutex_destroy( &m_stDownloadThreadMutex);
sem_destroy(&m_stDownSem);
if( msync( pMemory, nContentLength, MS_SYNC) == -1 )
{
CMainApp::getSingleton()->log("Error: failed to msync the file \"%s\" from memory; size=%d; error=%s"
, m_strLocalFile.c_str()
, nContentLength
, strerror(errno)
);
m_bFailed = true;
}
munlock(pMemory, nContentLength);
munmap( pMemory, nContentLength);
return !m_bFailed;
}
void * CHttpFileDownloader::downloadThread(void* param)
{
LPDownloadTask pTask = static_cast<LPDownloadTask>(param);
pTask->pThis->downloadBlock( pTask->pStart
, pTask->nRangeStart
, pTask->nSize
);
pthread_mutex_lock(&(pTask->pThis->m_stDownloadThreadMutex));
pTask->pThis->m_nDownloadThreadCnt --;
pthread_mutex_unlock(&(pTask->pThis->m_stDownloadThreadMutex));
sem_post(&(pTask->pThis->m_stDownSem));
free(pTask);
return NULL;
}
void CHttpFileDownloader::downloadBlock(unsigned char * pMemory, int nRangeStart, int nRangeSize)
{
CMainApp::getSingleton()->log("Info: download block \"%s\" [%08d-%08d] start..."
, m_szUrl
, nRangeStart
, nRangeStart + nRangeSize - 1
);
int nReceived = 0;
int nErrorTimes = 0;
while( nReceived < nRangeSize && nErrorTimes < 5 && !m_bFailed )
{
int nSocket = socket( AF_INET, SOCK_STREAM, 0);
if( nSocket == -1 )
{
nErrorTimes++;
continue;
}
int nRet = connect( nSocket
, (struct sockaddr *)&m_stServerAddr
, sizeof(struct sockaddr)
);
if( nRet == -1 )
{
nErrorTimes++;
CMainApp::getSingleton()->log("Error: failed to connect to URL:%s"
, m_szUrl
);
continue;
}
{
struct timeval tv = {0};
tv.tv_sec = 15;
if (setsockopt(nSocket, SOL_SOCKET, SO_RCVTIMEO, (char *)&tv, sizeof(tv)))
{
nErrorTimes++;
CMainApp::getSingleton()->log("Error: setsockopt failed(2). error=%s"
, strerror(errno)
);
continue;
}
}
{
char szRequest[4096] = {0};
sprintf( szRequest, "GET %s HTTP/1.1\r\n"
"User-Agent: Mozilla/4.0 (compatible; MSIE 5.00; Windows 98)\r\n"
"Host: %s\r\n"
"Cache-Control: no-cache\r\n"
"Pragma: no-cache\r\n"
"Connection: Keep-Alive\r\n"
"Accept: */*\r\n"
"Range: bytes=%d-%d\r\n"
"\r\n"
, m_szResourceURI
, m_szHost
, nRangeStart+nReceived
, nRangeStart+nRangeSize-1
);
if( !sendBuffer( nSocket, szRequest, strlen(szRequest)) )
{
shutdown(nSocket, SHUT_RDWR);
close(nSocket);
CMainApp::getSingleton()->log("Error: failed to send the HTTP GET request to URL:%s"
, m_szUrl
);
nErrorTimes++;
continue;
}
}
char szBuf[1024] = {0};
nRet = recv( nSocket, szBuf, 1024, 0);
if( nRet <= 0 )
{
shutdown(nSocket, SHUT_RDWR);
close(nSocket);
CMainApp::getSingleton()->log("Error: recv failed(1). return code=%d, error=%s, URL=%s"
, nRet
, strerror(errno)
, m_szUrl
);
nErrorTimes++;
continue;
}
string strHttpResponse;
int nRemain = 0;
int nIndex = 0;
for(nIndex = 0; nIndex < nRet; nIndex++)
{
if( szBuf[nIndex] == '\r' &&
szBuf[nIndex+1] == '\n' &&
szBuf[nIndex+2] == '\r' &&
szBuf[nIndex+3] == '\n' )
{
char szTemp[1025] = {0};
memcpy( szTemp, szBuf, nIndex+4);
strHttpResponse = szTemp;
nRemain = nRet -(nIndex+4);
break;
}
}
if( strHttpResponse.length() == 0 )
{
shutdown(nSocket, SHUT_RDWR);
close(nSocket);
nErrorTimes++;
CMainApp::getSingleton()->log("Error: the response does not contain a HTTP header(1), URL:%s"
, m_szUrl
);
continue;
}
std::vector<string> vItems = parseResponse(strHttpResponse);
if( vItems.size() == 0 )
{
shutdown(nSocket, SHUT_RDWR);
close(nSocket);
nErrorTimes++;
CMainApp::getSingleton()->log("Error: the response does not contain a HTTP header(2). URL:%s"
, m_szUrl
);
continue;
}
if( !isHttpStatusSuccess(vItems[0]) )
{
shutdown(nSocket, SHUT_RDWR);
close(nSocket);
nErrorTimes++;
CMainApp::getSingleton()->log("Error: %s. URL:%s"
, vItems[0].c_str()
, m_szUrl
);
continue;
}
if( nRemain > 0 )
{
memcpy( (unsigned char*)(pMemory+nReceived), &(szBuf[nIndex+4]), nRemain);
nReceived += nRemain;
}
while( (nReceived < nRangeSize) && !m_bFailed )
{
nRet = recv( nSocket, (unsigned char*)(pMemory+nReceived), nRangeSize - nReceived, 0);
if( nRet <= 0 )
{
CMainApp::getSingleton()->log("Error: recv falied(2). return code=%d, error=%s, URL=%s"
, nRet
, strerror(errno)
, m_szUrl
);
nErrorTimes++;
break;
}
nReceived += nRet;
}
shutdown(nSocket, SHUT_RDWR);
close(nSocket);
}// while
m_bFailed = m_bFailed ? m_bFailed : (nReceived != nRangeSize);
CMainApp::getSingleton()->log("Info: download block \"%s\" [%08d-%08d] %s."
, m_szUrl
, nRangeStart
, nRangeStart + nRangeSize - 1
, (nReceived != nRangeSize) ? "Failed" : "Success"
);
}