c++实验二

实验目的: 

熟练使用类

了解http网络协议

利用http网络协议保存网页

实验内容: 

1、              实现http网络应用

2、              分析程序基本功能

3、              用http协议方式构建自己的一个网络应用,获取网页中超链接1层以上的内容(即不但要保存当前的页面,还要链接到相关页面,并保存其内容)

4、              撰写实验报告

先在C盘下建立一个名为lab3的文件夹,然后在命令行运行:tear http://www.***(***处填写你要获取的网址),结果保存在此文件夹内。

头文件:

// tear.h : class definitions for CTearSession and CTearException 
// 
// This is a part of the Microsoft Foundation Classes C++ library. 
// Copyright (c) Microsoft Corporation. All rights reserved. 
// 
// This source code is only intended as a supplement to the 
// Microsoft Foundation Classes Reference and related 
// electronic documentation provided with the library. 
// See these sources for detailed information regarding the 
// Microsoft Foundation Classes product. 

// Created by Mike Blaszczak. 
#include <afx.h> 
#include <afxwin.h> 
#include <afxinet.h>
#include <stdlib.h> 
#include <iostream>
using namespace std;

///////////////////////////////////////////////////////////////////////////// 
// Globals 

// URL to fetch. ParseOptions() points it at the first non-option command-line
// argument; main() later re-points it at each URL it is about to download.
LPCTSTR pszURL = NULL; 
// TRUE when /S was given: main() runs every line through StripTags() before printing it.
BOOL bStripMode = FALSE; 
// TRUE when /P was given: enables the progress messages written to cerr.
BOOL bProgressMode = FALSE; 
// Access type handed to the CTearSession constructor; /D and /L select the value.
DWORD dwAccessType = PRE_CONFIG_INTERNET_ACCESS; 

// Flags passed to every OpenRequest() call; /F additionally ORs in INTERNET_FLAG_RELOAD.
// Automatic redirects are switched off here, so main() follows redirects by hand.
DWORD dwHttpRequestFlags = INTERNET_FLAG_EXISTING_CONNECT | INTERNET_FLAG_NO_AUTO_REDIRECT; 

// Extra request headers added to every request through AddRequestHeaders().
const TCHAR szHeaders[] = _T("Accept: text/*\r\nUser-Agent: MFC_Tear_Sample\r\n"); 

///////////////////////////////////////////////////////////////////////////// 
// CTearSession object 

// TEAR wants to use its own derivative of the CInternetSession class 
// just so it can implement an OnStatusCallback() override. 

class CTearSession : public CInternetSession 
{ 
public: 
	// Builds the session for the given application name; nMethod is forwarded
	// to the base class as its third constructor argument.
	CTearSession(LPCTSTR pszAppName, int nMethod); 

	// Status hook: reports a successful server connection when progress
	// output is switched on.
	// NOTE(review): current MFC documents the first parameter of
	// CInternetSession::OnStatusCallback as DWORD_PTR -- confirm this
	// signature still overrides the base version on 64-bit targets.
	virtual void OnStatusCallback(DWORD dwContext, DWORD dwInternetStatus, 
		LPVOID lpvStatusInformation, DWORD dwStatusInformationLen); 
}; 


// The base session is always created with 1 as its second argument.
CTearSession::CTearSession(LPCTSTR pszAppName, int nMethod) 
	: CInternetSession(pszAppName, 1, nMethod) 
{ 
} 

// Only dwInternetStatus is inspected; the other arguments are deliberately unnamed.
void CTearSession::OnStatusCallback(DWORD /* dwContext */, 
									DWORD dwInternetStatus, 
									LPVOID /* lpvStatusInformation */, 
									DWORD /* dwStatusInformationLen */) 
{ 
	const bool bJustConnected = 
		(dwInternetStatus == INTERNET_STATUS_CONNECTED_TO_SERVER); 

	// Stay silent unless the user asked for progress output.
	if (bProgressMode && bJustConnected) 
		cerr << _T("Connection made!") << endl; 
} 


///////////////////////////////////////////////////////////////////////////// 
// CTearException -- used if something goes wrong for us 

// TEAR will throw its own exception type to handle problems it might 
// encounter while fulfilling the user's request. 

// Exception type TEAR raises for its own failures; it carries the code that
// main() copies into the process exit status when it catches one.
class CTearException : public CException 
{ 
	DECLARE_DYNCREATE(CTearException) 

public: 
	// Exit code associated with the failure.
	int m_nErrorCode; 

	// nCode defaults to 0 so the class has the default constructor that
	// dynamic creation needs.
	CTearException(int nCode = 0); 
	~CTearException() { } 
};



IMPLEMENT_DYNCREATE(CTearException, CException) 

// Stores the caller's code unchanged.
CTearException::CTearException(int nCode) 
{ 
	m_nErrorCode = nCode; 
} 

// Raises a heap-allocated CTearException carrying nCode. The exception is
// thrown by pointer, so the handler that catches it must release it
// (main() does so with Delete()).
void ThrowTearException(int nCode) 
{ 
	throw new CTearException(nCode); 
} 

cpp文件:

// tear.cpp : implements the TEAR console application 
// 
// This is a part of the Microsoft Foundation Classes C++ library. 
// Copyright (c) Microsoft Corporation. All rights reserved. 
// 
// This source code is only intended as a supplement to the 
// Microsoft Foundation Classes Reference and related 
// electronic documentation provided with the library. 
// See these sources for detailed information regarding the 
// Microsoft Foundation Classes product. 

 
#include "tear.h" 
#include <vector>
#include <string>
#include <iostream> 
#include <time.h>
#pragma warning(push) 
#pragma warning(disable: 4100 4667) 
#pragma warning(pop) 
using namespace std; 


///////////////////////////////////////////////////////////////////////////// 
// Routines 

// Writes the three-line program banner (title, version, blank line) to cerr.
void ShowBanner() 
{ 
	cerr << _T("TEAR - Tear a Page Off the Internet!") << endl 
		 << _T("Version 7.0 - Copyright (C) Microsoft Corp.") << endl 
		 << endl; 
} 

// Prints the command-line help to cerr and terminates the process with
// exit status 1; this function never returns to its caller.
void ShowUsage() 
{ 
	// Synopsis
	cerr << _T("Usage: TEAR [options] <URL>") << endl << endl 
		 << _T("\t<URL> points at a HTTP resource") << endl 
		 << _T("\t[options] are any of:") << endl; 

	// Request / output options
	cerr << _T("\t\t/F force reload of requested page") << endl 
		 << _T("\t\t/P show detailed progress information") << endl 
		 << _T("\t\t/S strip HTML tags from stream") << endl << endl; 

	// Internet access options
	cerr << _T("\t\t/L use local Internet access") << endl 
		 << _T("\t\t/D use pre-configured Internet access (default)") << endl; 

	cerr << endl; 
	exit(1); 
} 

// ParseOptions() looks for options on the command line and sets global 
// flags so the rest of the program knows about them. ParseOptions() 
// also initializes pszURL to point at the URL the user wanted. 

// Walks the command line (skipping argv[0]) and records the user's choices
// in the global option variables.
//   - An argument starting with '-' or '/' is an option; only the character
//     right after that prefix is examined, in either case.
//   - Any other argument is taken as the URL and stored in pszURL.
// Returns FALSE after printing a message to cerr when an option is unknown
// or a second URL is supplied; returns TRUE otherwise.
BOOL ParseOptions(int argc, char* argv[]) 
{ 
	for (int nArg = 1; nArg < argc; ++nArg) 
	{ 
		char* pszArg = argv[nArg]; 

		if (pszArg[0] != '-' && pszArg[0] != '/') 
		{ 
			// Not an option, so it must be the URL -- and only one is allowed.
			if (pszURL != NULL) 
			{ 
				cerr << _T("Error: can only specify one URL!") << endl; 
				return FALSE; 
			} 
			pszURL = pszArg; 
			continue; 
		} 

		switch (pszArg[1]) 
		{ 
		case 'D': 
		case 'd': 
			dwAccessType = PRE_CONFIG_INTERNET_ACCESS; 
			break; 

		case 'L': 
		case 'l': 
			dwAccessType = LOCAL_INTERNET_ACCESS; 
			break; 

		case 'S': 
		case 's': 
			bStripMode = TRUE; 
			break; 

		case 'P': 
		case 'p': 
			bProgressMode = TRUE; 
			break; 

		case 'F': 
		case 'f': 
			dwHttpRequestFlags |= INTERNET_FLAG_RELOAD; 
			break; 

		default: 
			cerr << _T("Error: unrecognized option: ") << pszArg << endl; 
			return FALSE; 
		} 
	} 

	return TRUE; 
} 

// StripTags() rips through a buffer and removes HTML tags from it. 
// The function uses a static variable to remember its state in case 
// a HTML tag spans a buffer boundary. 

// Removes HTML tags from the NUL-terminated text in pszBuffer, compacting the
// surviving characters in place. Everything from a '<' up to and including
// the next '>' is dropped. The "inside a tag" flag is a function-level static,
// so a tag left open at the end of one call is still being skipped at the
// start of the next call.
void StripTags(LPTSTR pszBuffer) 
{ 
	static BOOL bInTag = FALSE; 
	LPTSTR pszWrite = pszBuffer; 

	for (LPTSTR pszRead = pszBuffer; *pszRead != '\0'; ++pszRead) 
	{ 
		const TCHAR ch = *pszRead; 

		if (bInTag) 
		{ 
			// Discard tag content; a '>' ends the tag.
			if (ch == '>') 
				bInTag = FALSE; 
		} 
		else if (ch == '<') 
		{ 
			bInTag = TRUE; 
		} 
		else 
		{ 
			*pszWrite++ = ch; 
		} 
	} 

	*pszWrite = '\0'; 
} 

///////////////////////////////////////////////////////////////////////////// 
// The main() Thang 

int main(int argc, char* argv[]) 
{ 
//	ShowBanner(); 
	
	if (!AfxWinInit(::GetModuleHandle(NULL), NULL, ::GetCommandLine(), 0)) 
	{ 
		cerr << _T("MFC Failed to initialize.\n"); 
		return 1; 
	} 
	
	if (argc < 2 || !ParseOptions(argc, argv) || pszURL == NULL) 
		ShowUsage(); 
	
	int nRetCode = 0; 
	
	CTearSession session(_T("TEAR - MFC Sample App"), dwAccessType); 
	CHttpConnection* pServer = NULL; //MFC类CHttpConnection管理与HTTP服务器的连接
	CHttpFile* pFile = NULL; //If your Internet session reads data from an HTTP server, you must create an instance of CHttpFile. 
	try 
	{ 
		// check to see if this is a reasonable URL 

		///////////////////////////////////////////////////////
		CString vec[10000];
		int idx = 0, ca = 0;
		vec[idx++] = pszURL;

		while(true)
		{
			CString strServerName; 
		    CString strObject; 
			INTERNET_PORT nPort; 
			DWORD dwServiceType; 
			char fileName[100];	
			if(ca >= idx) break;
			if(ca == 0) freopen("c:/lab3/1.html", "w", stdout);
			else{
				sprintf(fileName, "c:/lab3/a%d.html", ca);	
				freopen(fileName, "w", stdout);
			}

			pszURL = vec[ca];
			if (!AfxParseURL(pszURL, dwServiceType, strServerName, strObject, nPort) || 
				dwServiceType != INTERNET_SERVICE_HTTP) 
			{ 
				cerr << _T("Error: can only use URLs beginning with http://") << endl; 
				++ ca; continue;
				ThrowTearException(1); 
			} 

			if (bProgressMode) 
			{ 
				cerr << _T("Opening Internet..."); 
				VERIFY(session.EnableStatusCallback(TRUE)); 
			} 

			pServer = session.GetHttpConnection(strServerName, nPort); 

			pFile = pServer->OpenRequest(CHttpConnection::HTTP_VERB_GET, 
				strObject, NULL, 1, NULL, NULL, dwHttpRequestFlags); 
			pFile->AddRequestHeaders(szHeaders); 
			pFile->SendRequest(); 

			DWORD dwRet; 
			pFile->QueryInfoStatusCode(dwRet); 

			// if access was denied, prompt the user for the password 

			if (dwRet == HTTP_STATUS_DENIED) 
			{ 
				DWORD dwPrompt; 
				dwPrompt = pFile->ErrorDlg(NULL, ERROR_INTERNET_INCORRECT_PASSWORD, 
					FLAGS_ERROR_UI_FLAGS_GENERATE_DATA | FLAGS_ERROR_UI_FLAGS_CHANGE_OPTIONS, NULL); 

				// if the user cancelled the dialog, bail out 

				if (dwPrompt != ERROR_INTERNET_FORCE_RETRY) 
				{ 
					cerr << _T("Access denied: Invalid password\n"); 
					++ ca; continue;
					ThrowTearException(1); 
				} 

				pFile->SendRequest(); 
				pFile->QueryInfoStatusCode(dwRet); 
			} 

			CString strNewLocation; 
			pFile->QueryInfo(HTTP_QUERY_RAW_HEADERS_CRLF, strNewLocation); 

			// were we redirected? 
			// these response status codes come from WININET.H 

			if (dwRet == HTTP_STATUS_MOVED || 
				dwRet == HTTP_STATUS_REDIRECT || 
				dwRet == HTTP_STATUS_REDIRECT_METHOD) 
			{ 
				CString strNewLocation; 
				pFile->QueryInfo(HTTP_QUERY_RAW_HEADERS_CRLF, strNewLocation); 

				int nPlace = strNewLocation.Find(_T("Location: ")); 
				if (nPlace == -1) 
				{ 
					cerr << _T("Error: Site redirects with no new location") << endl; 
					++ ca; continue;
					ThrowTearException(2); 
				} 

				strNewLocation = strNewLocation.Mid(nPlace + 10); 
				nPlace = strNewLocation.Find('\n'); 
				if (nPlace > 0) 
					strNewLocation = strNewLocation.Left(nPlace); 

				// close up the redirected site 

				pFile->Close(); 
				delete pFile; 
				pServer->Close(); 
				delete pServer; 

				if (bProgressMode) 
				{ 
					cerr << _T("Caution: redirected to "); 
					cerr << (LPCTSTR) strNewLocation << endl; 
				} 

				// figure out what the old place was 
				if (!AfxParseURL(strNewLocation, dwServiceType, strServerName, strObject, nPort)) 
				{ 
					cerr << _T("Error: the redirected URL could not be parsed.")<<endl; 
					++ ca;
					continue;
					ThrowTearException(2); 
				} 

				if (dwServiceType != INTERNET_SERVICE_HTTP) 
				{ 
					cerr << _T("Error: the redirected URL does not reference a HTTP resource.") << endl; 
					++ ca; continue;
					ThrowTearException(2); 
				} 

				// try again at the new location 
				pServer = session.GetHttpConnection(strServerName, nPort); 
				pFile = pServer->OpenRequest(CHttpConnection::HTTP_VERB_GET, 
					strObject, NULL, 1, NULL, NULL, dwHttpRequestFlags); 
				pFile->AddRequestHeaders(szHeaders); 
				pFile->SendRequest(); 

				pFile->QueryInfoStatusCode(dwRet); 
				if (dwRet != HTTP_STATUS_OK) 
				{ 
					cerr << _T("Error: Got status code ") << dwRet << endl; 
					++ ca; continue;
					ThrowTearException(2); 
				} 
			} 

			cerr << _T("Status Code is ") << dwRet << endl; 

			TCHAR sz[1024];
		    
			while (pFile->ReadString(sz, 1023)) 
			{ 
				if (bStripMode) 
					StripTags(sz); 
			    cout<<sz<<endl;
				
				if(ca == 0){
					CString cs;
					cs.Format(_T("%s"), sz);     //change TCHAR to CString
					string str = cs.GetBuffer(0); //change CString to string 
					string::size_type pos1 = 0, pos2;
					while((pos1 = str.find("href=\"http:/", pos1)) != string::npos){ //查找二层URL
						pos1 += 6;
						pos2 = str.find("\"",pos1);
						string temp = str.substr(pos1, pos2-pos1);
						pos1 = pos2;
						CString temp2;
						temp2.Format(_T("%s"), temp.c_str());
						vec[idx++] = temp2;
					}
				}	
			} 


			// NOTE: Since HTTP servers normally spit back plain text, the 
			// above code (which reads line by line) is just fine. However, 
			// other data sources (eg, FTP servers) might provide binary data 
			// which should be handled a buffer at a time, like this: 

#if 0 
			while (nRead > 0) 
			{ 
				sz[nRead] = '\0'; 
				if (bStripMode) 
					StripTags(sz); 
				cout << sz; 
				nRead = pFile->Read(sz, 1023); 
			} 
#endif 

			pFile->Close(); 
			pServer->Close(); 
			++ ca;
		}
} 
catch (CInternetException* pEx) 
{ 
	// catch errors from WinINet 
	
	TCHAR szErr[1024]; 
	pEx->GetErrorMessage(szErr, 1024); 
	
	cerr << _T("Error: (") << pEx->m_dwError << _T(") "); 
	cerr << szErr << endl; 
	
	nRetCode = 2; 
	pEx->Delete(); 
} 
catch (CTearException* pEx) 
{ 
	// catch things wrong with parameters, etc 
	
	nRetCode = pEx->m_nErrorCode; 
	TRACE1("Error: Exiting with CTearException(%d)\n", nRetCode); 
	pEx->Delete(); 
} 

if (pFile != NULL) 
delete pFile; 
if (pServer != NULL) 
delete pServer; 
session.Close(); 

return nRetCode; 
}

posted on 2010-11-15 09:14  CrazyAC  阅读(474)  评论(0)  编辑  收藏  举报