socket抓取网页

#include <iostream>
#include <string>
#include <netdb.h>
#include <stdio.h>
#include <stdlib.h>
#include <arpa/inet.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <strings.h>
#include <string.h>
#include <unistd.h>
using namespace std;

// Sends exactly `len` bytes from `data` on socket `fd`, retrying after
// partial sends. Returns the number of bytes sent, or -1 if send() failed.
static ssize_t send_all(int fd, const char *data, size_t len)
{
	size_t done = 0;
	while (done < len)
	{
		ssize_t n = send(fd, data + done, len - done, 0);
		if (n == -1)
		{
			return -1;
		}
		done += static_cast<size_t>(n);
	}
	return static_cast<ssize_t>(done);
}

// Writes exactly `len` bytes from `data` to file descriptor `fd`, retrying
// after short writes. Returns 0 on success, or -1 if write() made no progress.
static int write_all(int fd, const char *data, size_t len)
{
	size_t done = 0;
	while (done < len)
	{
		ssize_t n = write(fd, data + done, len - done);
		if (n <= 0)
		{
			return -1;
		}
		done += static_cast<size_t>(n);
	}
	return 0;
}

// Interactively fetches the root page ("/") of a web site over plain HTTP.
//
// Reads a host name from standard input, resolves it to an IPv4 address,
// connects to TCP port 80, sends a GET request and streams the raw response
// (status line and headers included, exactly as received) both to standard
// output and to the file "recv.html" in the current directory. The file is
// created if missing and truncated if it already exists.
//
// Returns normally once the server closes the connection. Any failure
// (input, resolution, socket, connect, send, open, recv, write) prints a
// diagnostic and terminates the process with exit(-1).
void func()
{
	std::string url;
	std::cout << "输入网址:" << std::endl;
	if (!(std::cin >> url))
	{
		std::cerr << "no host name given" << std::endl;
		exit(-1);
	}

	// Resolve the host name to an IP address.
	struct hostent *website_host = gethostbyname(url.c_str());
	if (website_host == NULL)
	{
		// gethostbyname reports failures through h_errno, not errno,
		// so herror() is the matching diagnostic (perror would mislead).
		herror("gethostbyname error");
		exit(-1);
	}
	std::cout << "主机名称:";
	std::cout << website_host->h_name << std::endl;
	std::cout << "地址类型:";
	std::cout << website_host->h_addrtype << std::endl;
	std::cout << "地址长度:";
	std::cout << website_host->h_length << std::endl;

	// The address is copied into a sockaddr_in below, so it must be IPv4.
	if (website_host->h_addrtype != AF_INET ||
	    website_host->h_length != static_cast<int>(sizeof(struct in_addr)) ||
	    website_host->h_addr_list[0] == NULL)
	{
		std::cerr << "gethostbyname error: no IPv4 address" << std::endl;
		exit(-1);
	}

	// Create the socket descriptor.
	int sockfd = socket(AF_INET, SOCK_STREAM, 0);
	if (sockfd == -1)
	{
		perror("socket error");
		exit(-1);
	}
	std::cout << "建立socket完毕" << std::endl;

	// Fill in the destination address (HTTP, port 80).
	struct sockaddr_in website_addr;
	memset(&website_addr, 0, sizeof(website_addr));
	website_addr.sin_family = AF_INET;
	website_addr.sin_port = htons(80);
	// memcpy instead of a pointer cast: the resolver's buffer is a char
	// array with no alignment guarantee for struct in_addr.
	memcpy(&website_addr.sin_addr, website_host->h_addr_list[0],
	       sizeof(website_addr.sin_addr));
	std::cout << "地址初始化完毕" << std::endl;

	// Connect to the server.
	if (connect(sockfd, reinterpret_cast<struct sockaddr *>(&website_addr),
	            sizeof(website_addr)) == -1)
	{
		perror("connect error");
		exit(-1);
	}
	std::cout << "连接完毕" << std::endl;

	// Build the HTTP request. std::string grows as needed, so an
	// arbitrarily long host name cannot overflow a fixed buffer.
	std::string request = "GET / HTTP/1.1\r\n";
	request += "Host: " + url + "\r\n";
	request += "Accept: */*\r\n";
	request += "User-Agent: Mozilla/4.0(compatible)\r\n";
	// Ask the server to close after the response; the receive loop below
	// relies on end-of-stream to know the response is complete.
	request += "Connection: close\r\n";
	// A single empty line terminates the header section.
	request += "\r\n";
	std::cout << "请求头构造完毕" << std::endl;
	std::cout << request << std::endl;

	ssize_t sent = send_all(sockfd, request.data(), request.size());
	if (sent == -1)
	{
		perror("send error");
		exit(-1);
	}
	std::cout << "发送完毕" << std::endl;
	std::cout << "send:\n" << sent << std::endl;

	// Open the output file, creating or truncating it as needed.
	int fd = open("recv.html", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	if (fd == -1)
	{
		perror("open error");
		exit(-1);
	}

	// Receive until the peer closes the connection.
	char buf[10 * 1024];
	for (;;)
	{
		ssize_t received = recv(sockfd, buf, sizeof(buf), 0);
		if (received == 0)
		{
			// Orderly shutdown by the server: the response is complete.
			std::cout << "对端关闭" << std::endl;
			break;
		}
		if (received == -1)
		{
			perror("recv error");
			exit(-1);
		}
		std::cout << "recv:" << received << std::endl;
		// Use the received length rather than a NUL terminator: the payload
		// may contain NUL bytes and may fill the buffer completely.
		std::cout.write(buf, received);
		std::cout << std::endl;
		if (write_all(fd, buf, static_cast<size_t>(received)) == -1)
		{
			perror("write error");
			exit(-1);
		}
	}

	close(fd);
	close(sockfd);
}

// Program entry point: runs the interactive page fetch, then reports success.
int main(void)
{
	func();

	return EXIT_SUCCESS;
}






版权声明:本文为博主原创文章,未经博主同意,不得转载。

posted @ 2015-07-20 09:53  phlsheji  阅读(635)  评论(0)  编辑  收藏  举报