C/C++ 实现URL路径拆分
URL路径拆分:例如传入 http://www.baidu.com/index.html,将其拆分为主机名 www.baidu.com 和路径 /index.html;如果URL中不包含路径,则路径默认为 /。
#include <Windows.h>
#include <iostream>
/**
 * Splits a URL into its host part and its path part.
 *
 * A leading "http://" or "https://" (case-sensitive) is skipped when present;
 * any other input is treated as starting directly with the host. The host is
 * everything up to the first '/' (or the end of the string) and the path is
 * everything from that '/' onwards. When the URL has no path, "/" is used.
 *
 * @param szUrl  NUL-terminated URL to split. It is not modified.
 * @param szHost Receives the NUL-terminated host. The caller must supply at
 *               least strlen(szUrl) + 1 bytes.
 * @param szPath Receives the NUL-terminated path. The caller must supply at
 *               least strlen(szUrl) + 2 bytes.
 * @return 0 on success, -1 if any argument is a null pointer.
 */
int ParseUrl(char szUrl[], char szHost[], char szPath[])
{
    if (!szUrl || !szHost || !szPath)
        return -1;

    // Skip the scheme prefix if one of the two known schemes is present.
    size_t schemeLen = 0;
    if (strncmp(szUrl, "http://", 7) == 0)
        schemeLen = 7;
    else if (strncmp(szUrl, "https://", 8) == 0)
        schemeLen = 8;

    // The host runs until the first '/' or the end of the string.
    const char *host = szUrl + schemeLen;
    size_t hostLen = 0;
    while (host[hostLen] != '\0' && host[hostLen] != '/')
        ++hostLen;

    // Terminate explicitly so the result does not depend on the caller
    // having zero-filled the output buffers beforehand.
    memcpy(szHost, host, hostLen);
    szHost[hostLen] = '\0';

    const char *path = host + hostLen;
    if (*path == '\0')
    {
        // No path component in the URL: default to the root.
        szPath[0] = '/';
        szPath[1] = '\0';
    }
    else
    {
        // +1 copies the terminating NUL along with the path text.
        memcpy(szPath, path, strlen(path) + 1);
    }
    return 0;
}
// Demo entry point: splits a fixed sample URL with ParseUrl and prints the
// resulting host and path, then waits for a key press before exiting.
int main(int argc, char *argv[])
{
    char url[] = "http://www.baidu.com/index.html";
    char host[1024] = "";   // remaining bytes are zero-filled
    char path[2048] = "";

    const bool parsed = (ParseUrl(url, host, path) == 0);
    if (parsed)
    {
        printf("主机: %s \n", host);
        printf("路径: %s \n", path);
    }

    // Keep the console window open when launched from an IDE or Explorer.
    system("pause");
    return 0;
}
HTTP 文件下载:通过 Winsock 套接字发送 GET 请求,并把服务器返回的全部数据(包含响应头)写入本地文件。
#define _CRT_SECURE_NO_WARNINGS
#include <Windows.h>
#include <iostream>
#include <winsock.h>
#pragma comment(lib,"ws2_32.lib")
int Spide(const char *pszUrl, const char *pszFile)
{
char szHost[256] = {0};
char *ptr = (char *)pszUrl;
// 判断开头是否为http://如果不是则返回-1
if (_strnicmp(ptr, "http://", 7) != 0) { return -1; }
ptr = ptr + 7;
int index = 0;
while (index < 255 && *ptr && *ptr != '/')
{
szHost[index++] = *ptr++;
}
szHost[index] = '\0';
//printf("去掉http后的域名地址: %s \n", szHost);
char *buffer = new char[1024 * 8];
index = sprintf(buffer,
"GET %s HTTP/1.1\r\n"
"Host: %s\r\n"
"User-Agent: IE or Chrome\r\n"
"Accept-Type: */*\r\n"
"Connection: Close\r\n\r\n",
ptr, szHost);
//printf("构建好的请求头:\n %s \n", buffer);
// ------------------------------------------------------------
SOCKET fd = socket(AF_INET, SOCK_STREAM, 0);
SOCKADDR_IN addr;
addr.sin_addr.S_un.S_addr = 0;
addr.sin_port = htons(0);
addr.sin_family = AF_INET;
index = bind(fd, (const sockaddr*)&addr, sizeof(SOCKADDR_IN));
hostent *p = ::gethostbyname(szHost);
if (p) {
ULONG ai = *(ULONG*)p->h_addr_list[0];
addr.sin_addr.S_un.S_addr = ai;
addr.sin_port = htons(80);
index = connect(fd, (const sockaddr*)&addr, sizeof(SOCKADDR_IN));
if (index == NOERROR) {
index = send(fd, (const char*)buffer, (int)strlen(buffer), 0);
FILE *pf = fopen(pszFile, "wb");
do {
index = recv(fd, buffer, 8191, 0);
if (index <= 0) {
break;
}
buffer[index] = '\0';
fwrite(buffer, 1, index, pf);
printf("%s", buffer);
} while (TRUE);
fclose(pf);
}
}
closesocket(fd);
delete[] buffer;
return 0;
}
int main(int argc,char *argv[])
{
WSADATA wsaData;
WSAStartup(0x0202, &wsaData);
Spide("http://cn.bing.com/","index.html");
system("pause");
return 0;
}
实现HTTP页面下载功能
#include <Windows.h>
#include <iostream>
#include <winsock.h>
#pragma comment(lib,"ws2_32.lib")
int Curl_Get(const char *pszUrl)
{
char szHost[256] = { 0 };
char *ptr = (char *)pszUrl;
// 判断开头是否为http:// 或者 https:// 如果不是则返回-1
if (_strnicmp(ptr, "http://", 7) == 0)
ptr = ptr + 7;
else if (_strnicmp(ptr, "https://", 8) == 0)
ptr = ptr + 8;
else
return -1;
int index = 0;
while (index < 255 && *ptr && *ptr != '/')
szHost[index++] = *ptr++;
szHost[index] = '\0';
char *buffer = new char[1024 * 8];
index = sprintf(buffer,
"GET %s HTTP/1.1 \r\n"
"Host: %s \r\n"
"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0 \r\n"
"Accept-Type: */* \r\n"
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8 \r\n"
"Accept-Language: zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2 \r\n"
"Connection: Close \r\n\r\n",
ptr, szHost);
printf("%s \n", buffer);
SOCKADDR_IN addr;
SOCKET fd = socket(AF_INET, SOCK_STREAM, 0);
addr.sin_addr.S_un.S_addr = 0;
addr.sin_port = htons(0);
addr.sin_family = AF_INET;
index = bind(fd, (const sockaddr*)&addr, sizeof(SOCKADDR_IN));
hostent *p = gethostbyname(szHost);
if (p)
{
ULONG ai = *(ULONG*)p->h_addr_list[0];
addr.sin_addr.S_un.S_addr = ai;
addr.sin_port = htons(80);
index = connect(fd, (const sockaddr*)&addr, sizeof(SOCKADDR_IN));
if (index == NOERROR)
{
index = send(fd, (const char*)buffer, (int)strlen(buffer), 0);
do
{
index = recv(fd, buffer, 8191, 0);
if (index <= 0) { break; }
buffer[index] = '\0';
printf("%s \n", buffer);
} while (TRUE);
}
}
closesocket(fd);
return 0;
}
// Demo entry point: initialises Winsock 2.2, fetches the Bing home page with
// Curl_Get (which prints the response to stdout), then releases Winsock.
// Returns 0 on success and 1 if Winsock could not be started or the
// request failed.
int main(int argc, char *argv[])
{
    WSADATA wsaData;
    // Without a successful WSAStartup every later socket call would fail.
    if (WSAStartup(0x0202, &wsaData) != 0)
    {
        fprintf(stderr, "WSAStartup failed\n");
        return 1;
    }

    const int ret = Curl_Get("http://cn.bing.com/");
    if (ret != 0)
    {
        fprintf(stderr, "request failed\n");
    }

    WSACleanup();
    system("pause");
    return (ret == 0) ? 0 : 1;
}
文章出处:https://www.cnblogs.com/LyShark/p/12921581.html
本博客所有文章除特别声明外,均采用 BY-NC-SA 许可协议。转载请注明出处!
本博客所有文章除特别声明外,均采用 BY-NC-SA 许可协议。转载请注明出处!
标签:
Visual C++
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
· 理解Rust引用及其生命周期标识(上)
· DeepSeek 开源周回顾「GitHub 热点速览」
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· AI与.NET技术实操系列(二):开始使用ML.NET
· 单线程的Redis速度为什么快?
2018-05-20 Wget/httrack 爬取整站资源
2018-05-20 CentOS/RedHat 常用Yum镜像源