php使用服务器进行远程抓取百度网页内容

   php使用服务器进行远程抓取百度网页内容

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
<?php
// Report every error level except notices and warnings.
error_reporting(E_ALL^E_NOTICE^E_WARNING);
// User-Agent of the incoming request; it is reused further down as the
// User-Agent of the outgoing cURL request. The header is optional (and is
// never present under CLI), so fall back to an empty string instead of
// reading an undefined index.
$useragent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
/**
 * Determine the IP address of the requesting client.
 *
 * The X-Forwarded-For request header is preferred; REMOTE_ADDR is used as a
 * fallback. A candidate is ignored when it is missing, empty, or equal to
 * "unknown" (case-insensitive). When the chosen value is a comma-separated
 * list, only its first entry is returned.
 *
 * NOTE(review): X-Forwarded-For is supplied by the client and can be spoofed;
 * do not use the result for access control.
 *
 * @return string The first usable address, or '' when none is available.
 */
function getip() {
    $unknown = 'unknown';
    // Start from a defined value so the function never returns an
    // uninitialised variable when neither source is usable.
    $ip = '';
    foreach (['HTTP_X_FORWARDED_FOR', 'REMOTE_ADDR'] as $key) {
        if (!empty($_SERVER[$key]) && strcasecmp($_SERVER[$key], $unknown) !== 0) {
            $ip = $_SERVER[$key];
            break;
        }
    }
    if (strpos($ip, ',') !== false) {
        // reset() takes its argument by reference, so it must not be fed the
        // result of explode() directly; index the array instead.
        $parts = explode(',', $ip);
        $ip = trim($parts[0]);
    }
    return $ip;
}
 
/**
 * Return the address of the directly connected peer.
 *
 * Reads REMOTE_ADDR from $_SERVER first and from the process environment
 * second.
 *
 * @return string The address, or "unknown" when neither source provides one.
 */
function get_client_ip(){
    // !empty() avoids an undefined-index warning when the key is absent
    // (for example when the script is run from the command line).
    if (!empty($_SERVER['REMOTE_ADDR'])) {
        return $_SERVER['REMOTE_ADDR'];
    }
    // Query the environment once and reuse the result.
    $fromEnv = getenv("REMOTE_ADDR");
    if ($fromEnv) {
        return $fromEnv;
    }
    return "unknown";
}
 
// Search keywords; one of them is picked at random for each request.
$word=[
 '医院',
];
$arrword=$word[mt_rand(0,count($word)-1)];
$keyword= urlencode($arrword);
$url = "http://m.baidu.com/s?word=".$keyword;
//$url = "http://www.sdfymj.com/ua.php";

// Request headers that imitate a browser and pass the caller's addresses on.
// All headers are sent through ONE CURLOPT_HTTPHEADER list: setting that
// option a second time replaces the earlier list instead of appending to it.
// No Host header is set because cURL derives it from the URL (m.baidu.com);
// no Content-Type is set because this is a GET request without a body.
$header = [
    "Connection: keep-alive",
    'Referer:http://m.baidu.com/',
    'X-FORWARDED-FOR:'.getip(),
    'CLIENT-IP:'.get_client_ip(),
];

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
// Forward the User-Agent captured at the top of the script.
curl_setopt($ch, CURLOPT_USERAGENT, $useragent);
// Return the response body from curl_exec() instead of printing it directly.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

// Execute the request.
$content = curl_exec($ch);
// Strict comparison: only a real failure is false; an empty or "0" body is
// a valid response and must not be reported as an error.
if ($content === false) {
    echo "error:" . curl_error($ch);
    $content = '';
}
// Release the handle.
curl_close($ch);

// Output the fetched page.
echo $content;
?>

  

posted @   圆柱模板  阅读(264)  评论(0)  编辑  收藏  举报
编辑推荐:
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
阅读排行:
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· 开源Multi-agent AI智能体框架aevatar.ai,欢迎大家贡献代码
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· AI技术革命,工作效率10个最佳AI工具
历史上的今天:
2019-10-09 destoon聚合搜索页面模板
点击右上角即可分享
微信分享提示