一个爬虫
<?php
/**
 * Crawler / import script.
 *
 * Reads account records from a text file (one record per line), looks up each
 * record's ID card number and bank card number on two public lookup sites, and
 * stores the combined result in the `info` table of the `aiqiyi` database.
 *
 * NOTE(review): the input lines carry personal data (real name, ID card number,
 * phone number, bank card number), and the ID / bank card numbers are sent to
 * third-party sites over plain HTTP. Confirm that this processing is authorised
 * before running the script.
 */

read();

/**
 * Entry point: parse the input file, enrich every valid record and persist it.
 *
 * Lines whose ID card field is not exactly 18 bytes long are skipped.
 * Prints a charset meta tag, a '1' progress marker and finally the number of
 * records that passed the ID card check.
 *
 * @return void
 */
function read()
{
    // Belt and braces: declare UTF-8 both as an HTTP header and as a meta tag.
    // header() cannot be used once output has started, so only send it while
    // that is still possible.
    if (!headers_sent()) {
        header("Content-type:text/html;charset=utf-8");
    }
    echo '<meta charset="utf8">';

    $path = 'D:\歌词.txt.txt';
    $myfile = is_readable($path) ? fopen($path, 'r') : false;
    echo '1';
    if ($myfile === false) {
        // Without an input file there is nothing to crawl or to store.
        echo 'could not open input file';
        return;
    }

    $info = [];
    // Compare against false explicitly: a line such as "0" is falsy but valid.
    while (($line = fgets($myfile)) !== false) {
        $record = _parseAccountLine($line);
        if ($record === null) {
            continue;
        }

        // Fetch the ID card details page (the site serves gb2312).
        $idCardUrl = 'http://qq.ip138.com/idsearch/index.asp?action=idcard&userid='
            . urlencode($record['idCard']);
        $record['idCrad_info'] = getIDinfo(curl($idCardUrl, 'gb2312'));

        // Only look up bank cards whose number is 16 to 19 bytes long.
        $cardLength = strlen($record['bankCard']);
        if ($cardLength > 15 && $cardLength < 20) {
            $bankCardUrl = 'http://www.cardcn.com/search.php?word=' . urlencode($record['bankCard']);
            $bankPage = curl($bankCardUrl);
            // The site answers with a page containing '对不起' when it has no match.
            if ($bankPage !== '' && substr_count($bankPage, '对不起') == 0) {
                $record['bankCard_info'] = getBankinfo($bankPage);
            }
        }

        $info[] = $record;
    }
    fclose($myfile);

    cl_slqi($info);
    echo count($info);
}

/**
 * Split one input line into its labelled fields.
 *
 * The fields are located by their labels, each search starting where the
 * previous field ended. A field whose labels are missing becomes ''.
 *
 * @param string $line One raw line of the input file.
 * @return array|null Map with keys net_name, email, name, idCard, phone_number
 *                    and bankCard, or null when the ID card field is not
 *                    exactly 18 bytes long.
 */
function _parseAccountLine($line)
{
    list($netName, $pos) = _extractBetween($line, '用户名:', 'email:', 0);
    list($email, $pos) = _extractBetween($line, 'email:', '真名:', $pos);
    list($name, $pos) = _extractBetween($line, '真名:', '身份证号:', $pos);
    list($idCard, $pos) = _extractBetween($line, '身份证号:', '绑定手机号', $pos);
    if (strlen($idCard) != 18) {
        return null;
    }
    list($phoneNumber, $pos) = _extractBetween($line, '绑定手机号', '账户可', $pos);
    list($bankCard, $pos) = _extractBetween($line, '行卡号:', '银行:', $pos);

    return [
        'net_name' => $netName,
        'email' => $email,
        'name' => $name,
        'idCard' => $idCard,
        'phone_number' => $phoneNumber,
        'bankCard' => $bankCard,
    ];
}

/**
 * Return the trimmed text between two markers, searching from $offset.
 *
 * @param string $haystack Text to search in.
 * @param string $startKey Marker that precedes the wanted text.
 * @param string $endKey   Marker that follows the wanted text.
 * @param int    $offset   Byte offset at which the search for $startKey begins.
 * @return array Two elements: the extracted text and the byte offset of
 *               $endKey. When either marker is missing the result is
 *               ['', $offset], so a chained search can carry on unaffected.
 */
function _extractBetween($haystack, $startKey, $endKey, $offset = 0)
{
    $start = strpos($haystack, $startKey, $offset);
    if ($start === false) {
        return ['', $offset];
    }
    $valueStart = $start + strlen($startKey);
    $end = strpos($haystack, $endKey, $valueStart);
    if ($end === false) {
        return ['', $offset];
    }

    return [trim(substr($haystack, $valueStart, $end - $valueStart)), $end];
}

/**
 * Fetch a URL with cURL and return the response body as UTF-8 text.
 * Works for https URLs as well because certificate checks are switched off.
 *
 * @param string $url    Address of the page to fetch.
 * @param string $coding Character encoding the site uses; anything other than
 *                       'utf-8' is converted to UTF-8 with iconv.
 * @return string The response body, or '' when the transfer or the conversion failed.
 */
function curl($url, $coding = 'utf-8')
{
    $ch = curl_init();
    if ($ch === false) {
        return '';
    }
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, 0);         // leave the response headers out of the result
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return the body instead of printing it
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1); // follow redirects
    // NOTE(review): certificate and host verification are deliberately disabled,
    // which leaves https transfers open to interception. Re-enable them once a
    // CA bundle is configured.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
    $result = curl_exec($ch);
    curl_close($ch);

    // curl_exec() yields false on failure; callers only do string work.
    if (!is_string($result)) {
        return '';
    }
    // Pages that are not UTF-8 have to be transcoded.
    if ($coding != 'utf-8') {
        $converted = iconv($coding, "utf-8//IGNORE", $result);
        return $converted === false ? '' : $converted;
    }

    return $result;
}

/**
 * Pick the ID card details out of the lookup page.
 *
 * @param string $crul HTML of the ID card lookup page (UTF-8).
 * @return array Keys 'date' (the 4 bytes after the birth date label), 'sex'
 *               (the 3 bytes after the sex label, i.e. one 3-byte UTF-8
 *               character) and 'idCard_space' (the text after the place label).
 *               A value is '' when its label is not found.
 */
function getIDinfo($crul)
{
    $page = (string) $crul;
    $sexMarker = '别:</td><td class="tdc2">';
    $dateMarker = '生日期:</td><td class="tdc2">';
    $placeMarker = ';地:</td><td class="tdc2">';

    $id_info = ['date' => '', 'sex' => '', 'idCard_space' => ''];
    $pos = 0; // each label is searched for after the previous one

    $sexIndex = strpos($page, $sexMarker);
    if ($sexIndex !== false) {
        $id_info['sex'] = trim((string) substr($page, $sexIndex + strlen($sexMarker), 3));
        $pos = $sexIndex;
    }

    $dateIndex = strpos($page, $dateMarker, $pos);
    if ($dateIndex !== false) {
        $id_info['date'] = trim((string) substr($page, $dateIndex + strlen($dateMarker), 4));
        $pos = $dateIndex;
    }

    $place = _extractBetween($page, $placeMarker, '<br/></td></t', $pos);
    $id_info['idCard_space'] = $place[0];

    return $id_info;
}

/**
 * Pick the bank card details out of the lookup page.
 *
 * @param string $bank_crul HTML of the bank card lookup page (UTF-8).
 * @return array Keys 'back_space' (text after the '归属信息' label), 'bank_name',
 *               'bankCard_name' and 'bank_kind'. A value is '' when its label
 *               is not found.
 */
function getBankinfo($bank_crul)
{
    $page = (string) $bank_crul;
    $bank_info = [];

    // The first three fields appear in this order, so each search continues
    // from the end of the previous field.
    list($bank_info['back_space'], $pos) = _extractBetween($page, 'e">归属信息:</font>', '</dt>', 0);
    list($bank_info['bank_name'], $pos) = _extractBetween($page, 'e">银行名称:</font>', '</dt>', $pos);
    list($bank_info['bankCard_name'], $pos) = _extractBetween($page, 'e">银行卡名:</font>', '</dt>', $pos);

    // The card kind is looked up from the top of the page.
    $bank_info['bank_kind'] = getKeyWord($page, '<dt><font class="con_sub_title">银行卡种:</font>', '</dt>');

    return $bank_info;
}

/**
 * Cut the useful substring out of a page (crawler helper).
 *
 * @param string $info      The page text.
 * @param string $first_key Marker that precedes the wanted text.
 * @param string $last_key  Marker that follows the wanted text.
 * @return string The trimmed text between the two markers, or '' when either
 *                marker is missing.
 */
function getKeyWord($info, $first_key, $last_key)
{
    $found = _extractBetween((string) $info, $first_key, $last_key, 0);

    return $found[0];
}

/**
 * Write the collected records to the `info` table.
 *
 * Prints one success or failure line per record; a failed insert does not stop
 * the remaining records from being written.
 *
 * NOTE(review): the connection character set is left at the server default;
 * verify that it matches the UTF-8 text being stored.
 *
 * @param array $arr List of records as built by read(). A record that has a
 *                   'bankCard_info' entry also fills the bank columns.
 * @return void
 */
function cl_slqi($arr)
{
    // mysqli throws on failure when exception reporting is on (the default
    // since PHP 8.1) and returns false otherwise; treat both the same way.
    try {
        $con = mysqli_connect('localhost', 'root', 'root', 'aiqiyi');
    } catch (mysqli_sql_exception $e) {
        $con = false;
    }
    if (!$con) {
        die('could not connect');
    }

    // Values come from a file and from scraped pages, so they are bound as
    // parameters rather than pasted into the SQL text.
    $baseSql = 'insert into info(name,idCard,idCard_space,sex,date,net_name,email,phone_number)'
        . ' values(?,?,?,?,?,?,?,?)';
    $bankSql = 'insert into info(name,idCard,idCard_space,sex,date,net_name,email,phone_number,'
        . 'bankCard,back_name,bankCard_name,back_kind,back_space)'
        . ' values(?,?,?,?,?,?,?,?,?,?,?,?,?)';

    $temp = 0; // number of successful inserts so far
    foreach ($arr as $record) {
        $idInfo = (isset($record['idCrad_info']) && is_array($record['idCrad_info']))
            ? $record['idCrad_info']
            : [];
        $params = [
            _recordField($record, 'name'),
            _recordField($record, 'idCard'),
            _recordField($idInfo, 'idCard_space'),
            _recordField($idInfo, 'sex'),
            _recordField($idInfo, 'date'),
            _recordField($record, 'net_name'),
            _recordField($record, 'email'),
            _recordField($record, 'phone_number'),
        ];
        $sql = $baseSql;

        if (isset($record['bankCard_info']) && is_array($record['bankCard_info'])) {
            $bankInfo = $record['bankCard_info'];
            $params[] = _recordField($record, 'bankCard');
            $params[] = _recordField($bankInfo, 'bank_name');
            $params[] = _recordField($bankInfo, 'bankCard_name');
            $params[] = _recordField($bankInfo, 'bank_kind');
            $params[] = _recordField($bankInfo, 'back_space');
            $sql = $bankSql;
        }

        if (_insertRecord($con, $sql, $params)) {
            echo 'insert成功!这是第'.$temp.'个成功!';
            $temp++;
            echo "\n";
        } else {
            echo 'insert失败!';
            echo "\n";
        }
    }

    mysqli_close($con);
}

/**
 * Read one value from a record as a string, '' when the key is absent.
 *
 * @param array  $source Record or sub-record to read from.
 * @param string $key    Key to look up.
 * @return string
 */
function _recordField($source, $key)
{
    return isset($source[$key]) ? (string) $source[$key] : '';
}

/**
 * Run one parameterised INSERT; every parameter is bound as a string.
 *
 * @param mysqli $con    Open database connection.
 * @param string $sql    INSERT statement with one '?' per parameter.
 * @param array  $params Values for the placeholders, in order.
 * @return bool True when the row was written, false on any failure.
 */
function _insertRecord($con, $sql, array $params)
{
    try {
        $stmt = mysqli_prepare($con, $sql);
        if ($stmt === false) {
            return false;
        }
        $types = str_repeat('s', count($params));
        $written = mysqli_stmt_bind_param($stmt, $types, ...$params)
            && mysqli_stmt_execute($stmt);
        mysqli_stmt_close($stmt);

        return $written;
    } catch (mysqli_sql_exception $e) {
        // Keep the batch going: the caller reports the failed row.
        return false;
    }
}