全自动小说订阅微信推送

<?php
header("Content-type: text/html;charset=utf-8");
//set_time_limit(0);

// Database settings are read from the SAE_MYSQL_* constants, which are not
// defined in this file (presumably provided by the hosting runtime - confirm).
$dbname = SAE_MYSQL_DB;
$host = SAE_MYSQL_HOST_M;
$port = SAE_MYSQL_PORT;
$user = SAE_MYSQL_USER;
$pwd = SAE_MYSQL_PASS;

// "@" hides the PHP warning; the failure itself is reported through die() below.
$connect = @mysql_connect("{$host}:{$port}",$user,$pwd,true);
if(!$connect) {
    die("Connect Server Failed: " . mysql_error());
}

if(!mysql_select_db($dbname,$connect)) {
    die("Select Database Failed: " . mysql_error($connect));
}

// Set the connection charset through the client API on the explicit link
// instead of issuing a raw "set names" query whose result was never checked.
if(!mysql_set_charset('utf8', $connect)) {
    die("Set Charset Failed: " . mysql_error($connect));
}

// Scraping rules: the first page to crawl plus one regular expression per
// extracted field. Capture group 1 of each expression is the field value.
$rules = array(
    'start'     =>   'http://www.douluodalu.com.cn/jueshitangmen/6860.html', // URL where crawling starts
    'title'     =>   '/<h1>(.*?)<\/h1>/',    // article title
    'time'      =>   '/发布时间:(.*?)&nbsp;/',   // publication time
    'content'   =>   '/\"><\/div><p>([\s\S]*?)<div align=center>/', // article body
    'next'      =>   '/下一篇: <a href=\"(.*?)\"/',  // URL of the next article
    );
 
 
// Resume from the URL of the most recently stored article, or from the
// configured start URL when nothing has been stored yet (see getLatest()).
$url = getLatest();

// URLs already fetched during this run; stops the crawl if a "next" link
// ever points back to a page that was already processed.
$visited = array();

// The last chapter has no "next" link, so $url becomes empty and the loop ends.
while(is_string($url) && $url !== ""){
    if(isset($visited[$url])){
        break;
    }
    $visited[$url] = true;

    $value = get($url);
    if($value === false || $value === null || $value === ""){
        // Download failed: stop here instead of storing an empty article.
        break;
    }

    $value = _prefilter($value); // strip whitespace/line breaks so the rule regexes can match
    $context = getContent($value);
    if($context['title'] == "" && $context['content'] == ""){
        // Nothing could be extracted from the page: do not insert an empty row.
        break;
    }

    // getContent() returns its values passed through addslashes(). Escape the
    // current URL the same way for the SQL built by storage(), and undo the
    // escaping on the "next" link before using it as a real URL to fetch.
    $context['url'] = addslashes($url);
    $url = stripslashes($context['next']);
    var_dump($url); // progress output: URL that will be fetched next

    // Duplicate protection: the WordPress post is only written when the insert
    // into `articles` succeeded (presumably that insert fails for an already
    // stored URL via a unique index - confirm against the table definition).
    if(storage($context)){
        storageWP($context);
    }
}
echo "采集结束";
mysql_close($connect);
 
/**
 * Insert one scraped article into the `articles` table.
 *
 * The values are interpolated directly into the SQL text, so the caller must
 * pass values that are already escaped for use inside single-quoted literals.
 *
 * @param array $content_array Article fields; reads the keys 'title', 'time',
 *                             'url' and 'content'.
 * @return mixed Whatever mysql_query() returns for the INSERT statement.
 */
function storage($content_array){
    global $connect;

    $title = $content_array['title'];
    $time = $content_array['time'];
    $url = $content_array['url'];
    $content = $content_array['content'];

    $insert = "insert into `articles` (`id`, `title`, `time`, `url`, `content`) values(null,
    '{$title}',
    '{$time}',
    '{$url}',
    '{$content}');";

    return mysql_query($insert,$connect);
}
 
/**
 * Publish one scraped article as a WordPress post.
 *
 * Inserts a row into `wp_posts`, sets its guid from the ID that MySQL actually
 * generated for that row, and links the post to term taxonomy 1 in
 * `wp_term_relationships`.
 *
 * The values are interpolated directly into the SQL text, so the caller must
 * pass values that are already escaped for use inside single-quoted literals.
 *
 * @param array $content_array Article fields; reads the keys 'title', 'time'
 *                             and 'content'.
 * @return bool True when the guid update and the term relationship insert both
 *              succeeded; false when the post insert or a later query failed.
 */
function storageWP($content_array){
    global $connect;

    $title = $content_array['title'];
    $time = $content_array['time'];
    $content = $content_array['content'];

    // ID is passed as null so MySQL assigns it; the guid is filled in below
    // once the real ID is known.
    $sql = "INSERT INTO `wp_posts` (`ID`, `post_author`, `post_date`, `post_date_gmt`, `post_content`, `post_title`, `post_excerpt`, `post_status`, `comment_status`, `ping_status`, `post_password`, `post_name`, `to_ping`, `pinged`, `post_modified`, `post_modified_gmt`, `post_content_filtered`, `post_parent`, `guid`, `menu_order`, `post_type`, `post_mime_type`, `comment_count`) VALUES (null,1,'{$time}', '{$time}', '{$content}', '{$title}', '', 'publish', 'open', 'open', '', '{$title}', '', '', '{$time}', '{$time}', '', 0, '', 0, 'post', '', 0);";

    if(!mysql_query($sql,$connect)){
        return false;
    }

    // Use the ID generated by the INSERT above. Guessing it as max(ID)+1 can
    // differ from the real ID (rows deleted earlier, concurrent inserts).
    $post_id = (int) mysql_insert_id($connect);
    if($post_id <= 0){
        return false;
    }

    $sql = "UPDATE `wp_posts` SET `guid` = 'http://iniu.sinaapp.com/?p={$post_id}' WHERE `ID` = {$post_id};";
    $guid_ok = mysql_query($sql,$connect);

    $sql = "INSERT INTO `wp_term_relationships` (`object_id`, `term_taxonomy_id`, `term_order`) VALUES({$post_id}, 1, 0);";
    $relation_ok = mysql_query($sql,$connect);

    return $guid_ok && $relation_ok;
}
/**
 * Extract the article fields from a (pre-filtered) HTML page.
 *
 * Each field is taken from capture group 1 of the matching regular expression
 * in the global $rules array and passed through addslashes(), because the
 * values are later interpolated into SQL statements. A field whose expression
 * does not match is returned as an empty string.
 *
 * @param string $value Page HTML.
 * @return array Array with the keys 'title', 'time', 'next' and 'content'.
 */
function getContent($value){
    global $rules;

    $context = array();
    foreach(array('title', 'time', 'next', 'content') as $field){
        $found = array();
        $hit = preg_match($rules[$field], $value, $found);
        // Only read group 1 when the expression really matched; otherwise
        // fall back to '' instead of touching an undefined offset.
        if($hit === 1 && isset($found[1])){
            $context[$field] = addslashes($found[1]);
        }else{
            $context[$field] = '';
        }
    }
    return $context;
}
 
/**
 * Return the URL the crawl should start from.
 *
 * That is the `url` of the row with the highest id in `articles`. When the
 * query fails or the table has no rows, the start URL from $rules is returned.
 *
 * @return string URL of the latest stored article, or $rules['start'].
 */
function getLatest(){
    global $connect;
    global $rules;

    $sql = "SELECT url FROM  `articles` ORDER BY id DESC LIMIT 1";
    $result = mysql_query($sql,$connect);

    // mysql_query() returns false on failure; only fetch from a real result.
    if($result !== false){
        $row = mysql_fetch_row($result);
        if($row){
            return $row[0];
        }
    }

    return $rules['start'];
}
 
/**
 * Fetch a URL with an HTTP GET request through cURL.
 *
 * @param string $url Address to download.
 * @return string|false Response body, or false when the handle could not be
 *                      created or the transfer failed.
 */
function get($url){
    $ch = curl_init($url);
    if($ch === false){
        return false;
    }

    // Return the body from curl_exec() instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    // Bound the request so an unresponsive server cannot stall the crawl.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);

    $value = curl_exec($ch);
    curl_close($ch);
    return $value;
}
 
/**
 * Apply a simple whitespace/markup clean-up to fetched HTML so that the
 * extraction regexes are easier to match.
 *
 * HTML tags are kept on purpose: the extraction rules match on tags such as
 * <h1>. (The previous strip_tags() call discarded its result and therefore
 * never had any effect; it has been removed.)
 *
 * @param string $output Raw page content.
 * @return string Filtered content.
 */
function _prefilter($output) {
    // Ordered list of (pattern, replacement) pairs; applied top to bottom.
    $steps = array(
        // "//" up to a ";" that is followed by CR, LF or a literal "|"
        array("/\/\/[\S\f\t\v ]*?;[\r|\n]/", ""),
        // HTML comments
        array("/\<\!\-\-[\s\S]*?\-\-\>/", ""),
        // whitespace between two tags
        array("/\>[\s]+\</", "><"),
        // whitespace after ";"
        array("/;[\s]+/", ";"),
        // whitespace before "}"
        array("/[\s]+\}/", "}"),
        // whitespace after "}"
        array("/}[\s]+/", "}"),
        // whitespace after "{"
        array("/\{[\s]+/", "{"),
        // runs of two or more whitespace characters collapse to the last one
        array("/([\s]){2,}/", "$1"),
        // whitespace on both sides of "="
        array("/[\s]+\=[\s]+/", "="),
        // "<br />" tags
        array("/<br \/>/", ""),
        // remaining newlines
        array("/\n/", ""),
        // pairs of spaces are removed entirely
        array("/  /", ""),
    );

    foreach($steps as $step){
        $output = preg_replace($step[0], $step[1], $output);
    }
    return $output;
}
 
 
?>

 

posted on 2016-05-09 15:40  岩_生  阅读(289)  评论(0)  编辑  收藏  举报

导航