采集目标页面内容

直接贴代码

// Weekly scraper: on Mondays, fetch the Les Roches index page, cut out the
// markup between <div id="event"> and <div id="m_right">, rewrite its relative
// links to absolute ones, and store the result as a JavaScript string
// assignment (`var caiji_c = '...';`) for inclusion elsewhere.
//
// Failures (download, pattern miss, write) are logged via error_log() and the
// write is skipped, so an empty or broken snippet never replaces the old file.

// date('w') yields the weekday number as a string: 0 = Sunday ... 6 = Saturday.
// The format must be the quoted string 'w'; a bare `w` is an undefined constant.
$week = (int) date('w');
if ($week === 1) {
    $url = "http://www.lesroches.edu/les_roches_bluche/en/en-en/index.cfm";

    // NOTE(review): PHP's http:// stream wrapper is read-only, so opening this
    // URL for writing can never succeed. The path is kept as published; it
    // presumably stood for a local file such as images/stories/caiji_content.js
    // -- confirm and replace with the real filesystem path.
    $target = "http://images.cnblogs.com/stories/caiji_content.js";

    // s: dot matches newlines, i: case-insensitive, U: quantifiers are lazy,
    // so the match stops at the first <div id="m_right"> after the event block.
    $preg = '/<div id="event">(.*)<div id="m_right">/isU';
    $title = array();

    $r = file_get_contents($url);
    if ($r === false) {
        error_log("caiji: could not download " . $url);
    } elseif (preg_match($preg, $r, $title) !== 1) {
        // Covers both "markers not found" (0) and a PCRE error (false).
        error_log("caiji: event block not found in " . $url);
    } else {
        // Whole match, i.e. including the opening and the trailing marker.
        $content = $title[0];

        // Drop every run of two or more whitespace characters. This can glue
        // `<a` to `href` when they were separated by a line break; the
        // '<ahref=' replacement further down repairs exactly that.
        $content = preg_replace("/[\s]{2,}/", "", $content);

        // Remove the closing tags plus the trailing marker captured by the regex.
        $content = str_replace('</div></div></div><div id="m_right">', '', $content);

        // Make site-relative and page-relative URLs absolute. Order matters:
        // the generic href="/ rule runs before the href="index rule, and the
        // src rule runs before the image path is mapped to the local template.
        $content = str_replace('src="/common', 'src="http://www.lesroches.edu/common', $content);
        $content = str_replace('href="/', 'href="http://www.lesroches.edu/', $content);
        $content = str_replace('href="index', 'href="http://www.lesroches.edu/les_roches_bluche/en/en-en/index', $content);
        $content = str_replace('<ahref=', '<a href=', $content);

        // Serve the event images from the local template directory instead.
        $content = str_replace('http://www.lesroches.edu/common/img/template/les_roches_bluche/EN-EN/', 'templates/lesroches/index_en/', $content);

        // Escape for a single-quoted JavaScript string literal. Backslashes
        // and line breaks must be escaped as well as quotes: a lone newline
        // survives the whitespace collapse above and would otherwise end the
        // literal early. strtr() with an array never re-scans its own output,
        // so the inserted backslashes are not escaped twice.
        $content = strtr($content, array(
            "\\" => "\\\\",
            "'"  => "\\'",
            "\r" => "\\r",
            "\n" => "\\n",
        ));
        $content = "var caiji_c = '" . $content . "';";

        // Write the JS file; file_put_contents() opens, writes and closes in
        // one call and reports failure by returning false.
        if (file_put_contents($target, $content) === false) {
            error_log("caiji: could not write " . $target);
        }
    }
}
posted @ 2012-05-10 11:58  xiaoluozi513  阅读(155)  评论(0)  编辑  收藏  举报