php 爬虫爱奇艺 视频、内容

<?php
/**
 * Fetch an album detail page and extract its metadata with regular expressions.
 *
 * The page is narrowed to the region between the 'album-head-info clearfix'
 * marker and the 'class="album-auto"' marker (when each is present) before
 * any pattern is applied. As a side effect, the cover image referenced by the
 * element with id "j-album-img" is saved to /data/<$i>.jpg unless that file
 * already exists.
 *
 * @param int|string $i   Identifier used as the base name of the saved cover image.
 * @param string     $url Location of the page to read (anything file_get_contents accepts).
 *
 * @return array Keys: title, other_title, area, lang, type, time, daoyan
 *               (always present; null or '' when nothing matched), plus
 *               summary_all when a non-empty description was found.
 */
function getdata($i, $url) {
  // Defaults keep every key present even when the page cannot be read or a
  // pattern does not match, so callers can index the result safely.
  $data = array(
    'title' => null,
    'other_title' => null,
    'area' => '',
    'lang' => '',
    'type' => '',
    'time' => null,
    'daoyan' => null,
  );

  // Read the whole page into a string.
  $str = file_get_contents($url);
  if ($str === false || $str === '') {
    return $data;
  }

  // Narrow the page to the album header. Each marker is applied only when it
  // is found: strpos() returns false on a miss, and substr($str, 0, false)
  // would otherwise throw the entire page away.
  $start = strpos($str, 'album-head-info clearfix');
  if ($start !== false) {
    $str = substr($str, $start);
  }
  $end = strpos($str, 'class="album-auto"');
  if ($end !== false) {
    $str = substr($str, 0, $end);
  }

  // Returns capture group $group of the first match of $pattern, or $default.
  $first = function ($pattern, $group, $default) use ($str) {
    if (preg_match($pattern, $str, $m) === 1 && isset($m[$group])) {
      return $m[$group];
    }
    return $default;
  };

  $data['title'] = $first('/<a .*?id="j-album-title".*?>(.*?)<\/a>/is', 1, null);

  $data['other_title'] = $first('/<span .*?class="info-intro-title-s".*?>(.*?)<\/span>/is', 1, null);

  // Region
  $data['area'] = trim($first('/<p .*?class="episodeIntro-area".*?>.*?<em>(.*?)<\/em>.*?<a.*?>(.*?)<\/a>.*?<\/p>/is', 2, ''));

  // Language
  $data['lang'] = trim($first('/<p .*?class="episodeIntro-lang".*?>.*?<em>(.*?)<\/em>.*?<span.*?>(.*?)<\/span>.*?<\/p>/is', 2, ''));

  // Genres, joined with "/" (e.g. suspense/history/drama)
  if (preg_match_all('/<a .*?qwys_leixing.*?>(.*?)<\/a>/is', $str, $match)) {
    $data['type'] = implode('/', $match[1]);
  }

  // Time
  $data['time'] = $first('/<p .*?class="episodeIntro-time".*?>.*?<em>(.*?)<\/em>.*?<span.*?>(.*?)<\/span>.*?<\/p>/is', 2, null);

  // Director
  $data['daoyan'] = $first('/<p .*?class="episodeIntro-director".*?>.*?<em>(.*?)<\/em>.*?<a.*?>(.*?)<\/a>.*?<\/p>/is', 2, null);

  // Description: prefer the second briefIntroTxt span, fall back to the first.
  if (preg_match_all('/<span .*?class="briefIntroTxt".*?>(.*?)<\/span>/is', $str, $match)) {
    if (!empty($match[1][1])) {
      $data['summary_all'] = $match[1][1];
    } elseif (!empty($match[1][0])) {
      $data['summary_all'] = $match[1][0];
    }
  }

  // Cover image: download once, and only when an image URL was actually found.
  $img = $first('/<img .*?src="(.*?)".*?id="j-album-img".*?>/is', 1, '');
  $file = '/data/' . $i . '.jpg';
  if ($img !== '' && !file_exists($file)) {
    $f = file_get_contents($img);
    if ($f) {
      file_put_contents($file, $f);
    }
  }

  return $data;
}
// Driver: read the tab-separated list in dianshiju02.txt, scrape the page named
// in the third column of each row with getdata(), and dump the collected
// records as a PHP array literal.
//
// FILE_IGNORE_NEW_LINES strips the line terminator; without it the last column
// (the URL) would carry a trailing newline into getdata().
$data = file('dianshiju02.txt', FILE_IGNORE_NEW_LINES);
if ($data === false) {
  $data = array();
}

$ret = array();

// Record ids start at 5001 and advance once per input line, including lines
// that are skipped below, so an id always corresponds to a line position.
$i = 5000;
foreach ($data as $v) {

  $i++;

  // explode() splits the row into its tab-separated columns.
  $tmp = explode("\t", $v);

  // First column is stored as 'num'.
  $num = (int) $tmp[0];

  // Third column is the page URL; rows without one are skipped.
  $url = isset($tmp[2]) ? trim($tmp[2]) : '';
  if ($url === '') {
    continue;
  }

  // Progress output: the id followed by the URL being fetched.
  print_r($i);
  print_r($url . "\n");

  $infos = getdata($i, $url);

  $mp4 = $i . "/01.mp4";

  $ret[$i] = array(
    'title' => $infos['title'],
    'num' => $num,
    'img' => '//static0.qianqian.com/movies/' . $i . '.jpg',
    'mp4' => 'http://qukufile2.qianqian.com/data2/film_tv/tv/' . $mp4,
    'id' => $i,
    'infos' => $infos
  );
}
// echo count($ret);
echo var_export($ret, true);

 

40 雪山飞狐-01.mpg http://www.iqiyi.com/lib/m_204754714.html?src=search 33 大捕房-01.mp4 http://www.iqiyi.com/lib/m_202787314.html?src=search 40 嫁入豪门=01.mp4 http://www.iqiyi.com/lib/m_200881014.html?src=search 32 劝和小姐-01.mp4 http://www.iqiyi.com/lib/m_200840914.html?src=search 30 血色恋情-01.mp4 http://www.iqiyi.com/lib/m_202498214.html?src=search 32 锁定美军特使-01.mp4 http://www.iqiyi.com/lib/m_218730014.html?src=search 32 红狐-01.mp4 http://www.iqiyi.com/lib/m_202587214.html?src=search 34 大浴堂-01.mp4 http://www.iqiyi.com/lib/m_200880514.html?src=search 30 女婿难当-01.mp4 http://www.iqiyi.com/lib/m_206378814.html?src=search 27 风云1911-01.mp4 http://www.iqiyi.com/lib/m_202904114.html?src=search 28 醉红尘-01.mp4 http://www.iqiyi.com/lib/m_202964014.html?src=search 23 栗裕大将-01.mp4 http://www.iqiyi.com/lib/m_215180614.html?src=search 24 将军日记-01.mp4 http://www.iqiyi.com/lib/m_202547514.html?src=search 

 

posted @ 2019-07-17 11:58  focus_yaya  阅读(798)  评论(0)  编辑  收藏  举报