CasperJS 抓取爱奇艺高清视频

CasperJS 是一个开源的导航脚本和测试工具,使用 JavaScript 基于 PhantomJS 编写,用于测试 Web 应用功能。PhantomJS 是一个基于 WebKit、提供服务器端 JavaScript API 的无界面浏览器,它支持各种 Web 标准:DOM 处理、CSS 选择器、JSON、Canvas 和 SVG。

抓取流程:

实现代码:

// File-system module; the handlers below use it to append log lines.
var fs = require('fs');

// Options for the CasperJS instance, assembled before creation.
var casperOptions = {
    verbose: true,
    logLevel: 'debug',
    // NOTE(review): the original comment here said "60-second timeout, then exit",
    // but the configured value is 100000000 — confirm which one is intended.
    timeout: 100000000,
    pageSettings: {
        webSecurityEnabled: true,
        loadImages: true,
        loadPlugins: true
        // Alternative user agents that were tried and left disabled:
        //userAgent: 'Mozilla/5.0 (Windqows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36 LBBROWSER'
        //userAgent:'Mozilla/5.0 (Linux;U;) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.0.0 Safari/537.36 '
        //userAgent:'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36'
    },
    clientScripts: ['jquery-1.10.1.min.js']
};
var casper = require('casper').create(casperOptions);

// String flag ("false"/"true") compared by the resource handler before it
// attempts the login.
var flag = 'false';

// Page to open. The URL could instead be taken from the command line:
//var url = casper.cli.raw.get('url');
var url = 'http://www.iqiyi.com/v_19rrlq5w00.html?fc=8b62d5327a54411b#vfrm=19-9-0-1';
 // Inspect every received resource: record .f4v stream URLs and queue the login flow.
 // (The original note here read "exclude irrelevant requests to speed up page loading",
 // but nothing in this handler filters requests.)
 casper.on('resource.received', function (receivedData) {
      var resourceUrl = receivedData.url;

      if (resourceUrl.indexOf('.f4v?') > 0) {
          // Append the stream URL to the capture log and take a timestamped screenshot.
          fs.write("f4v/received.txt", "====>" + resourceUrl + "\r\n", 'a');
          casper.capture('temp' + CurentTime() + '.png');

          casper.echo('111111111111111111111111');
          // `$` is not defined in this script's scope (jQuery is only listed in
          // clientScripts, i.e. loaded into the page), and a jQuery object has no
          // `innerHTML` property anyway. Read the element inside the page instead.
          var flashContent = casper.evaluate(function () {
              var flashNode = document.querySelector('#flash');
              return flashNode ? flashNode.innerHTML : '';
          });
          casper.echo(flashContent || '');
          casper.echo('++++++++++++++++++++++++');
      }

      // NOTE(review): a 5-second wait step is queued for EVERY received resource, not
      // just the first one. Presumably this also keeps the run alive while resources
      // keep arriving — confirm before reducing it to a single scheduled login.
      casper.wait(5000, function () {
            if (flag !== 'false') {
                return;
            }

            // Open the login dialog, fill in the credentials and submit,
            // taking a screenshot after each stage.
            casper.click('.usrTx-login a');
            casper.then(function () {
                this.echo("===flag:" + flag);
                this.capture('temp1.png');

                this.sendKeys('.acountBorder input[type=text]', 'username');
                this.sendKeys('.acountBorder input[type=password]', 'pwd');
                this.capture('temp2.png');

                this.click(".login_submitV3 a");
                this.capture('temp3.png');
                // Mark the login as done so later wait callbacks skip it.
                flag = 'true';
            });
      });
 });

 // When the 'timeout' event fires, echo the URL and append it, with a
 // timestamp, to a per-day log file under log/.
 casper.on('timeout', function () {
      this.echo("===>timeout" + url);
      // The date helpers are plain functions with no page dependency, so call
      // them directly instead of round-tripping through the page with evaluate(),
      // whose result depends on the page state at the moment of the timeout.
      var fileName = getFileName();
      var nowTime = CurentTime();
      fs.write("log/timeout_" + fileName + ".txt", nowTime + "====>" + url + "\r\n", 'a');
 });

 // Request the page and log the HTTP status of the initial load.
 casper.start(url, function () {
       // Previously this value was read into a local and never used.
       var status = this.status().currentHTTPStatus;
       this.log('start page HTTP status: ' + status, 'info');
       //fs.write("temp.html", this.getHTML(), 'w');
 });

 /**
  * Build the date part of a log file name from the current local date.
  *
  * Month and day are zero-padded to two digits so that different dates can
  * never produce the same string (unpadded, 2016-1-12 and 2016-11-2 both
  * became "2016112").
  *
  * @returns {string} The current local date as "yyyyMMdd".
  */
 function getFileName() {
     var now = new Date();

     var year = now.getFullYear();
     var month = now.getMonth() + 1;     // getMonth() is zero-based
     var day = now.getDate();

     return year + "" + (month < 10 ? "0" : "") + month + (day < 10 ? "0" : "") + day;
 }
  
 /**
  * POST a captured link to the admin endpoint with a synchronous jQuery
  * AJAX request.
  *
  * NOTE(review): this relies on a global `$` (jQuery). The script only lists
  * jQuery in clientScripts, so `$` is presumably available in the page context
  * only — confirm where this function is meant to run before enabling it.
  *
  * @param {string} link - The URL to report.
  */
 function SendLinkUrl(link) {
     $.ajax({
         url: "http://10.1.17.218:8889/admin.do",
         type: "post",
         dataType: "json",
         async: false,
         data: {
             // Previously sent the undefined identifier `albums`, which threw a
             // ReferenceError before the request was ever issued.
             "link": link
         },
         success: function (json) {
             // The response is intentionally ignored.
         }
     });
 }
 /**
  * Format the current local date and time as a compact timestamp.
  *
  * @returns {string} Year followed by two-digit month, day, hour and minute
  *                   ("yyyyMMddHHmm" for four-digit years).
  */
 function CurentTime() {
     // Prefix single-digit values with "0".
     function twoDigits(value) {
         return (value < 10 ? "0" : "") + value;
     }

     var current = new Date();
     var pieces = [
         current.getFullYear(),
         twoDigits(current.getMonth() + 1),   // getMonth() is zero-based
         twoDigits(current.getDate()),
         twoDigits(current.getHours()),
         twoDigits(current.getMinutes())
     ];

     return pieces.join("");
 }

// Run the navigation steps queued above.
casper.run();


可以把爱奇艺的 f4v 地址保存到 txt 文件,再通过 Node.js 或 Python 脚本下载。地址的有效时间是 6 分钟,超时后需要重新抓取。

本人有抓取1080P介质方案,如有需要联系扣扣:1135425244。

 

posted on 2016-08-04 11:14  tao1135425244  阅读(736)  评论(0)  编辑  收藏  举报

导航