casperjs 抓取爱奇艺高清视频
CasperJS 是一个开源的导航脚本和测试工具,使用 JavaScript 基于 PhantomJS 编写,用于测试 Web 应用功能。PhantomJS 是一个基于 WebKit、提供 JavaScript API 的服务器端无界面浏览器,支持各种 Web 标准:DOM 处理、CSS 选择器、JSON、Canvas 和 SVG。
抓取流程:
实现代码:
// CasperJS script: opens an iQiyi video page, submits the login form, and
// appends the URL of every ".f4v" response it sees to f4v/received.txt.
// The script runs under CasperJS/PhantomJS, so it is written in ES5 syntax.
var fs = require('fs');
var casper = require('casper').create({
    verbose: true,
    // Script-wide timeout in milliseconds (100000000 ms is roughly 27.8 hours).
    // NOTE(review): the original comment described this as a 60-second timeout,
    // which would be 60000 — confirm which one is intended.
    timeout: 100000000,
    logLevel: "debug",
    pageSettings: {
        webSecurityEnabled: true,
        loadImages: true,
        loadPlugins: true
        // A custom `userAgent` string can be added here if the site needs one.
    },
    // Injected into the *page*; jQuery's `$` is therefore only available
    // inside casper.evaluate(), never in this script's own scope.
    clientScripts: [
        'jquery-1.10.1.min.js'
    ]
});

var DEFAULT_URL = 'http://www.iqiyi.com/v_19rrlq5w00.html?fc=8b62d5327a54411b#vfrm=19-9-0-1';
// Allow overriding the page with --url=...; fall back to the built-in page.
var url = casper.cli.has('url') ? casper.cli.raw.get('url') : DEFAULT_URL;

// Placeholder credentials typed into the login form.
var LOGIN_USER = 'username';
var LOGIN_PASSWORD = 'pwd';

// Becomes true once the login form has been submitted.
var isLoginSubmitted = false;
// URLs already written to the capture file, so each link is recorded once
// even though several "received" events can arrive for the same URL.
var recordedLinks = {};

// Make sure the output directories exist before anything is written to them.
fs.makeTree('f4v');
fs.makeTree('log');

/**
 * Left-pads a number below 10 with a single zero.
 * @param {number} value - non-negative integer
 * @returns {string} the value as a string of at least two digits
 */
function padTwo(value) {
    return (value < 10 ? "0" : "") + value;
}

/**
 * Builds the date stamp used in log file names.
 * Month and day are zero-padded so that different dates can never produce
 * the same stamp (unpadded, 2016-1-11 and 2016-11-1 both gave "2016111").
 * @returns {string} current local date as YYYYMMDD
 */
function getFileName() {
    var now = new Date();
    return now.getFullYear() + padTwo(now.getMonth() + 1) + padTwo(now.getDate());
}

/**
 * Builds a minute-resolution timestamp.
 * @returns {string} current local time as YYYYMMDDhhmm
 */
function CurentTime() {
    var now = new Date();
    return now.getFullYear() +
        padTwo(now.getMonth() + 1) +
        padTwo(now.getDate()) +
        padTwo(now.getHours()) +
        padTwo(now.getMinutes());
}

/**
 * Posts a captured link to the collector endpoint.
 * The request is issued from inside the page because that is where the
 * injected jQuery lives.
 * @param {string} link - media URL to report
 */
function SendLinkUrl(link) {
    casper.evaluate(function (capturedLink) {
        $.ajax({
            url: "http://10.1.17.218:8889/admin.do",
            type: "post",
            dataType: "json",
            async: false,
            data: { "link": capturedLink }
        });
    }, link);
}

/**
 * Reads the markup of the #flash element from the page.
 * @returns {?string} the element's innerHTML, or null when it is absent
 */
function readFlashMarkup() {
    return casper.evaluate(function () {
        var node = document.querySelector('#flash');
        return node ? node.innerHTML : null;
    });
}

/**
 * Records a newly seen .f4v URL: appends it to the capture file, takes a
 * screenshot and echoes the player markup for debugging.
 * @param {string} link - URL of the received resource
 */
function recordMediaLink(link) {
    if (recordedLinks[link] === true) {
        return;
    }
    recordedLinks[link] = true;

    fs.write("f4v/received.txt", "====>" + link + "\r\n", 'a');
    casper.capture('temp' + CurentTime() + '.png');
    //SendLinkUrl(link);
    casper.echo('111111111111111111111111');
    casper.echo(String(readFlashMarkup()));
    casper.echo('++++++++++++++++++++++++');
}

/**
 * Queues a 5-second wait followed by a login attempt.
 * The attempt is skipped when the form was already submitted or the login
 * link is not on the page yet; in the latter case a later call retries.
 */
function scheduleLoginAttempt() {
    casper.wait(5000, function () {
        if (isLoginSubmitted) {
            return;
        }
        if (!casper.exists('.usrTx-login a')) {
            // Clicking a missing selector throws; try again on a later wait.
            return;
        }
        casper.click('.usrTx-login a');
        casper.then(function () {
            this.echo("===flag:" + isLoginSubmitted);
            casper.capture('temp1.png');
            this.sendKeys('.acountBorder input[type=text]', LOGIN_USER);
            this.sendKeys('.acountBorder input[type=password]', LOGIN_PASSWORD);
            casper.capture('temp2.png');
            this.click(".login_submitV3 a");
            casper.capture('temp3.png');
            isLoginSubmitted = true;
        });
    });
}

// Inspect every response: capture .f4v links and keep scheduling the login.
// NOTE(review): a wait step is queued for every received resource, as in the
// original; these queued waits appear to be what keeps the run alive long
// enough to see the media request — confirm before reducing them.
casper.on('resource.received', function (receivedData) {
    var resourceUrl = receivedData.url;
    if (resourceUrl.indexOf('.f4v?') > 0) {
        recordMediaLink(resourceUrl);
    }
    scheduleLoginAttempt();
});

// On script timeout, append the page URL to a per-day log file.
casper.on('timeout', function () {
    this.echo("===>timeout" + url);
    // Computed in the script itself rather than via evaluate(), so logging
    // does not depend on the page being usable.
    fs.write("log/timeout_" + getFileName() + ".txt", CurentTime() + "====>" + url + "\r\n", 'a');
});

// Open the page.
casper.start(url, function () {
    var status = this.status().currentHTTPStatus;
    this.log("Initial page HTTP status: " + status, 'debug');
});

casper.run();
可以把爱奇艺的 f4v 地址保存到 txt 文件,再通过 Node.js 或 Python 脚本下载。地址的有效时间是 6 分钟,超时后需要重新抓取。
本人有抓取1080P介质方案,如有需要联系扣扣:1135425244。
posted on 2016-08-04 11:14 tao1135425244 阅读(736) 评论(0) 编辑 收藏 举报