爬虫之遇到521,破解cookie(直接过程,后续有时间细细分解)
使用python、scrapy
import execjs# 安装pip install PyExecJS 用这种方式只是为了调试,实际使用中还是要用js引擎v8
看了不少博客,都是要用到模拟浏览器(phantomjs、chrome之类的),但在实际应用中效率真的慢得可怕,所以这样的方法抛弃!(其实还有好多网站的js会检测出这种操作)
废话不多说,开始:
首先从楼主要研究的网站说起(具体网站就不公布了,fiddler抓包之类的也不多说)。最基本的是请求要带上User-Agent,但得到的是521状态码!(521其实就是访问时没有带cookie,而且需要2个cookie值才能正常访问)看下response,结果是一串js,另外还会返回一个cookie值(Set-Cookie里面的值):
<script>var x="@@@@charAt@setTimeout@0xFF@cookie@Fri@GMT@String@@RK@parseInt@D@@8@@toString@@Expires@try@26@@2@firstChild@length@9L@0xEDB88320@toLowerCase@@match@challenge@pathname@@function@@reverse@@@@18@@1540540706@@else@0@@split@@rOm9XFMtA3QKV7nYsPGT4lifyWwkq5vcjH2IdxUoCbhERLaz81DNB6@@f@@g@JgSe0upZ@location@substr@RegExp@https@@DOMContentLoaded@replace@@eval@08@Array@@925@@Path@onreadystatechange@window@if@5@document@e@36@hantom@1@createElement@@6@@charCodeAt@@div@captcha@tsL@@@1500@addEventListener@a@for@attachEvent@Oct@@href@3@catch@return@@chars@@@var@@@Q@@@callP@d@innerHTML@yG@false@search@@@@fromCharCode@58@new@join@@while@__jsl_clearance".replace(/@*$/,"").split("@"),y="6b 1f=24(){6('39.63=39.22+39.76.3f(/[\\?|&]58-21/,\\'\\')',5c);4c.8='80=2c.45|2f|'+(24(){6b 5b=[24(1f){66 1f},24(5b){66 5b},(24(){6b 1f=4c.51('57');1f.73='<5e 63=\\'/\\'>30</5e>';1f=1f.1a.63;6b 5b=1f.20(/3c?:\\/\\//)[2f];1f=1f.3a(5b.1b).1e();66 24(5b){5f(6b 30=2f;30<5b.1b;30++){5b[30]=1f.5(5b[30])};66 5b.7d('')}})(),24(1f){66 41('b.7a('+1f+')')}],30=[[(-~{}+[~~{}])/[19]]+(![]+[[]][2f]).5(((+!-[])+[-~!/!/-~!/!/]>>-~!/!/-~!/!/)),'d',[19+19],'74',[[-~[-~!/!/+((+!-[])<<(+!-[]))-~!/!/+((+!-[])<<(+!-[]))]]+[-~[-~!/!/+((+!-[])<<(+!-[]))-~!/!/+((+!-[])<<(+!-[]))]],(-~(+[])+[]+[])+[-~!/!/-~!/!/]+[~~'']],'f',[[(-~{}+[~~{}])/[19]]],[[-~!/!/+((+!-[])<<(+!-[]))+4b]+[53]],'6e',[[53]+(-~[]+((+!-[])<<-~!/!/+((+!-[])<<(+!-[])))+[]+[[]][2f])],[19+19],'59',[49['71'+'4f']+[]+[[]][2f]][2f].5((((+!-[])<<(+!-[]))<<(+!-[])))+[(-~{}|19)],'4d',[[-~!/!/+((+!-[])<<(+!-[]))+4b]+[-~!/!/-~!/!/]],'1c',[!!49['71'+'4f']+[]][2f].5(-~!/!/-~!/!/),[(-~(+[])+[]+[])+[-~!/!/-~!/!/]],[-~!/!/+((+!-[])<<(+!-[]))+4b],'%64',[[53]+[-~!/!/+((+!-[])<<(+!-[]))+4b]]];5f(6b 1f=2f;1f<30.1b;1f++){30[1f]=5b[[2f,50,2f,50,64,50,19,64,50,64,2f,50,2f,50,64,50,2f,19,2f,50,64][1f]](30[1f])};66 30.7d('')})()+';15=9, 17-61-2a 42:7b:17 a;47=/;'};4a((24(){16{66 !!49.5d;}65(4d){66 75;}})()){4c.5d('3e',1f,75)}2e{4c.60('48',1f)}",f=function(x,y){var 
a=0,b=0,c=0;x=x.split("");y=y||99;while((a=x.shift())&&(b=a.charCodeAt(0)-77.5))c=(Math.abs(b)<13?(b+48.5):parseInt(a,36))+y*c;return c},z=f(y.match(/\w/g).sort(function(x,y){return f(x)-f(y)}).pop());while(z++)try{eval(y.replace(/\b\w+\b/g, function(y){return x[f(y,z)-1]||("_"+y)}));break}catch(_){}</script>
网上美化格式的工具比较多转换一下!
< script >
var x = "@@@@charAt@setTimeout@0xFF@cookie@Fri@GMT@String@@RK@parseInt@D@@8@@toString@@Expires@try@26@@2@firstChild@length@9L@0xEDB88320@toLowerCase@@match@challenge@pathname@@function@@reverse@@@@18@@1540540706@@else@0@@split@@rOm9XFMtA3QKV7nYsPGT4lifyWwkq5vcjH2IdxUoCbhERLaz81DNB6@@f@@g@JgSe0upZ@location@substr@RegExp@https@@DOMContentLoaded@replace@@eval@08@Array@@925@@Path@onreadystatechange@window@if@5@document@e@36@hantom@1@createElement@@6@@charCodeAt@@div@captcha@tsL@@@1500@addEventListener@a@for@attachEvent@Oct@@href@3@catch@return@@chars@@@var@@@Q@@@callP@d@innerHTML@yG@false@search@@@@fromCharCode@58@new@join@@while@__jsl_clearance".replace(/@*$/, "").split("@"),
y = "6b 1f=24(){6('39.63=39.22+39.76.3f(/[\\?|&]58-21/,\\'\\')',5c);4c.8='80=2c.45|2f|'+(24(){6b 5b=[24(1f){66 1f},24(5b){66 5b},(24(){6b 1f=4c.51('57');1f.73='<5e 63=\\'/\\'>30</5e>';1f=1f.1a.63;6b 5b=1f.20(/3c?:\\/\\//)[2f];1f=1f.3a(5b.1b).1e();66 24(5b){5f(6b 30=2f;30<5b.1b;30++){5b[30]=1f.5(5b[30])};66 5b.7d('')}})(),24(1f){66 41('b.7a('+1f+')')}],30=[[(-~{}+[~~{}])/[19]]+(![]+[[]][2f]).5(((+!-[])+[-~!/!/-~!/!/]>>-~!/!/-~!/!/)),'d',[19+19],'74',[[-~[-~!/!/+((+!-[])<<(+!-[]))-~!/!/+((+!-[])<<(+!-[]))]]+[-~[-~!/!/+((+!-[])<<(+!-[]))-~!/!/+((+!-[])<<(+!-[]))]],(-~(+[])+[]+[])+[-~!/!/-~!/!/]+[~~'']],'f',[[(-~{}+[~~{}])/[19]]],[[-~!/!/+((+!-[])<<(+!-[]))+4b]+[53]],'6e',[[53]+(-~[]+((+!-[])<<-~!/!/+((+!-[])<<(+!-[])))+[]+[[]][2f])],[19+19],'59',[49['71'+'4f']+[]+[[]][2f]][2f].5((((+!-[])<<(+!-[]))<<(+!-[])))+[(-~{}|19)],'4d',[[-~!/!/+((+!-[])<<(+!-[]))+4b]+[-~!/!/-~!/!/]],'1c',[!!49['71'+'4f']+[]][2f].5(-~!/!/-~!/!/),[(-~(+[])+[]+[])+[-~!/!/-~!/!/]],[-~!/!/+((+!-[])<<(+!-[]))+4b],'%64',[[53]+[-~!/!/+((+!-[])<<(+!-[]))+4b]]];5f(6b 1f=2f;1f<30.1b;1f++){30[1f]=5b[[2f,50,2f,50,64,50,19,64,50,64,2f,50,2f,50,64,50,2f,19,2f,50,64][1f]](30[1f])};66 30.7d('')})()+';15=9, 17-61-2a 42:7b:17 a;47=/;'};4a((24(){16{66 !!49.5d;}65(4d){66 75;}})()){4c.5d('3e',1f,75)}2e{4c.60('48',1f)}",
// f(x, y): interprets the string x as a number written in base y and returns
// its value. y falls back to 99 when it is omitted (or otherwise falsy).
// Digit values, as established by the expression in the loop below:
//   '0'-'9' -> 0-9 and 'a'-'z' -> 10-35 (via parseInt(a, 36)),
//   'A'-'Z' -> 36-61 (char code - 77.5 + 48.5, i.e. char code - 29).
f = function(x, y) {
var a = 0,
b = 0,
c = 0;
x = x.split("");
y = y || 99;
// Consume one character per iteration; shift() yields undefined once the
// array is empty, which ends the loop. Math.abs(b) < 13 holds exactly for
// char codes 65..90, which selects the upper-case branch.
while ((a = x.shift()) && (b = a.charCodeAt(0) - 77.5)) c = (Math.abs(b) < 13 ? (b + 48.5) : parseInt(a, 36)) + y * c;
return c
},
// z starts as the value of the largest single "digit" character that occurs
// in the encoded program y: every word character is collected, sorted in
// ascending order of its f() value, and the last (largest) one is taken.
z = f(y.match(/\w/g).sort(function(x, y) {
return f(x) - f(y)
}).pop());
// Try successive radixes, beginning at z + 1 (z is incremented before the
// body runs). For each radix, every word token of y is decoded with f(), and
// the result is used as a 1-based index into the word table x; tokens whose
// table entry is empty or missing become "_" + token. The rebuilt source is
// eval'd: the first radix that evaluates without throwing ends the loop,
// any exception simply moves on to the next radix.
while (z++) try {
eval(y.replace(/\b\w+\b/g, function(y) {
return x[f(y, z) - 1] || ("_" + y)
}));
break
} catch (_) {} < /script>
发现问题出在这里
# First cookie value: the first "name=value" pair of the Set-Cookie header
# returned together with the 521 response.
__jsluid = response.headers["Set-Cookie"].split(';')[0]
cookie1 = __jsluid

# Decode step: take the body of the <script> tag and turn the final `eval`
# into `return`, so that running the script hands back the decoded source
# instead of executing it. re.S lets the pattern match even if the script
# body contains a line break.
# NOTE(review): str.replace rewrites every occurrence of 'eval', including the
# 'eval' entry inside the script's word table -- confirm the second-stage
# output is still what you expect.
get_js = re.findall(r'<script>(.*?)</script>', resp_body, re.S)[0].replace('eval', 'return')
resHtml = "function getClearance(){" + get_js + "};"
ctx = execjs.compile(resHtml)

# First-stage decoding result (this is itself another piece of JavaScript).
temp1 = ctx.call('getClearance')
结果返回的又是一段js:
var _1f=function(){setTimeout('location.href=location.pathname+location.search.replace(/[\?|&]captcha-challenge/,\'\')',1500);document.cookie='__jsl_clearance=1540540706.925|0|'+(function(){var _5b=[function(_1f){return _1f},function(_5b){return _5b},(function(){var _1f=document.createElement('div');_1f.innerHTML='<a href=\'/\'>_30</a>';_1f=_1f.firstChild.href;var _5b=_1f.match(/https?:\/\//)[0];_1f=_1f.substr(_5b.length).toLowerCase();return function(_5b){for(var _30=0;_30<_5b.length;_30++){_5b[_30]=_1f.charAt(_5b[_30])};return _5b.join('')}})(),function(_1f){return eval('String.fromCharCode('+_1f+')')}],_30=[[(-~{}+[~~{}])/[2]]+(![]+[[]][0]).charAt(((+!-[])+[-~!/!/-~!/!/]>>-~!/!/-~!/!/)),'RK',[2+2],'yG',[[-~[-~!/!/+((+!-[])<<(+!-[]))-~!/!/+((+!-[])<<(+!-[]))]]+[-~[-~!/!/+((+!-[])<<(+!-[]))-~!/!/+((+!-[])<<(+!-[]))]],(-~(+[])+[]+[])+[-~!/!/-~!/!/]+[~~'']],'D',[[(-~{}+[~~{}])/[2]]],[[-~!/!/+((+!-[])<<(+!-[]))+5]+[6]],'Q',[[6]+(-~[]+((+!-[])<<-~!/!/+((+!-[])<<(+!-[])))+[]+[[]][0])],[2+2],'tsL',[window['callP'+'hantom']+[]+[[]][0]][0].charAt((((+!-[])<<(+!-[]))<<(+!-[])))+[(-~{}|2)],'e',[[-~!/!/+((+!-[])<<(+!-[]))+5]+[-~!/!/-~!/!/]],'9L',[!!window['callP'+'hantom']+[]][0].charAt(-~!/!/-~!/!/),[(-~(+[])+[]+[])+[-~!/!/-~!/!/]],[-~!/!/+((+!-[])<<(+!-[]))+5],'%3',[[6]+[-~!/!/+((+!-[])<<(+!-[]))+5]]];for(var _1f=0;_1f<_30.length;_1f++){_30[_1f]=_5b[[0,1,0,1,3,1,2,3,1,3,0,1,0,1,3,1,0,2,0,1,3][_1f]](_30[_1f])};return _30.join('')})()+';Expires=Fri, 26-Oct-18 08:58:26 GMT;Path=/;'};if((function(){try{return !!window.addEventListener;}catch(e){return false;}})()){document.addEventListener('DOMContentLoaded',_1f,false)}else{document.attachEvent('onreadystatechange',_1f)}
美化:
// _1f: second-stage challenge handler. When invoked it schedules a redirect
// and writes the __jsl_clearance cookie whose suffix is computed below.
var _1f = function() {
// After 1500 ms, navigate to the current path plus the query string with the
// "captcha-challenge" marker removed.
setTimeout('location.href=location.pathname+location.search.replace(/[\?|&]captcha-challenge/,\'\')', 1500);
// Cookie value = fixed prefix + computed suffix + Expires/Path attributes.
document.cookie = '__jsl_clearance=1540540706.925|0|' + (function() {
// _5b holds four fragment decoders, selected by index further down:
//   [0] and [1] return their argument unchanged,
//   [2] maps a list of positions to characters of the page's own URL,
//   [3] evaluates String.fromCharCode(<argument>).
var _5b = [function(_1f) {
return _1f
}, function(_5b) {
return _5b
}, (function() {
// Resolve '/' against the current document to obtain the absolute site URL,
// then strip the leading http:// or https:// and lower-case the remainder.
var _1f = document.createElement('div');
_1f.innerHTML = '<a href=\'/\'>_30</a>';
_1f = _1f.firstChild.href;
var _5b = _1f.match(/https?:\/\//)[0];
_1f = _1f.substr(_5b.length).toLowerCase();
// The returned decoder replaces each element (a position) with the character
// found at that position in the URL string, then joins the characters.
return function(_5b) {
for (var _30 = 0; _30 < _5b.length; _30++) {
_5b[_30] = _1f.charAt(_5b[_30])
};
return _5b.join('')
}
})(), function(_1f) {
return eval('String.fromCharCode(' + _1f + ')')
}],
// _30 is the list of cookie-suffix fragments: plain string literals mixed with
// values built through type-coercion arithmetic. Two of the fragments read
// window['callP' + 'hantom'] (i.e. window.callPhantom), so their result depends
// on whether that property exists -- presumably a headless-browser check.
_30 = [
[(-~ {} + [~~ {}]) / [2]] + (![] + [
[]
][0]).charAt(((+!-[]) + [-~!/!/ - ~!/!/] >> -~!/!/ - ~!/!/)), 'RK', [2 + 2], 'yG', [
[-~ [-~!/!/ + ((+!-[]) << (+!-[])) - ~!/!/ + ((+!-[]) << (+!-[]))]] + [-~ [-~!/!/ + ((+!-[]) << (+!-[])) - ~!/!/ + ((+!-[]) << (+!-[]))]], (-~ (+[]) + [] + []) + [-~!/!/ - ~!/!/] + [~~'']
], 'D', [
[(-~ {} + [~~ {}]) / [2]]
],
[
[-~!/!/ + ((+!-[]) << (+!-[])) + 5] + [6]
], 'Q', [
[6] + (-~ [] + ((+!-[]) << -~!/!/ + ((+!-[]) << (+!-[]))) + [] + [
[]
][0])],
[2 + 2], 'tsL', [window['callP' + 'hantom'] + [] + [
[]
][0]][0].charAt((((+!-[]) << (+!-[])) << (+!-[]))) + [(-~ {} | 2)], 'e', [
[-~!/!/ + ((+!-[]) << (+!-[])) + 5] + [-~!/!/ - ~!/!/]
], '9L', [ !! window['callP' + 'hantom'] + []][0].charAt(-~!/!/ - ~!/!/), [(-~ (+[]) + [] + []) + [-~!/!/ - ~!/!/]],
[-~!/!/ + ((+!-[]) << (+!-[])) + 5], '%3', [
[6] + [-~!/!/ + ((+!-[]) << (+!-[])) + 5]
]
];
// Decode every fragment in place: the literal index table picks which of the
// four _5b decoders is applied to the fragment at the same position.
for (var _1f = 0; _1f < _30.length; _1f++) {
_30[_1f] = _5b[[0, 1, 0, 1, 3, 1, 2, 3, 1, 3, 0, 1, 0, 1, 3, 1, 0, 2, 0, 1, 3][_1f]](_30[_1f])
};
// The concatenation of all decoded fragments is the cookie suffix.
return _30.join('')
})() + ';Expires=Fri, 26-Oct-18 08:58:26 GMT;Path=/;'
};
// Register _1f to run once the document is ready. The probe returns true when
// window.addEventListener exists (and false if reading it throws); in that
// case the DOMContentLoaded event is used, otherwise the handler is attached
// through attachEvent on 'onreadystatechange'.
if ((function() {
try {
return !!window.addEventListener;
} catch (e) {
return false;
}
})()) {
document.addEventListener('DOMContentLoaded', _1f, false)
} else {
document.attachEvent('onreadystatechange', _1f)
}
具体破解省略。。。
结果
__jsl_clearance=1540540706.925|0|5sRK4yGMxDiVQE4tsLf3eR9Lln8%3D
过程就不一一细说了,反正当时花了不少时间
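对了,里面有一步必须要加上url!!!(上面_5b里的第三个函数会通过<a href='/'>取到当前站点的完整地址,去掉http(s)://前缀并转成小写后,再按下标逐个取字符;所以脱离浏览器执行这段js时,必须自己把网站的url补进去,否则算出来的cookie不对)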
注:这些是工作中遇到的一些问题,我的方法不一定是最好的,欢迎大家一起相互交流+扣扣571848990