爬虫之遇到521,破解cookie(直接过程,后续有时间细细分解)

使用python、scrapy

import execjs# 安装pip install PyExecJS  用这种方式只是为了调试,实际使用中还是要用js引擎v8

看了不少博客,都是用phantomjs、chrome之类的模拟浏览器来处理,但在实际应用中,效率真的慢得可怕,所以这种方法直接抛弃!(其实还有好多网站的js会检测出这种操作)

废话不多说,开始:

先从楼主要研究的网站说起(具体网站就不公布了,fiddler抓包之类的也不多说)。最简单的请求只带上User-Agent,得到的是521状态码!(521其实就是访问时没有带cookie,而且需要2个cookie值才能正常访问:一个是Set-Cookie返回的值,另一个是由js算出来的__jsl_clearance)看下response,结果是一串js,另外还会返回一个cookie值(Set-Cookie里面的值):

<script>var x="@@@@charAt@setTimeout@0xFF@cookie@Fri@GMT@String@@RK@parseInt@D@@8@@toString@@Expires@try@26@@2@firstChild@length@9L@0xEDB88320@toLowerCase@@match@challenge@pathname@@function@@reverse@@@@18@@1540540706@@else@0@@split@@rOm9XFMtA3QKV7nYsPGT4lifyWwkq5vcjH2IdxUoCbhERLaz81DNB6@@f@@g@JgSe0upZ@location@substr@RegExp@https@@DOMContentLoaded@replace@@eval@08@Array@@925@@Path@onreadystatechange@window@if@5@document@e@36@hantom@1@createElement@@6@@charCodeAt@@div@captcha@tsL@@@1500@addEventListener@a@for@attachEvent@Oct@@href@3@catch@return@@chars@@@var@@@Q@@@callP@d@innerHTML@yG@false@search@@@@fromCharCode@58@new@join@@while@__jsl_clearance".replace(/@*$/,"").split("@"),y="6b 1f=24(){6('39.63=39.22+39.76.3f(/[\\?|&]58-21/,\\'\\')',5c);4c.8='80=2c.45|2f|'+(24(){6b 5b=[24(1f){66 1f},24(5b){66 5b},(24(){6b 1f=4c.51('57');1f.73='<5e 63=\\'/\\'>30</5e>';1f=1f.1a.63;6b 5b=1f.20(/3c?:\\/\\//)[2f];1f=1f.3a(5b.1b).1e();66 24(5b){5f(6b 30=2f;30<5b.1b;30++){5b[30]=1f.5(5b[30])};66 5b.7d('')}})(),24(1f){66 41('b.7a('+1f+')')}],30=[[(-~{}+[~~{}])/[19]]+(![]+[[]][2f]).5(((+!-[])+[-~!/!/-~!/!/]>>-~!/!/-~!/!/)),'d',[19+19],'74',[[-~[-~!/!/+((+!-[])<<(+!-[]))-~!/!/+((+!-[])<<(+!-[]))]]+[-~[-~!/!/+((+!-[])<<(+!-[]))-~!/!/+((+!-[])<<(+!-[]))]],(-~(+[])+[]+[])+[-~!/!/-~!/!/]+[~~'']],'f',[[(-~{}+[~~{}])/[19]]],[[-~!/!/+((+!-[])<<(+!-[]))+4b]+[53]],'6e',[[53]+(-~[]+((+!-[])<<-~!/!/+((+!-[])<<(+!-[])))+[]+[[]][2f])],[19+19],'59',[49['71'+'4f']+[]+[[]][2f]][2f].5((((+!-[])<<(+!-[]))<<(+!-[])))+[(-~{}|19)],'4d',[[-~!/!/+((+!-[])<<(+!-[]))+4b]+[-~!/!/-~!/!/]],'1c',[!!49['71'+'4f']+[]][2f].5(-~!/!/-~!/!/),[(-~(+[])+[]+[])+[-~!/!/-~!/!/]],[-~!/!/+((+!-[])<<(+!-[]))+4b],'%64',[[53]+[-~!/!/+((+!-[])<<(+!-[]))+4b]]];5f(6b 1f=2f;1f<30.1b;1f++){30[1f]=5b[[2f,50,2f,50,64,50,19,64,50,64,2f,50,2f,50,64,50,2f,19,2f,50,64][1f]](30[1f])};66 30.7d('')})()+';15=9, 17-61-2a 42:7b:17 a;47=/;'};4a((24(){16{66 !!49.5d;}65(4d){66 75;}})()){4c.5d('3e',1f,75)}2e{4c.60('48',1f)}",f=function(x,y){var 
a=0,b=0,c=0;x=x.split("");y=y||99;while((a=x.shift())&&(b=a.charCodeAt(0)-77.5))c=(Math.abs(b)<13?(b+48.5):parseInt(a,36))+y*c;return c},z=f(y.match(/\w/g).sort(function(x,y){return f(x)-f(y)}).pop());while(z++)try{eval(y.replace(/\b\w+\b/g, function(y){return x[f(y,z)-1]||("_"+y)}));break}catch(_){}</script>

网上美化格式的工具比较多,随便找一个转换一下!

< script >
var x = "@@@@charAt@setTimeout@0xFF@cookie@Fri@GMT@String@@RK@parseInt@D@@8@@toString@@Expires@try@26@@2@firstChild@length@9L@0xEDB88320@toLowerCase@@match@challenge@pathname@@function@@reverse@@@@18@@1540540706@@else@0@@split@@rOm9XFMtA3QKV7nYsPGT4lifyWwkq5vcjH2IdxUoCbhERLaz81DNB6@@f@@g@JgSe0upZ@location@substr@RegExp@https@@DOMContentLoaded@replace@@eval@08@Array@@925@@Path@onreadystatechange@window@if@5@document@e@36@hantom@1@createElement@@6@@charCodeAt@@div@captcha@tsL@@@1500@addEventListener@a@for@attachEvent@Oct@@href@3@catch@return@@chars@@@var@@@Q@@@callP@d@innerHTML@yG@false@search@@@@fromCharCode@58@new@join@@while@__jsl_clearance".replace(/@*$/, "").split("@"),
    y = "6b 1f=24(){6('39.63=39.22+39.76.3f(/[\\?|&]58-21/,\\'\\')',5c);4c.8='80=2c.45|2f|'+(24(){6b 5b=[24(1f){66 1f},24(5b){66 5b},(24(){6b 1f=4c.51('57');1f.73='<5e 63=\\'/\\'>30</5e>';1f=1f.1a.63;6b 5b=1f.20(/3c?:\\/\\//)[2f];1f=1f.3a(5b.1b).1e();66 24(5b){5f(6b 30=2f;30<5b.1b;30++){5b[30]=1f.5(5b[30])};66 5b.7d('')}})(),24(1f){66 41('b.7a('+1f+')')}],30=[[(-~{}+[~~{}])/[19]]+(![]+[[]][2f]).5(((+!-[])+[-~!/!/-~!/!/]>>-~!/!/-~!/!/)),'d',[19+19],'74',[[-~[-~!/!/+((+!-[])<<(+!-[]))-~!/!/+((+!-[])<<(+!-[]))]]+[-~[-~!/!/+((+!-[])<<(+!-[]))-~!/!/+((+!-[])<<(+!-[]))]],(-~(+[])+[]+[])+[-~!/!/-~!/!/]+[~~'']],'f',[[(-~{}+[~~{}])/[19]]],[[-~!/!/+((+!-[])<<(+!-[]))+4b]+[53]],'6e',[[53]+(-~[]+((+!-[])<<-~!/!/+((+!-[])<<(+!-[])))+[]+[[]][2f])],[19+19],'59',[49['71'+'4f']+[]+[[]][2f]][2f].5((((+!-[])<<(+!-[]))<<(+!-[])))+[(-~{}|19)],'4d',[[-~!/!/+((+!-[])<<(+!-[]))+4b]+[-~!/!/-~!/!/]],'1c',[!!49['71'+'4f']+[]][2f].5(-~!/!/-~!/!/),[(-~(+[])+[]+[])+[-~!/!/-~!/!/]],[-~!/!/+((+!-[])<<(+!-[]))+4b],'%64',[[53]+[-~!/!/+((+!-[])<<(+!-[]))+4b]]];5f(6b 1f=2f;1f<30.1b;1f++){30[1f]=5b[[2f,50,2f,50,64,50,19,64,50,64,2f,50,2f,50,64,50,2f,19,2f,50,64][1f]](30[1f])};66 30.7d('')})()+';15=9, 17-61-2a 42:7b:17 a;47=/;'};4a((24(){16{66 !!49.5d;}65(4d){66 75;}})()){4c.5d('3e',1f,75)}2e{4c.60('48',1f)}",
    // f(x, y): reads the string x as a number written in base y and returns it.
    // Digit values: "0"-"9" -> 0-9 and "a"-"z" -> 10-35 (via parseInt(ch, 36)),
    // "A"-"Z" -> 36-61 (char code - 29). y falls back to 99 when omitted or falsy.
    f = function(x, y) {
        var a = 0,
            b = 0,
            c = 0;
        x = x.split("");
        y = y || 99;
        // a is the current character; b is its char code minus 77.5, which can never
        // be 0, so the right-hand side of && only serves to assign b. |b| < 13 holds
        // exactly for codes 65..90 ("A"-"Z"), where b + 48.5 equals code - 29.
        // The loop stops once shift() returns undefined on the emptied array.
        while ((a = x.shift()) && (b = a.charCodeAt(0) - 77.5)) c = (Math.abs(b) < 13 ? (b + 48.5) : parseInt(a, 36)) + y * c;
        return c
    },
    // z starts as the largest single-character digit value occurring in y: every
    // word character of y is collected, sorted ascending by f(ch), and the last
    // (largest) one is decoded.
    z = f(y.match(/\w/g).sort(function(x, y) {
        return f(x) - f(y)
    }).pop());
// Probe successive bases. `z++` tests the old value and then increments, so the
// first attempt runs with (largest digit value + 1). If eval throws, the empty
// catch swallows the error and the loop retries with the next base; `break`
// ends the loop after the first eval that completes.
while (z++) try {
    // Every word token of y is decoded as a base-z number n and replaced by
    // x[n - 1]; an empty or missing table entry yields "_" + token instead.
    eval(y.replace(/\b\w+\b/g, function(y) {
        return x[f(y, z) - 1] || ("_" + y)
    }));
    break
} catch (_) {} < /script>

发现关键就出在最后的eval这里,可以把eval替换成alert、console.log看看结果,不多说,上代码:

# First cookie value: the leading "name=value" pair of the Set-Cookie header
# that comes back with the 521 response.
# NOTE(review): if `response` is a Scrapy response on Python 3, header values are
# bytes and would need decoding before split(';') — confirm against the caller.
__jsluid = response.headers["Set-Cookie"].split(';')[0]
cookie1 = __jsluid
# Decode the challenge script.
# re.S lets '.' span line breaks in case the inline script is wrapped.
# Only the outer `eval(y.replace(` call is turned into a `return`: a blanket
# replace('eval', 'return') would also rewrite the "eval" entry of the script's
# '@'-separated word table and corrupt the decoded
# `eval('String.fromCharCode(...)')` call of the second stage.
get_js = re.findall(r'<script>(.*?)</script>', resp_body, re.S)[0].replace('eval(y.replace(', 'return(y.replace(')
resHtml = "function getClearance(){" + get_js + "};"
ctx = execjs.compile(resHtml)
# First-stage result: the source text of the second-stage script
temp1 = ctx.call('getClearance')

结果返回的又是一段js:

var _1f=function(){setTimeout('location.href=location.pathname+location.search.replace(/[\?|&]captcha-challenge/,\'\')',1500);document.cookie='__jsl_clearance=1540540706.925|0|'+(function(){var _5b=[function(_1f){return _1f},function(_5b){return _5b},(function(){var _1f=document.createElement('div');_1f.innerHTML='<a href=\'/\'>_30</a>';_1f=_1f.firstChild.href;var _5b=_1f.match(/https?:\/\//)[0];_1f=_1f.substr(_5b.length).toLowerCase();return function(_5b){for(var _30=0;_30<_5b.length;_30++){_5b[_30]=_1f.charAt(_5b[_30])};return _5b.join('')}})(),function(_1f){return eval('String.fromCharCode('+_1f+')')}],_30=[[(-~{}+[~~{}])/[2]]+(![]+[[]][0]).charAt(((+!-[])+[-~!/!/-~!/!/]>>-~!/!/-~!/!/)),'RK',[2+2],'yG',[[-~[-~!/!/+((+!-[])<<(+!-[]))-~!/!/+((+!-[])<<(+!-[]))]]+[-~[-~!/!/+((+!-[])<<(+!-[]))-~!/!/+((+!-[])<<(+!-[]))]],(-~(+[])+[]+[])+[-~!/!/-~!/!/]+[~~'']],'D',[[(-~{}+[~~{}])/[2]]],[[-~!/!/+((+!-[])<<(+!-[]))+5]+[6]],'Q',[[6]+(-~[]+((+!-[])<<-~!/!/+((+!-[])<<(+!-[])))+[]+[[]][0])],[2+2],'tsL',[window['callP'+'hantom']+[]+[[]][0]][0].charAt((((+!-[])<<(+!-[]))<<(+!-[])))+[(-~{}|2)],'e',[[-~!/!/+((+!-[])<<(+!-[]))+5]+[-~!/!/-~!/!/]],'9L',[!!window['callP'+'hantom']+[]][0].charAt(-~!/!/-~!/!/),[(-~(+[])+[]+[])+[-~!/!/-~!/!/]],[-~!/!/+((+!-[])<<(+!-[]))+5],'%3',[[6]+[-~!/!/+((+!-[])<<(+!-[]))+5]]];for(var _1f=0;_1f<_30.length;_1f++){_30[_1f]=_5b[[0,1,0,1,3,1,2,3,1,3,0,1,0,1,3,1,0,2,0,1,3][_1f]](_30[_1f])};return _30.join('')})()+';Expires=Fri, 26-Oct-18 08:58:26 GMT;Path=/;'};if((function(){try{return !!window.addEventListener;}catch(e){return false;}})()){document.addEventListener('DOMContentLoaded',_1f,false)}else{document.attachEvent('onreadystatechange',_1f)}

美化:

// Second-stage script: _1f schedules a reload of the page and writes the
// __jsl_clearance cookie, whose value is assembled piece by piece below.
var _1f = function() {
        // After 1500 ms, navigate to the current path plus the query string with
        // the "captcha-challenge" marker (and the character in front of it) removed.
        setTimeout('location.href=location.pathname+location.search.replace(/[\?|&]captcha-challenge/,\'\')', 1500);
        document.cookie = '__jsl_clearance=1540540706.925|0|' + (function() {
            // _5b is a table of four handlers:
            //   [0] and [1] return their argument unchanged;
            //   [2] maps each index in the given array to the character at that
            //       position of the page's own address (see below) and joins them;
            //   [3] evaluates String.fromCharCode(<argument>) to turn char codes
            //       into characters.
            var _5b = [function(_1f) {
                return _1f
            }, function(_5b) {
                return _5b
            }, (function() {
                // Resolve href='/' through a temporary <a> element to obtain an
                // absolute URL, strip the leading "http://" or "https://" and
                // lowercase the rest. The result therefore depends on the address
                // the script is running under.
                var _1f = document.createElement('div');
                _1f.innerHTML = '<a href=\'/\'>_30</a>';
                _1f = _1f.firstChild.href;
                var _5b = _1f.match(/https?:\/\//)[0];
                _1f = _1f.substr(_5b.length).toLowerCase();
                return function(_5b) {
                    for (var _30 = 0; _30 < _5b.length; _30++) {
                        _5b[_30] = _1f.charAt(_5b[_30])
                    };
                    return _5b.join('')
                }
            })(), function(_1f) {
                return eval('String.fromCharCode(' + _1f + ')')
            }],
                // _30 holds the 21 raw pieces of the cookie value: plain string
                // literals mixed with obfuscated expressions that evaluate to
                // digits, index lists or char codes.
                _30 = [
                    [(-~ {} + [~~ {}]) / [2]] + (![] + [
                        []
                    ][0]).charAt(((+!-[]) + [-~!/!/ - ~!/!/] >> -~!/!/ - ~!/!/)), 'RK', [2 + 2], 'yG', [
                        [-~ [-~!/!/ + ((+!-[]) << (+!-[])) - ~!/!/ + ((+!-[]) << (+!-[]))]] + [-~ [-~!/!/ + ((+!-[]) << (+!-[])) - ~!/!/ + ((+!-[]) << (+!-[]))]], (-~ (+[]) + [] + []) + [-~!/!/ - ~!/!/] + [~~'']
                    ], 'D', [
                        [(-~ {} + [~~ {}]) / [2]]
                    ],
                    [
                        [-~!/!/ + ((+!-[]) << (+!-[])) + 5] + [6]
                    ], 'Q', [
                        [6] + (-~ [] + ((+!-[]) << -~!/!/ + ((+!-[]) << (+!-[]))) + [] + [
                            []
                        ][0])],
                    // window['callP' + 'hantom'] spells window.callPhantom. Its string
                    // form (and, further down, its truthiness) feeds a charAt, so the
                    // characters picked depend on whether that property is defined.
                    // NOTE(review): presumably a PhantomJS detection — confirm.
                    [2 + 2], 'tsL', [window['callP' + 'hantom'] + [] + [
                        []
                    ][0]][0].charAt((((+!-[]) << (+!-[])) << (+!-[]))) + [(-~ {} | 2)], 'e', [
                        [-~!/!/ + ((+!-[]) << (+!-[])) + 5] + [-~!/!/ - ~!/!/]
                    ], '9L', [ !! window['callP' + 'hantom'] + []][0].charAt(-~!/!/ - ~!/!/), [(-~ (+[]) + [] + []) + [-~!/!/ - ~!/!/]],
                    [-~!/!/ + ((+!-[]) << (+!-[])) + 5], '%3', [
                        [6] + [-~!/!/ + ((+!-[]) << (+!-[])) + 5]
                    ]
                ];
            // Piece i is passed through the handler whose index is the i-th number
            // of the inline table; the converted pieces are then concatenated.
            for (var _1f = 0; _1f < _30.length; _1f++) {
                _30[_1f] = _5b[[0, 1, 0, 1, 3, 1, 2, 3, 1, 3, 0, 1, 0, 1, 3, 1, 0, 2, 0, 1, 3][_1f]](_30[_1f])
            };
            return _30.join('')
        })() + ';Expires=Fri, 26-Oct-18 08:58:26 GMT;Path=/;'
    };
// Register _1f with addEventListener('DOMContentLoaded') when window.addEventListener
// exists, otherwise with attachEvent('onreadystatechange'). The try/catch makes a
// failing property lookup count as "not available".
if ((function() {
    try {
        return !!window.addEventListener;
    } catch (e) {
        return false;
    }
})()) {
    document.addEventListener('DOMContentLoaded', _1f, false)
} else {
    document.attachEvent('onreadystatechange', _1f)
}

 

具体破解省略。。。

结果

__jsl_clearance=1540540706.925|0|5sRK4yGMxDiVQE4tsLf3eR9Lln8%3D

 

过程就不一一细说了,反正当时花了不少时间

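对了,里面有一步必须要加上url!!!(上面第二段js里的第三个函数是通过<a href='/'>的href取当前站点的地址,去掉开头的http://或https://并转成小写后,再按下标取字符;脱离浏览器执行时,这一步要换成目标网站的url,否则算出来的__jsl_clearance不对)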

 

 

注:这是工作中遇到的一些问题,我的方法不一定是最好的,欢迎大家一起交流,+扣扣571848990

posted @ 2019-03-31 22:23  retime123  阅读(428)  评论(0)  编辑  收藏  举报