爬虫实战:汽车之家配置页面 破解伪元素和混淆JS
本篇介绍如何破解汽车之家配置页面的伪元素和混淆的JS。
**温馨提示:如需转载本文,请注明内容出处。**
本文链接:https://www.cnblogs.com/grom/p/9242156.html
(本文分多次编辑,可从原文章查看最新更新)
笔者爬取的网站中,印象最为深刻的就是汽车之家的网站了,也是麻烦最多的网站之一,特点是页面大面积使用伪元素代替关键字,解析伪元素的JS进行了动态混淆,每次刷新后的JS都是不同的,页面被禁用右键菜单,无法选中或复制。
(因为破解了一周,怕分享出来后汽车之家就改了所以到现在运行了半年后才分享出来= =)
网站地址:http://car.autohome.com.cn/config/spec/25898.html
基本就是这样,如果单纯的抓取页面元素后会是这样:
开始分析:
1.整个页面及配置数据都是直接Write出来的,因为配置项的详情在页面JS里同页面一起生成,并非通过接口获取。
2.配置项数据在页面上
(PS小妙招:将网页保存本地后发现文字依旧显示,然后大面积删除JS后刷新页面,如果文字依旧显示,继续删,直到找到加载数据的JS为止)
事后发现第一个变量keyLink是左边配置名称的超链接
第二个变量config是我们要的配置上半页(到车轮制动那),
第三个变量option是主/被动安全装备及以下,
第四、五个变量color和innerColor是外观内饰颜色
其他的没什么用,第六个(bag)可能是什么运动套装之类的,豪车才有,没仔细看。
3.解密JS在这里
这个JS是被混淆过的,不可以根据变量名去获取。
4.破解流程:拿到这个配置JSON串,然后找到解析JS,计算JS的变量得到字典集(一大串文字)和下标集(一大串数字集合),根据下标取字典里对应的文字,得到真正的数据字典,然后替换指定的伪元素。
5.解析被混淆的JS,格式化后可以得到这样的一串JS
提供一个完整的JS,有兴趣的小伙伴可以去研究研究
function(nv_) { var pk_ = function () { 'return pk_'; return 'S'; }; function AH_() { function _A() { return 'UV'; }; if (_A() == 'UV,') { return 'AH_'; } else { return _A(); } } function cU_() { 'return cU_'; return '万价'; } var xN_ = '元全准'; function $GetCustomStyle$() { var $customstyle$ = ''; try { if (HS_GetCustomStyle) { $customstyle$ = HS_GetCustomStyle(); } else { if (navigator.userAgent.indexOf('Windows NT 5') != -1) { $customstyle$ = 'margin-bottom:-4.8px;'; } else { $customstyle$ = 'margin-bottom:-5px;'; } } } catch (e) { } return $customstyle$; } var Qz_ = '前力功'; var rC_ = function () { 'rC_'; var _r = function () { return '动助华'; }; return _r(); }; var cO_ = function () { 'cO_'; var _c = function () { return '压'; }; return _c(); }; function ts_() { 'return ts_'; return '号合'; } var vO_ = function (vO__) { var _v = function (vO__) { 'return vO_'; return vO__; }; return _v(vO__); }; var zS_ = '喷'; function Gm_() { function _G() { return 'Gm_'; }; if (_G() == 'Gm__') { return _G(); } else { return '器国'; } } function Fo_() { function _F() { return '地'; }; if (_F() == '地') { return '地'; } else { return _F(); } } var wo_ = function (wo__) { var _w = function (wo__) { 'return wo_'; return wo__; }; return _w(wo__); }; var zk_ = function (zk__) { var _z = function (zk__) { 'return zk_'; return zk__; }; return _z(zk__); }; function WT_() { function _W() { return '子实容'; }; if (_W() == '子实容') { return '子实容'; } else { return _W(); } } var Ma_ = '宽'; var vk_ = function () { 'vk_'; var _v = function () { return '寸导小'; }; return _v(); }; var zl_ = '度式弗'; var ZS_ = function () { 'ZS_'; var _Z = function () { return '径'; }; return _Z(); }; function Wh_() { 'return Wh_'; return '悬'; } function fG_() { function _f() { return '成'; }; if (_f() == '成') { return '成'; } else { return _f(); } } function $GetClassName$($index$) { return '.hs_kw' + $index$ + '_configMd'; } function $RuleCalss1$() { return '::before {content:' } function kE_() { function _k() { return '或'; }; if (_k() 
== '或') { return '或'; } else { return _k(); } } function wp_() { 'return wp_'; return '扭'; } var yW_ = '承'; function bc_() { 'return bc_'; return '指'; } function tk_() { function _t() { return 'tk__'; }; if (_t() == 'tk__') { return '排'; } else { return _t(); } } var Yp_ = function () { 'return Yp_'; return '数'; }; function pR_() { function _p() { return 'pR__'; }; if (_p() == 'pR__') { return '整'; } else { return _p(); } } function BS_() { function _B() { return '最'; }; if (_B() == '最') { return '最'; } else { return _B(); } } var Bi_ = '构'; var fQ_ = '架'; function $GetWindow$() { return this['' + YE_() + (function (MR__) { 'return MR_'; return MR__; })('in') + zh_()]; } var Bh_ = function () { 'Bh_'; var _B = function () { return '标'; }; return _B(); }; var JW_ = function () { 'return JW_'; return '格'; }; function wd_() { function _w() { return 'wd__'; }; if (_w() == 'wd__') { return '梁'; } else { return _w(); } } function UX_() { function _U() { return 'UX__'; }; if (_U() == 'UX__') { return '械'; } else { return _U(); } } function QU_() { function _Q() { return '气油'; }; if (_Q() == '气油,') { return 'QU_'; } else { return _Q(); } } var Ed_ = function () { 'return Ed_'; return '测'; }; function cZ_() { 'return cZ_'; return '海液'; } var UZ_ = function (UZ__) { var _U = function (UZ__) { 'return UZ_'; return UZ__; }; return _U(UZ__); }; var vI_ = function () { 'return vI_'; return '燃'; }; var EI_ = function () { 'EI_'; var _E = function () { return '版独率'; }; return _E(); }; function DT_() { function _D() { return '盖'; }; if (_D() == '盖') { return '盖'; } else { return _D(); } } var JI_ = function (JI__) { var _J = function (JI__) { 'return JI_'; return JI__; }; return _J(JI__); }; function $Split$($item$, $index$) { if ($item$) { return $item$['' + jn_() + Dg_() + iu_()]($index$); } else { return ''; } } function YY_() { 'return YY_'; return '积'; } function hb_() { function _h() { return '称程立'; }; if (_h() == '称程立') { return '称程立'; } else { return _h(); } } var DC_ = 
function () { 'return DC_'; return '箱'; }; var ec_ = function () { 'return ec_'; return '综'; }; var $ruleDict$ = ''; var $rulePosList$ = ''; var Wr_ = function () { 'Wr_'; var _W = function () { return '缩'; }; return _W(); }; function zq_() { function _z() { return 'zq_'; }; if (_z() == 'zq__') { return _z(); } else { return '胎自'; } } var YS_ = function (YS__) { 'return YS_'; return YS__; }; var Hj_ = '距车转'; function Du_() { function _D() { return '轮'; }; if (_D() == '轮') { return '轮'; } else { return _D(); } } var cQ_ = function () { 'return cQ_'; return '轴载进'; }; function WM_() { 'return WM_'; return '适'; } function yQ_() { 'return yQ_'; return '速'; } var uC_ = function () { 'return uC_'; return '配量铝'; }; var lz_ = function (lz__) { var _l = function (lz__) { 'return lz_'; return lz__; }; return _l(lz__); }; var Te_ = '间隙风'; var Ph_ = function () { 'Ph_'; var _P = function () { return '马'; }; return _P(); }; function UO_() { function _U() { return '驱驻'; }; if (_U() == '驱驻,') { return 'UO_'; } else { return _U(); } } function Iw_() { 'return Iw_'; return '高麦'; } var KE_ = '7;107;3'; function HA_() { function _H() { return ';9'; }; if (_H() == ';9,') { return 'HA_'; } else { return _H(); } } function PI_() { function _P() { return 'PI_'; }; if (_P() == 'PI__') { return _P(); } else { return '5;70'; } } function yr_() { 'return yr_'; return '82,29'; } var mK_ = function () { 'return mK_'; return '1'; }; var Ff_ = '16,117;'; function $Innerhtml$($item$, $index$) { var $tempArray$ = $GetElementsByCss$($GetClassName$($item$)); for (x in $tempArray$) { $tempArray$[x].innerHTML = $index$; try { $tempArray$[x].currentStyle = ''; } catch (e) { } } } function vs_() { function _v() { return 'vs_'; }; if (_v() == 'vs__') { return _v(); } else { return '5,31'; } } var Ds_ = ';102,11'; function DV_() { function _D() { return '0;42,'; }; if (_D() == '0;42,') { return '0;42,'; } else { return _D(); } } function lU_() { function _l() { return '49;57,3'; }; if (_l() == '49;57,3') { 
return '49;57,3'; } else { return _l(); } } var yc_ = function (yc__) { 'return yc_'; return yc__; }; function lf_() { function _l() { return '66,'; }; if (_l() == '66,') { return '66,'; } else { return _l(); } } var IN_ = function () { 'return IN_'; return '115'; }; function Fb_() { function _F() { return 'Fb__'; }; if (_F() == 'Fb__') { return ',54;1'; } else { return _F(); } } function $InsertRule$($index$, $item$) { $sheet$['' + Mn_() + BP_ + Ni_() + FS_() + qg_() + KK_() + (function (cT__) { 'return cT_'; return cT__; })('e')]($GetClassName$($index$) + $RuleCalss1$() + '"' + $item$ + '" }', 0); var $tempArray$ = $GetElementsByCss$($GetClassName$($index$)); for (x in $tempArray$) { try { $tempArray$[x].currentStyle = ''; } catch (e) { } } } var GE_ = function () { 'GE_'; var _G = function () { return '01,11'; }; return _G(); }; function Xq_() { function _X() { return '5'; }; if (_X() == '5') { return '5'; } else { return _X(); } } var UE_ = function () { 'return UE_'; return ',54;7'; }; var Xv_ = function () { 'return Xv_'; return '4'; }; var wv_ = ';40'; function Kb_() { function _K() { return ',3'; }; if (_K() == ',3,') { return 'Kb_'; } else { return _K(); } } var Ej_ = '0,0,1'; function Xm_() { function _X() { return 'Xm_'; }; if (_X() == 'Xm__') { return _X(); } else { return ';1'; } } function NT_() { 'return NT_'; return '21,101'; } function rN_() { 'return rN_'; return ';'; } var Fc_ = function () { 'Fc_'; var _F = function () { return '7,60;'; }; return _F(); }; function $ChartAt$($item$) { return $ruleDict$['' + (function () { 'return Sm_'; return 'c' })() + aT_() + wF_()](parseInt($item$)); } function vC_() { 'return vC_'; return '98;53'; } var iB_ = function () { 'iB_'; var _i = function () { return ','; }; return _i(); }; function sn_() { 'return sn_'; return '11'; } function ZU_() { function _Z() { return 'ZU_'; }; if (_Z() == 'ZU__') { return _Z(); } else { return '2;51'; } } function lM_() { 'return lM_'; return ',105,'; } function CF_() { 
function _C() { return '44;67,9'; }; if (_C() == '44;67,9') { return '44;67,9'; } else { return _C(); } } function Ri_() { 'return Ri_'; return '2;6,67'; } function Ye_() { function _Y() { return 'Ye_'; }; if (_Y() == 'Ye__') { return _Y(); } else { return ';111'; } } function HB_() { 'return HB_'; return ',66;1'; } function EW_() { 'return EW_'; return '3,10'; } var cW_ = function () { 'return cW_'; return '3'; }; function $GetDefaultView$() { return nv_['' + Tb_() + Vo_() + 'au' + FI_() + ak_() + (function () { 'return Ya_'; return 'Vie' })() + (function () { 'return Ki_'; return 'w' })()]; } function Yf_() { 'return Yf_'; return ',100;37'; } var oh_ = function (oh__) { var _o = function (oh__) { 'return oh_'; return oh__; }; return _o(oh__); }; var Jn_ = '3'; function tl_() { function _t() { return ';48,'; }; if (_t() == ';48,,') { return 'tl_'; } else { return _t(); } } var xY_ = function () { 'return xY_'; return '15;88,2'; }; var AD_ = function () { 'AD_'; var _A = function () { return '1;4'; }; return _A(); }; var iX_ = function (iX__) { var _i = function (iX__) { 'return iX_'; return iX__; }; return _i(iX__); }; var Cy_ = function () { 'Cy_'; var _C = function () { return ';90,79;'; }; return _C(); }; function CV_() { 'return CV_'; return '1,10;94'; } function Xx_() { function _X() { return 'Xx__'; }; if (_X() == 'Xx__') { return ','; } else { return _X(); } } var QW_ = function () { 'QW_'; var _Q = function () { return '7'; }; return _Q(); }; function Vh_() { function _V() { return 'Vh__'; }; if (_V() == 'Vh__') { return '2'; } else { return _V(); } } function Bw_() { 'return Bw_'; return ';13,1'; } var Vs_ = '2,1'; var Sq_ = '6'; function ed_() { function _e() { return ',27;1'; }; if (_e() == ',27;1') { return ',27;1'; } else { return _e(); } } function Tn_() { function _T() { return 'Tn_'; }; if (_T() == 'Tn__') { return _T(); } else { return '23,45,'; } } function pr_() { function _p() { return 'pr__'; }; if (_p() == 'pr__') { return '8'; } else { 
return _p(); } } var aZ_ = function () { 'return aZ_'; return ';31,9'; }; var CL_ = '116'; function fk_() { function _f() { return 'fk__'; }; if (_f() == 'fk__') { return ';78'; } else { return _f(); } } var pz_ = function (pz__) { 'return pz_'; return pz__; }; function bC_() { function _b() { return 'bC__'; }; if (_b() == 'bC__') { return '5'; } else { return _b(); } } function $ResetSystemFun$() { if ($GetWindow$()['' + iH_() + Ct_() + Ap_() + XV_() + GP_() + BJ_() + fB_() + iz_()] != undefined) { if (window.hs_fuckyou == undefined) { window.hs_fuckyou = $GetWindow$()['' + iH_() + Ct_() + Ap_() + XV_() + GP_() + BJ_() + fB_() + iz_()]; } } if ($GetDefaultView$()) { if ($GetDefaultView$()['' + iH_() + Ct_() + Ap_() + XV_() + GP_() + BJ_() + fB_() + iz_()] != undefined) { if (window.hs_fuckyou_dd == undefined) { window.hs_fuckyou_dd = $GetDefaultView$()['' + iH_() + Ct_() + Ap_() + XV_() + GP_() + BJ_() + fB_() + iz_()]; } } } } var YD_ = function () { 'return YD_'; return '8,64;15'; }; var Dl_ = ',76;5'; function $InsertRuleRun$() { for ($index$ = 0; $index$ < $rulePosList$.length; $index$++) { var $tempArray$ = $Split$($rulePosList$[$index$], ','); var $temp$ = ''; for ($itemIndex$ = 0; $itemIndex$ < $tempArray$.length; $itemIndex$++) { $temp$ += $ChartAt$($tempArray$[$itemIndex$]) + ''; } $InsertRule$($index$, $temp$); } } var dl_ = function (dl__) { var _d = function (dl__) { 'return dl_'; return dl__; }; return _d(dl__); }; function jK_() { function _j() { return 'jK__'; }; if (_j() == 'jK__') { return '3,91;32'; } else { return _j(); } } function fI_() { function _f() { return ',71;'; }; if (_f() == ',71;,') { return 'fI_'; } else { return _f(); } } function Wm_() { function _W() { return '24,'; }; if (_W() == '24,') { return '24,'; } else { return _W(); } } var CP_ = function () { 'return CP_'; return '6'; }; var Ga_ = function (Ga__) { var _G = function (Ga__) { 'return Ga_'; return Ga__; }; return _G(Ga__); }; function pT_() { 'return pT_'; return ';12'; } 
function Ae_() { function _A() { return '2,43;'; }; if (_A() == '2,43;') { return '2,43;'; } else { return _A(); } } var Ry_ = function () { 'Ry_'; var _R = function () { return '1'; }; return _R(); }; var rM_ = '23,103,'; function XI_() { function _X() { return 'XI_'; }; if (_X() == 'XI__') { return _X(); } else { return '93;9'; } } var gk_ = '7,6'; function oQ_() { function _o() { return '2;4;1'; }; if (_o() == '2;4;1') { return '2;4;1'; } else { return _o(); } } function kp_() { 'return kp_'; return '04'; } function NC_() { function _N() { return '100;28'; }; if (_N() == '100;28,') { return 'NC_'; } else { return _N(); } } function NP_() { function _N() { return 'NP_'; }; if (_N() == 'NP__') { return _N(); } else { return ';52;'; } } var sT_ = '50,14,6'; function ux_() { function _u() { return 'ux__'; }; if (_u() == 'ux__') { return '3;50,81'; } else { return _u(); } } function hT_() { function _h() { return 'hT__'; }; if (_h() == 'hT__') { return ';'; } else { return _h(); } } function tL_() { 'return tL_'; return '90,5;'; } var sX_ = '114,4'; function qx_() { 'return qx_'; return '14;78,'; } var kS_ = function () { 'return kS_'; return '26;96,8'; }; var OC_ = function (OC__) { 'return OC_'; return OC__; }; var eT_ = function (eT__) { var _e = function (eT__) { 'return eT_'; return eT__; }; return _e(eT__); }; function yV_() { 'return yV_'; return '8;90,'; } function $GetLocationURL$() { return $GetWindow$()['' + Kp_() + Ka_() + Lw_]['' + rI_() + hw_() + MU_('f')]; } function Ra_() { function _R() { return 'Ra__'; }; if (_R() == 'Ra__') { return '46;25'; } else { return _R(); } } function Hh_() { 'return Hh_'; return ';18'; } function $SystemFunction1$($item$) { $ResetSystemFun$(); if ($GetWindow$()['' + iH_() + Ct_() + Ap_() + XV_() + GP_() + BJ_() + fB_() + iz_()] != undefined) { $GetWindow$()['' + iH_() + Ct_() + Ap_() + XV_() + GP_() + BJ_() + fB_() + iz_()] = function (element, pseudoElt) { if (pseudoElt != undefined && typeof (pseudoElt) == 'string' && 
pseudoElt.toLowerCase().indexOf(':before') > -1) { var obj = {}; obj.getPropertyValue = function (x) { return x; }; return obj; } else { return window.hs_fuckyou(element, pseudoElt); } }; } return $item$; } function Wc_() { function _W() { return ';'; }; if (_W() == ';') { return ';'; } else { return _W(); } } var $imgPosList$ = ''; var Rd_ = function () { 'Rd_'; var _R = function () { return '75,86;7'; }; return _R(); }; var uZ_ = function () { 'uZ_'; var _u = function () { return '3'; }; return _u(); }; function nn_() { function _n() { return ',67;9'; }; if (_n() == ',67;9') { return ',67;9'; } else { return _n(); } } function Kj_() { function _K() { return 'Kj__'; }; if (_K() == 'Kj__') { return ',41,3'; } else { return _K(); } } var Zk_ = '8;36,'; function JK_() { function _J() { return '83;35,6'; }; if (_J() == '83;35,6') { return '83;35,6'; } else { return _J(); } } var Zn_ = function (Zn__) { var _Z = function (Zn__) { 'return Zn_'; return Zn__; }; return _Z(Zn__); }; function hV_() { function _h() { return 'hV_'; }; if (_h() == 'hV__') { return _h(); } else { return ',93;'; } } var JL_ = '58,'; function $SuperInsertRule$() { if ($sheet$ !== undefined && $sheet$['' + Mn_() + BP_ + Ni_() + FS_() + qg_() + KK_() + (function (cT__) { 'return cT_'; return cT__; })('e')]) { return true; } else { return false; } } var UA_ = function () { 'UA_'; var _U = function () { return '59;106,'; }; return _U(); }; var bQ_ = '6'; var zR_ = function () { 'zR_'; var _z = function () { return '6'; }; return _z(); }; var JD_ = function (JD__) { var _J = function (JD__) { 'return JD_'; return JD__; }; return _J(JD__); }; function gs_() { function _g() { return 'gs_'; }; if (_g() == 'gs__') { return _g(); } else { return '7;66,9'; } } function pf_() { function _p() { return '0;'; }; if (_p() == '0;,') { return 'pf_'; } else { return _p(); } } var Hz_ = function (Hz__) { 'return Hz_'; return Hz__; }; function Ix_() { 'return Ix_'; return '20;'; } var fV_ = function () { 'return 
fV_'; return '6'; }; function xQ_() { function _x() { return 'xQ_'; }; if (_x() == 'xQ__') { return _x(); } else { return '9,119;'; } } function CE_() { function _C() { return 'CE__'; }; if (_C() == 'CE__') { return '2'; } else { return _C(); } } var fN_ = '3,12,16'; function DG_() { function _D() { return ',27'; }; if (_D() == ',27') { return ',27'; } else { return _D(); } } function JZ_() { 'return JZ_'; return ';19,'; } function uk_() { function _u() { return '89,65;1'; }; if (_u() == '89,65;1') { return '89,65;1'; } else { return _u(); } } var jW_ = function () { 'return jW_'; return '09,11'; }; var Hu_ = function () { 'Hu_'; var _H = function () { return '8;23,10'; }; return _H(); }; function Jw_() { function _J() { return 'Jw_'; }; if (_J() == 'Jw__') { return _J(); } else { return '3,'; } } var nP_ = '1'; var ZL_ = '00;20;3'; var Dw_ = function () { 'return Dw_'; return '9'; }; function iH_() { 'return iH_'; return 'get'; } function Ct_() { function _C() { return 'Co'; }; if (_C() == 'Co,') { return 'Ct_'; } else { return _C(); } } function Ap_() { function _A() { return 'Ap__'; }; if (_A() == 'Ap__') { return 'm'; } else { return _A(); } } var XV_ = function () { 'return XV_'; return 'put'; }; function GP_() { 'return GP_'; return 'edS'; } var BJ_ = function () { 'BJ_'; var _B = function () { return 't'; }; return _B(); }; var fB_ = function () { 'return fB_'; return 'y'; }; function iz_() { function _i() { return 'le'; }; if (_i() == 'le,') { return 'iz_'; } else { return _i(); } } function Mn_() { function _M() { return 'i'; }; if (_M() == 'i') { return 'i'; } else { return _M(); } } var BP_ = 'nse'; var Ni_ = function () { 'Ni_'; var _N = function () { return 'r'; }; return _N(); }; function FS_() { 'return FS_'; return 't'; } var qg_ = function () { 'qg_'; var _q = function () { return 'R'; }; return _q(); }; function KK_() { 'return KK_'; return 'ul'; } function YE_() { 'return YE_'; return 'w'; } function zh_() { function _z() { return 'zh__'; }; if 
(_z() == 'zh__') { return 'dow'; } else { return _z(); } } var Tb_ = function () { 'Tb_'; var _T = function () { return 'd'; }; return _T(); }; function Vo_() { function _V() { return 'Vo_'; }; if (_V() == 'Vo__') { return _V(); } else { return 'ef'; } } var FI_ = function () { 'FI_'; var _F = function () { return 'l'; }; return _F(); }; function ak_() { function _a() { return 't'; }; if (_a() == 't') { return 't'; } else { return _a(); } } function $SystemFunction2$($item$) { $ResetSystemFun$(); if ($GetDefaultView$()) { if ($GetDefaultView$()['' + iH_() + Ct_() + Ap_() + XV_() + GP_() + BJ_() + fB_() + iz_()] != undefined) { $GetDefaultView$()['' + iH_() + Ct_() + Ap_() + XV_() + GP_() + BJ_() + fB_() + iz_()] = function (element, pseudoElt) { if (pseudoElt != undefined && typeof (pseudoElt) == 'string' && pseudoElt.toLowerCase().indexOf(':before') > -1) { var obj = {}; obj.getPropertyValue = function (x) { return x; }; return obj; } else { return window.hs_fuckyou_dd(element, pseudoElt); } }; } } return $item$; } function $FillDicData$() { $ruleDict$ = $GetWindow$()['' + ht_() + Sc_() + (function () { 'return vW_'; return 'e' })() + (function () { 'return FC_'; return 'URI' })() + UU_ + gA_ + Qg_() + Ec_ + ZP_()]('' + pk_() + AH_() + cU_() + (function () { 'return KF_'; return '体供保' })() + xN_ + '列制' + Qz_ + rC_() + cO_() + ts_() + '名后' + vO_('吸商') + zS_ + Gm_() + Fo_() + wo_('型备') + zk_('多大') + WT_() + Ma_ + vk_() + '尺年' + zl_ + ZS_() + Wh_() + fG_() + kE_() + wp_() + yW_ + bc_() + tk_() + Yp_() + pR_() + (function () { 'return KX_'; return '时' })() + BS_() + (function () { 'return Ty_'; return '机' })() + Bi_ + fQ_ + Bh_() + JW_() + wd_() + UX_() + (function () { 'return PM_'; return '比' })() + QU_() + Ed_() + cZ_() + UZ_('点然') + vI_() + EI_() + (function (eL__) { 'return eL_'; return eL__; })('环电') + DT_() + JI_('盘矩') + (function () { 'return ez_'; return '离' })() + YY_() + hb_() + DC_() + ec_() + Wr_() + (function () { 'return xW_'; return '缸' })() + 
(function (gW__) { 'return gW_'; return gW__; })('置耗') + zq_() + YS_('舒行') + (function () { 'return BN_'; return '规豪质' })() + Hj_ + Du_() + cQ_() + WM_() + (function (yl__) { 'return yl_'; return yl__; })('逊通') + yQ_() + uC_() + lz_('长门') + Te_ + Ph_() + UO_() + Iw_() + $SystemFunction1$('')); $rulePosList$ = $Split$(($SystemFunction1$('') + '' + (function () { 'return Xs_'; return '77,' })() + KE_ + HA_() + PI_() + (function (vJ__) { 'return vJ_'; return vJ__; })(',19;') + yr_() + (function () { 'return Uj_'; return ',' })() + mK_() + Ff_ + (function () { 'return lX_'; return '67,87;5' })() + vs_() + Ds_ + DV_() + lU_() + yc_('3;') + lf_() + IN_() + Fb_() + GE_() + Xq_() + UE_() + Xv_() + wv_ + Kb_() + Ej_ + ',2' + Xm_() + NT_() + rN_() + (function (qt__) { 'return qt_'; return qt__; })('23,4') + Fc_() + vC_() + iB_() + sn_() + ZU_() + lM_() + CF_() + Ri_() + Ye_() + HB_() + EW_() + cW_() + Yf_() + oh_(',4') + Jn_ + tl_() + xY_() + AD_() + iX_('7,60') + Cy_() + (function () { 'return zK_'; return '6' })() + CV_() + Xx_() + QW_() + Vh_() + Bw_() + Vs_ + Sq_ + ed_() + Tn_() + '10' + pr_() + aZ_() + (function () { 'return VT_'; return '3;56,' })() + CL_ + fk_() + pz_(',34;') + bC_() + YD_() + Dl_ + dl_('3,11;1') + jK_() + fI_() + Wm_() + CP_() + Ga_('6;13') + ',47,60' + pT_() + Ae_() + Ry_() + (function () { 'return VR_'; return '1' })() + (function (YX__) { 'return YX_'; return YX__; })('3;') + rM_ + XI_() + gk_ + oQ_() + kp_() + (function () { 'return eq_'; return ',' })() + NC_() + NP_() + sT_ + ux_() + hT_() + tL_() + sX_ + (function () { 'return FK_'; return '3;17,' })() + qx_() + kS_() + OC_('5;80') + eT_(',44;') + yV_() + Ra_() + Hh_() + Wc_() + Rd_() + uZ_() + nn_() + Kj_() + Zk_ + JK_() + Zn_('8;') + (function (GM__) { 'return GM_'; return GM__; })('13,103') + hV_() + JL_ + UA_() + bQ_ + zR_() + JD_(';22,84') + (function (Wf__) { 'return Wf_'; return Wf__; })(';99,') + gs_() + pf_() + (function () { 'return Ia_'; return '99,112;' })() + Hz_('13,1') + Ix_() + 
fV_() + xQ_() + CE_() + fN_ + DG_() + JZ_() + uk_() + jW_() + Hu_() + Jw_() + nP_ + ZL_ + Dw_()), $SystemFunction2$(';')); $imgPosList$ = $Split$(('##imgPosList_jsFuns##' + $SystemFunction2$(';')), $SystemFunction1$(';')); $RenderToHTML$(); return ';'; } function $GetElementsByCss$($item$) { return document.querySelectorAll($item$); } function Rm_() { function _R() { return 'g'; }; if (_R() == 'g') { return 'g'; } else { return _R(); } } var sf_ = function () { 'sf_'; var _s = function () { return 'e'; }; return _s(); }; var kJ_ = function () { 'kJ_'; var _k = function () { return 'P'; }; return _k(); }; var VZ_ = function (VZ__) { 'return VZ_'; return VZ__; }; function Bf_() { function _B() { return 'Bf__'; }; if (_B() == 'Bf__') { return 'p'; } else { return _B(); } } var UF_ = function () { 'UF_'; var _U = function () { return 'e'; }; return _U(); }; var pB_ = function () { 'return pB_'; return 'r'; }; function ry_() { function _r() { return 'ry_'; }; if (_r() == 'ry__') { return _r(); } else { return 'Va'; } } function XP_() { function _X() { return 'XP__'; }; if (_X() == 'XP__') { return 'l'; } else { return _X(); } } var Yy_ = function () { 'return Yy_'; return 'u'; }; var ue_ = function () { 'ue_'; var _u = function () { return 'e'; }; return _u(); }; var Kp_ = function () { 'Kp_'; var _K = function () { return 'loc'; }; return _K(); }; function Ka_() { function _K() { return 'Ka__'; }; if (_K() == 'Ka__') { return 'ati'; } else { return _K(); } } var Lw_ = 'on'; var rI_ = function () { 'return rI_'; return 'h'; }; function hw_() { function _h() { return 'hw_'; }; if (_h() == 'hw__') { return _h(); } else { return 're'; } } var MU_ = function (MU__) { 'return MU_'; return MU__; }; function jn_() { 'return jn_'; return 's'; } function Dg_() { function _D() { return 'Dg__'; }; if (_D() == 'Dg__') { return 'pli'; } else { return _D(); } } var iu_ = function () { 'iu_'; var _i = function () { return 't'; }; return _i(); }; var $style$ = 
nv_.createElement('style'); if (nv_.head) { nv_.head.appendChild($style$); } else { nv_.getElementsByTagName('head')[0].appendChild($style$); } var $sheet$ = $style$.sheet; function ht_() { function _h() { return 'ht_'; }; if (_h() == 'ht__') { return _h(); } else { return 'de'; } } function Sc_() { 'return Sc_'; return 'cod'; } function $RenderToHTML$() { $InsertRuleRun$(); } var UU_ = 'C'; var gA_ = 'o'; function Qg_() { function _Q() { return 'mpo'; }; if (_Q() == 'mpo') { return 'mpo'; } else { return _Q(); } } var Ec_ = 'nen'; var ZP_ = function () { 'ZP_'; var _Z = function () { return 't'; }; return _Z(); }; function aT_() { function _a() { return 'aT__'; }; if (_a() == 'aT__') { return 'har'; } else { return _a(); } } function wF_() { function _w() { return 'At'; }; if (_w() == 'At,') { return 'wF_'; } else { return _w(); } } var yd_ = $FillDicData$('aJ_'); function Xn_() { function _X() { return '_;_'; }; if (_X() == '_;_') { return '_;_'; } else { return _X(); } } function iJ_() { 'return iJ_'; return ';'; } function bN_() { 'return bN_'; return '7'; } var vY_ = ';'; function PG_() { 'return PG_'; return '_0'; } var FG_ = function () { 'return FG_'; return '3'; }; function uV_() { function _u() { return '6'; }; if (_u() == '6') { return '6'; } else { return _u(); } } var lI_ = function () { 'return lI_'; return '3;7'; }; })(document);
6.全文所有JS代码因为被混淆,可能会有差异,但结构一样,可仔细寻找。
开始解析:
如上图所示,里面有好多函数和变量,它们会返回一段文字或者符号,这些零零散散的文字将被组成一个完整的数据字典库,
大致分为这几种:
直接变量赋值的,如
var mH_ = '例'
通过函数为变量赋值的,值等于return后面的字符串,如
var lI_ = function() { 'return lI_'; return '3;7'; };
函数,调用的时候获得值,值等于return后面的字符串,如
function hw_() { function _h() { return 'hw_'; }; if (_h() == 'hw__') { return _h(); } else { return 're'; } }
(其实笔者想过使用.net直接运行JS,后来发现他们这个JS是有错误的,并且(……)(document)这种形式使用MSScriptControl.ScriptControl和JScript都无法识别,只能硬着头皮分析了。。。如果有能识别这种JS的工具,求留言推荐,十分感谢。)
众所周知,函数是需要被调用才能运行的,那么入口呢,就很巧妙的隐藏在了这里 ↓
var HH_ = $FillDicData$('iU_');
接着会跳到这个函数
这个就是调用上面的那些大部分的变量组成字典集
紧接着下面的这个方法就是获取下标集合
这个方法实现根据下标集取得字典,注意这个方法的名字是不混淆的!可以直接搜索方法名找到。
"77,7"就是"环保" 通过这种方式替换页面的伪元素
分析到这里了,后面也就不难了,不再详细说明,如有不明白的,可以留言给我。
获取数据字典,模拟了刚才分析的JS
#region Fetch Autohome vehicle configuration
/// <summary>
/// Downloads an Autohome configuration page and extracts the <c>keyLink</c>,
/// <c>config</c> and <c>option</c> JSON payloads from its inline script, replacing
/// the obfuscated placeholder spans (<c>&lt;span class='hs_kwN_xxx'&gt;&lt;/span&gt;</c>)
/// with the text decoded by <c>GetAutoHomeDictionary</c>.
/// </summary>
/// <param name="Parameter">Autohome spec ID (or a full URL when <paramref name="Url"/> is true).</param>
/// <param name="Url">
/// Whether <paramref name="Parameter"/> is a full URL. NOTE(review): URL mode is
/// currently rejected by the guard at the top of the method; confirm whether that
/// guard is intentional before relying on it.
/// </param>
/// <param name="JsonKeyLink">Receives the decoded <c>keyLink</c> JSON when that segment is found.</param>
/// <param name="JsonConfig">Receives the decoded <c>config</c> JSON when that segment is found.</param>
/// <param name="JsonOption">Receives the decoded <c>option</c> JSON when that segment is found.</param>
/// <param name="JsonColor">Not populated by this implementation.</param>
/// <param name="JsonInnerColor">Not populated by this implementation.</param>
/// <param name="JsonBag">Not populated by this implementation.</param>
/// <param name="ErrorMessage">Receives the exception message when the method returns false because of an exception.</param>
/// <returns>
/// True when the page was downloaded and processed without an exception (even if no
/// configuration script was found); false when <paramref name="Url"/> is true or an
/// exception occurred.
/// </returns>
public bool GetAutoHomeCarInfo(string Parameter, bool Url, ref string JsonKeyLink, ref string JsonConfig, ref string JsonOption, ref string JsonColor, ref string JsonInnerColor, ref string JsonBag, ref string ErrorMessage)
{
    // NOTE(review): kept from the original implementation. It makes the URL branch
    // of the ternary below unreachable; remove this guard to enable URL mode.
    if (Url) return false;

    try
    {
        // Parameter is the spec (vehicle model) ID here.
        string strUrl = Url ? Parameter : "http://car.autohome.com.cn/config/spec/" + Parameter + ".html";
        HttpWebRequest webrequest = (HttpWebRequest)WebRequest.Create(strUrl);
        webrequest.AllowAutoRedirect = true;
        webrequest.Timeout = 30000;
        webrequest.CookieContainer = new CookieContainer();

        // Dispose the response and reader deterministically so the underlying
        // connection is released even when reading fails.
        string strAllHTML;
        using (HttpWebResponse response = (HttpWebResponse)webrequest.GetResponse())
        using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("utf-8")))
        {
            strAllHTML = reader.ReadToEnd();
        }

        // Decode the three replacement dictionaries from the obfuscated scripts.
        string[] KeyLink = null;
        string[] Configpl = null;
        string[] Optionpl = null;
        GetAutoHomeDictionary(strAllHTML, ref KeyLink, ref Configpl, ref Optionpl);

        // The configuration data lives in the inline script that declares "var option =".
        // When several scripts match, the last one wins (as before).
        MatchCollection carInfoMatches = Regex.Matches(strAllHTML, "<script type=\"text/javascript\">((?:.|\\n)*?)</script>");
        string strCarInfo = string.Empty;
        foreach (Match scriptMatch in carInfoMatches)
        {
            string scriptBody = scriptMatch.Groups[1].Value.Trim();
            // ">= 0" rather than "> 0": a script that starts with the declaration counts too.
            if (scriptBody.IndexOf("var option =", StringComparison.Ordinal) >= 0) strCarInfo = scriptBody;
        }

        if (strCarInfo != string.Empty)
        {
            // Map each declaration's offset to its text; SortedList keeps them in page order.
            string[] declarations = { "var keyLink =", "var config =", "var option =", "var color =", "var innerColor =", "var bag =" };
            SortedList segments = new SortedList();
            foreach (string declaration in declarations)
            {
                int offset = strCarInfo.IndexOf(declaration, StringComparison.Ordinal);
                if (offset >= 0) segments.Add(offset, declaration);
            }

            // Each segment runs from its own declaration to the next one. The last
            // segment has no following declaration to delimit it and is skipped,
            // exactly as the original code did.
            for (int i = 0; i < segments.Count - 1; i++)
            {
                string declaration = (string)segments.GetByIndex(i);

                // Only the left-hand key column and the two configuration halves are
                // exported; extend this check to handle the other segments.
                if (declaration != "var keyLink =" && declaration != "var config =" && declaration != "var option =") continue;

                int start = (int)segments.GetKey(i);
                int end = (int)segments.GetKey(i + 1);
                string JsonTemp = strCarInfo.Substring(start, end - start);

                JsonTemp = ReplacePseudoElementSpans(JsonTemp, "_baike", KeyLink);
                JsonTemp = ReplacePseudoElementSpans(JsonTemp, "_config", Configpl);
                JsonTemp = ReplacePseudoElementSpans(JsonTemp, "_option", Optionpl);

                // NOTE(review): every ';' is stripped, not only the statement
                // terminator, so semicolons inside values are removed as well.
                // Kept for output compatibility with existing callers.
                JsonTemp = JsonTemp.Replace(declaration, string.Empty).Replace(";", string.Empty).Trim();

                switch (declaration)
                {
                    case "var keyLink =":
                        JsonKeyLink = JsonTemp;
                        break;
                    case "var config =":
                        JsonConfig = JsonTemp;
                        break;
                    case "var option =":
                        JsonOption = JsonTemp;
                        break;
                }
            }
        }
        return true;
    }
    catch (Exception Ex)
    {
        ErrorMessage = Ex.Message;
        return false;
    }
}

/// <summary>
/// Replaces every <c>&lt;span class='hs_kw{index}{suffix}'&gt;&lt;/span&gt;</c> placeholder
/// of one family (e.g. <c>_baike</c>, <c>_config</c>, <c>_option</c>) with
/// <c>dictionary[index]</c>. The class suffix (marker plus up to two extra word
/// characters) is taken from the first placeholder found and is assumed to be the
/// same for all placeholders of that family in <paramref name="json"/>.
/// </summary>
/// <param name="json">Text segment that may contain placeholders.</param>
/// <param name="marker">Family marker that follows the numeric index in the class name.</param>
/// <param name="dictionary">Replacement text indexed by placeholder number.</param>
/// <returns>The segment with placeholders substituted, or the input unchanged when none are present.</returns>
/// <exception cref="InvalidOperationException">Placeholders are present but no dictionary was decoded.</exception>
private static string ReplacePseudoElementSpans(string json, string marker, string[] dictionary)
{
    // Capture the real suffix instead of assuming a fixed width after the marker.
    Match firstSpan = Regex.Match(json, @"<span class='hs_kw\d+(" + Regex.Escape(marker) + @"\w{0,2})'></span>");
    if (!firstSpan.Success) return json;

    if (dictionary == null)
    {
        throw new InvalidOperationException("No dictionary was decoded for '" + marker + "' placeholders.");
    }

    string classSuffix = firstSpan.Groups[1].Value;
    for (int j = 0; j < dictionary.Length; j++)
    {
        json = json.Replace("<span class='hs_kw" + j + classSuffix + "'></span>", dictionary[j]);
    }
    return json;
}
#endregion
破解数据字典,其实就是模拟我们上面分析的JS解析过程,其中用到大量的正则分别处理不同格式的数据集
/// <summary>
/// Rebuilds the replacement-word lists hidden behind the page's obfuscated scripts.
/// Every &lt;script&gt; block that is longer than 500 characters and does not contain
/// "try{document." is processed in three steps:
///   1. collect the values of the obfuscated helper functions / variables into a lookup table;
///   2. concatenate the pieces of the character set and of the index list found inside
///      <c>$FillDicData$</c>;
///   3. map every ';'-separated group of ','-separated indexes onto the character set.
/// The first, second and third qualifying blocks are written to <paramref name="keyLink"/>,
/// <paramref name="configpl"/> and <paramref name="optionpl"/>; later blocks are decoded but discarded.
/// </summary>
/// <param name="strAllHTML">Full HTML source of the configuration page.</param>
/// <param name="keyLink">Receives the decoded words of the first qualifying script block.</param>
/// <param name="configpl">Receives the decoded words of the second qualifying script block.</param>
/// <param name="optionpl">Receives the decoded words of the third qualifying script block.</param>
/// <remarks>
/// Exceptions are intentionally not caught here: a missing marker, a duplicate helper name or an
/// unknown helper propagates to the caller with its original stack trace.
/// </remarks>
public void GetAutoHomeDictionary(string strAllHTML, ref string[] keyLink, ref string[] configpl, ref string[] optionpl)
{
    // Patterns used by both the character-set pass and the index pass.
    const string quotedCallPattern = @"\('([A-Z]|[a-z]|[0-9]|[,]|[']|[;]|[\u4e00-\u9fbb]){1,10}'\)";
    const string quotedCallValuePattern = @"\('([A-Z]|[a-z]|[0-9]|[,]|[']|[;]|[\u4e00-\u9fbb]){1,10}(?='\))";
    const string inlineFunctionPattern = @"\(function\s{0,3}\(\)\{.*?return.*?return.*?\}\)";
    const string numericReturnPattern = @"return\s{0,2}'([0-9]|[,]|[;]){1,10}'";

    // Keep only the large script blocks that are not the "try{document." ones.
    List<string> scripts = new List<string>();
    foreach (Match scriptMatch in Regex.Matches(strAllHTML, "<script>((?:.|\\n)*?)</script>"))
    {
        string scriptText = scriptMatch.ToString();
        if (scriptText.IndexOf("try{document.") < 0 && scriptText.Length > 500)
        {
            scripts.Add(scriptText);
        }
    }

    for (int i = 0; i < scripts.Count; i++)
    {
        string script = scripts[i];

        #region Build the character set
        // Lookup table: obfuscated helper name -> the string it yields.
        Dictionary<string, string> dc = new Dictionary<string, string>();

        // Function declarations. The tail of the script is replaced by " function" so that the
        // look-ahead in the pattern can also terminate the last declaration.
        MatchCollection functionMatches = Regex.Matches(script.Replace("})(document);</script>", " function"), @"function\s(\S){0,2}_\(\)\s*\{.*?\}.*?(?=function)");
        foreach (Match functionMatch in functionMatches)
        {
            string key = string.Empty, value = string.Empty;
            getStr(functionMatch.Value, ref key, ref value);
            dc.Add(key, value);
        }

        // Variables assigned a string literal.
        foreach (Match literalMatch in Regex.Matches(script, @"var\s?\S\S_=\s?'\S*'"))
        {
            string key = string.Empty, value = string.Empty;
            getStr2(literalMatch.Value, ref key, ref value);
            dc.Add(key, value);
        }

        // Variables assigned a zero-argument function expression.
        foreach (Match functionVarMatch in Regex.Matches(script, @"var\s?\S\S_=\s?function\s?\(\)\s?\{.*?return.*?return.*?\}"))
        {
            string key = string.Empty, value = string.Empty;
            getStr3(functionVarMatch.Value, ref key, ref value);
            dc.Add(key, value);
        }

        StringBuilder sb = new StringBuilder();
        string str = Regex.Match(script, @"function\s*\$FillDicData\$\s*\(\)\s*?{.*?\$RenderToHTML").Value;

        // The character-set expression sits between "$GetWindow$()" and "$rulePosList$".
        int windowPos = str.IndexOf("$GetWindow$()");
        int rulePos = str.IndexOf("$rulePosList$");
        string tmp2 = str.Substring(windowPos, rulePos - windowPos);
        string tmp3 = tmp2.Substring(tmp2.IndexOf(']') + 1);
        string[] tmp4 = tmp3.Split('+');

        // The first and last '+'-separated pieces are skipped, as in the original algorithm.
        for (int j = 1; j < tmp4.Length - 1; j++)
        {
            string piece = tmp4[j];
            Match inlineFunction = Regex.Match(piece, inlineFunctionPattern);
            if (inlineFunction.Success)
            {
                // Immediately-invoked function: take the quoted text that follows each "return".
                var returnParts = Regex.Match(inlineFunction.Value, "return.*?(.*?).*?return.*(.*?)").Value.Split(new string[] { "return" }, StringSplitOptions.RemoveEmptyEntries);
                foreach (var returnPart in returnParts)
                {
                    string[] quoted = returnPart.Split('\'');
                    if (quoted.Length == 3) sb.Append(quoted[1].Trim());
                }
            }
            else if (Regex.IsMatch(piece, quotedCallPattern))
            {
                // ('text') : strip the leading "('".
                sb.Append(Regex.Match(piece, quotedCallValuePattern).ToString().Substring(2));
            }
            else if (Regex.IsMatch(piece, @"\(\)"))
            {
                // name() : call of a helper collected above.
                sb.Append(dc[piece.Replace("()", "")]);
            }
            else if (Regex.IsMatch(piece, @"'([A-Z]|[a-z]|[0-9]|[,]|[']|[;]|[\u4e00-\u9fbb]){1,10}'(?!\))"))
            {
                // Plain string literal.
                sb.Append(Regex.Match(piece, @"'([A-Z]|[a-z]|[0-9]|[,]|[']|[;]|[\u4e00-\u9fbb]){1,10}'").ToString().Replace("'",""));
            }
            else if (Regex.IsMatch(piece, @"\S{3}"))
            {
                // Bare identifier: variable collected above.
                sb.Append(dc[piece]);
            }
            else
            {
                // Unrecognised piece: keep a placeholder.
                sb.Append("X");
            }
        }
        #endregion

        #region Build the index list
        // The index expression runs from "$rulePosList$" up to "$SystemFunction2$".
        string tmp11 = str.Substring(rulePos);
        string tmp12 = tmp11.Substring(0, tmp11.IndexOf("$SystemFunction2$"));
        StringBuilder sb2 = new StringBuilder();
        string[] tmp13 = tmp12.Split('+');
        tmp13[tmp13.Length - 1] = tmp13[tmp13.Length - 1].Replace("),", "");
        for (int j = 1; j < tmp13.Length; j++)
        {
            string piece = tmp13[j];
            if (Regex.IsMatch(piece, quotedCallPattern))
            {
                sb2.Append(Regex.Match(piece, quotedCallValuePattern).ToString().Substring(2));
            }
            else if (Regex.IsMatch(piece, numericReturnPattern))
            {
                var returned = Regex.Match(piece, numericReturnPattern).Value.ToLower().Replace("return", "").Replace("'", "").Trim();
                sb2.Append(returned);
            }
            else if (Regex.IsMatch(piece, @"\(\)"))
            {
                // Cut everything after the first "()" before looking the helper up.
                string call = piece.Substring(0, piece.IndexOf("()") + 2);
                sb2.Append(dc[call.Replace("()", "")]);
            }
            else if (Regex.IsMatch(piece, @"\S{3}") && piece.IndexOf("'") < 0)
            {
                sb2.Append(dc[piece]);
            }
            else if (piece.Split(new string[] { "'" }, StringSplitOptions.None).Length > 2)
            {
                // Quoted literal. An empty literal '' also lands here and appends nothing,
                // which is why no separate "''" branch is needed.
                sb2.Append(piece.Replace("'", "").Trim());
            }
            else
            {
                sb2.Append("X");
            }
        }
        #endregion

        #region Build the dictionary
        // Materialise the character set once instead of once per index.
        string charSet = sb.ToString();
        List<string> list = new List<string>();
        foreach (var group in sb2.ToString().Split(';'))
        {
            var numlist = group.Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries);
            StringBuilder sbresult = new StringBuilder();
            foreach (var num in numlist)
            {
                // Cvt is a conversion helper defined outside this snippet.
                sbresult.Append(charSet[Cvt.ToInt32(num)]);
            }
            list.Add(sbresult.ToString());
        }
        #endregion

        if (i == 0) keyLink = list.ToArray();
        else if (i == 1) configpl = list.ToArray();
        else if (i == 2) optionpl = list.ToArray();
    }
}
/// <summary>
/// Resolves one obfuscated zero-argument function declaration to its name and return value.
/// The declaration is executed through <c>JSHelper.ExecJs</c> and the result is converted with
/// <c>ToString()</c>.
/// </summary>
/// <param name="str">Source text of the function declaration (trailing text is tolerated).</param>
/// <param name="resultKey">Receives the function name.</param>
/// <param name="resultValue">Receives the string form of the value returned by the function.</param>
public void getStr(string str, ref string resultKey, ref string resultValue)
{
    // The matched text may run into a following "var ..." statement; cut it off.
    int varPos = str.IndexOf("var");
    if (varPos > 0)
    {
        str = str.Substring(0, varPos);
    }

    // Everything before the first "()" minus the "function" keyword is the name.
    resultKey = str.Split(new string[] { "()" }, StringSplitOptions.RemoveEmptyEntries).FirstOrDefault().Replace("function", "").Trim();

    // NOTE(review): this executes script text scraped from a remote page. Run it only in an
    // environment where evaluating untrusted script is acceptable.
    resultValue = JSHelper.ExecJs(str + " " + resultKey + "();").ToString();
}

/// <summary>
/// Splits a <c>var name='value'</c> assignment into its name and value.
/// </summary>
/// <param name="str">Source text of the assignment.</param>
/// <param name="resultKey">Receives the variable name.</param>
/// <param name="resultValue">Receives the literal with its quotes removed.</param>
public void getStr2(string str, ref string resultKey, ref string resultValue)
{
    // Split at the FIRST '=' only, so a literal that itself contains '=' is not truncated.
    string[] parts = str.Replace("var", "").Replace("\'", "").Trim().Split(new char[] { '=' }, 2);

    // Trim after splitting: the caller's pattern allows whitespace after '=', and a stray
    // blank inside the value would shift every index into the character set.
    resultKey = parts[0].Trim();
    resultValue = parts[1].Trim();
}

/// <summary>
/// Extracts the name and the returned string literal from a <c>var name=function(){...}</c>
/// assignment without executing it.
/// </summary>
/// <param name="str">Source text of the assignment.</param>
/// <param name="resultKey">Receives the variable name.</param>
/// <param name="resultValue">Receives the quoted text that follows the relevant "return".</param>
public void getStr3(string str, ref string resultKey, ref string resultValue)
{
    // Example input: var AC_=function(){'AC_';var _A=function(){return '格';}; return _A();}
    string[] assignmentParts = str.Replace("var", "").Trim().Split('=');
    resultKey = assignmentParts[0];

    bool hasNestedFunction = str.Split(new string[] { "function" }, StringSplitOptions.None).Length > 2;
    if (hasNestedFunction)
    {
        // Nested function: the literal follows the FIRST "return".
        string outer = Regex.Match(str, @"var\s?\S\S_=\s?function\s?\(\S{0,5}\)\s?\{.*?return.*?\}").Value;
        string afterReturn = outer.Substring(outer.IndexOf("return") + 6);
        resultValue = afterReturn.Split(new string[] { "\'" }, StringSplitOptions.None)[1];
    }
    else
    {
        // Single function: the literal follows the LAST "return" (earlier ones may sit inside
        // a decoy string such as 'return xx_').
        string lastPart = assignmentParts[assignmentParts.Length - 1];
        string fromReturn = lastPart.Substring(lastPart.LastIndexOf("return"));
        resultValue = fromReturn.Split('\'')[1];
    }
}
上面的代码在解析时,有一部分是直接把混淆后的函数丢进JS引擎里执行来取值的。这个破解写得比较早,用的是JScript(Microsoft.JScript);现在推荐使用MSScriptControl.ScriptControl,它是COM组件里提供的。
using Microsoft.JScript;
using Microsoft.JScript.Vsa;
using System;
using System.CodeDom.Compiler;
using System.Collections.Generic;
using System.Linq;
using System.Reflection;
using System.Text;
using System.Threading.Tasks;

namespace library
{
    /// <summary>
    /// Thin wrapper around the JScript evaluator that runs a piece of script text
    /// on one shared engine instance.
    /// </summary>
    public static class JSHelper
    {
        // Single engine created once and reused by every evaluation.
        private static readonly VsaEngine ScriptEngine = VsaEngine.CreateEngine();

        /// <summary>
        /// Evaluates <paramref name="str"/>; simply forwards to <see cref="EvalJScript"/>.
        /// </summary>
        /// <param name="str">Script text to evaluate.</param>
        /// <returns>Whatever <see cref="EvalJScript"/> returns for the same text.</returns>
        public static object ExecJs(string str)
        {
            return EvalJScript(str);
        }

        /// <summary>
        /// Evaluates the given script text.
        /// </summary>
        /// <param name="JScript">Script text to evaluate.</param>
        /// <returns>
        /// The evaluation result, or - when evaluation throws - the exception's message
        /// as a string (the exception itself is not propagated).
        /// </returns>
        public static object EvalJScript(string JScript)
        {
            try
            {
                return Microsoft.JScript.Eval.JScriptEvaluate(JScript, ScriptEngine);
            }
            catch (Exception error)
            {
                return error.Message;
            }
        }
    }
}
这种稍微复杂点的爬虫真的十分锻炼分析能力和耐心,这也是笔者认为开发者十分重要的一种能力,而对于.net这种门槛较低,技术能力金字塔分布的开发群体,真的需要我们好好钻研技术。
如有不明或更好的建议,欢迎留言交流。
作者:Grom
出处:http://www.cnblogs.com/grom/
Where there is a will, there is a way.