沫沫漫画网Js逆向分析爬取全站资源入库处理图片合并
网站分析
-
打开目标网站:https://www.momomh.com/
-
选择一部漫画作为分析对象:《渴望:爱火难耐》
-
进到漫画详情页这里,发现并不需要逆向分析,直接就可以获取漫画信息。随便点击一章进去:渴望:爱火难耐-第1话
-
按 F12 打开开发者工具,选择 Elements 查看源码。找到突破口:在某个 script 标签下,有一段加密的字符串
-
由里到外对这一行代码进行分析,其中 _0x232c('0x7', 'T]C8') 的意思是向 _0x232c 函数传入两个固定不变的字符串,得到一个固定的返回结果。
换句话说,_0x232c('0x7', 'T]C8') 的结果就是固定值 EReVr。
-
继续向外分析,其中 _0xe1f02a[_0x232c('0x7', 'T]C8')] 和上面的分析是同一个套路,同样得到一个固定的返回值
-
点击输出的函数跳转到其定义处分析代码,其实它就在我们断点的上方
-
稍微看一下就能得出:这个函数把第一个参数当作函数来调用,并把第二个参数作为实参传给它。
所以总结得出打断点处的这段代码就是
-
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
/**
 * Obfuscated chapter-image loader, shown as captured from the reader page.
 *
 * What the visible code does, in order:
 *   1. Converts `_0x1b9bc1.k` with a CryptoJS `Utf8` helper and passes the
 *      result as BOTH the key argument and the `iv` option of a CryptoJS
 *      cipher call that takes `_0x1b9bc1.i` as input.
 *   2. Converts that result with another CryptoJS helper and returns '' when
 *      the `wmyOd` check of the result against '' passes.
 *   3. Splits the result on '|' into the implicit global `imgs`.
 *   4. Builds a string of `<img ...>` tags in the implicit global `s` and hands
 *      it to `$(_0x1b9bc1.f)`.
 *
 * Property names are resolved at runtime through `_0x232c(index, key)`, and most
 * operators are wrapped in helpers on `_0xe1f02a`. Both are defined outside this
 * excerpt, so the decoded names cannot be read from this code alone. The cleaned-up
 * snippet in this article resolves step 1-2 to `CryptoJS.enc.Utf8.parse`,
 * `CryptoJS.AES.decrypt`, `CryptoJS.pad.Pkcs7` and `toString(CryptoJS.enc.Utf8)`.
 *
 * NOTE(review): the excerpt was missing the function's final closing brace and
 * had every token on its own line. The layout is restored here because a line
 * break directly after `return` makes automatic semicolon insertion turn
 * `return ''` into `return;`. No identifier, literal or operator was changed.
 *
 * @param {Object} _0x1b9bc1 Page configuration object (`loadConf` in this article);
 *   the properties read here are `k`, `i`, `c`, `l` and `f`.
 * @returns {string|undefined} '' when the decoded payload fails the emptiness
 *   check; otherwise no value is returned on the paths that render images.
 */
function _0x317597(_0x1b9bc1) {
    var _0x42f71a = CryptoJS[_0x232c('0x11', 'e*R8')]['Utf8'][_0x232c('0x12', 'e*R8')](_0x1b9bc1['k']);
    // The same value is used as key argument and as `iv`.
    var _0x2ea3c6 = CryptoJS[_0x232c('0x13', 'O3X#')][_0x232c('0x14', 'e*R8')](_0x1b9bc1['i'], _0x42f71a, {
        'iv': _0x42f71a,
        'padding': CryptoJS['pad'][_0x232c('0x15', 'oEHH')]
    });
    _0x2ea3c6 = _0x2ea3c6[_0x232c('0x16', 'Plzz')](CryptoJS[_0x232c('0x17', 'fWan')][_0x232c('0x18', 'H8Db')]);
    if (_0xe1f02a['wmyOd'](_0x2ea3c6, '')) {
        // Both branches return the same value; presumably obfuscator-inserted noise.
        if (_0xe1f02a[_0x232c('0x19', '(tyc')](_0xe1f02a[_0x232c('0x1a', 'RcQ4')], _0xe1f02a[_0x232c('0x1b', 'fWan')])) {
            return '';
        } else {
            return '';
        }
    }
    // `imgs` is an implicit global: this is the value the article inspects at a breakpoint.
    imgs = _0x2ea3c6[_0x232c('0x1c', '88oY')]('|');
    if (_0xe1f02a[_0x232c('0x1d', 'O3X#')](imgs[_0x232c('0x1e', 'oEHH')], 0x0)) {
        s = '';
        len = imgs[_0x232c('0x1f', 'fyE)')];
        for (var _0xff1a4d = 0x0; _0xe1f02a[_0x232c('0x20', 'e*R8')](_0xff1a4d, len); _0xff1a4d++) {
            // Entries that match a marker string are parsed by `_0x2ca615` into `info`;
            // `info[0x0]` goes into the tag and `info[0x1]` selects the width value `w`.
            if (_0xe1f02a[_0x232c('0x21', 'jAwS')](imgs[_0xff1a4d][_0x232c('0x22', '4of&')](_0xe1f02a[_0x232c('0x23', 'jAwS')]), -0x1)) {
                info = _0xe1f02a[_0x232c('0x24', 'fyE)')](_0x2ca615, imgs[_0xff1a4d]);
                w = _0xe1f02a[_0x232c('0x25', 'oEHH')](info[0x1], 0x96) ? 0x14 : 0x64;
                s += _0xe1f02a[_0x232c('0x26', 'Plzz')](_0xe1f02a[_0x232c('0x27', 'zw$3')](_0xe1f02a[_0x232c('0x28', 'wrg$')](_0xe1f02a[_0x232c('0x29', 'Vsp#')](_0xe1f02a['qcRQe'](_0xe1f02a[_0x232c('0x2a', 'Q]pH')](_0x232c('0x2b', 'saz)'), w), _0xe1f02a['sSLrn']), info[0x0]), _0xe1f02a[_0x232c('0x2c', 'H8Db')]), _0x1b9bc1['l']), '\x22>');
                continue;
            }
            if (_0x1b9bc1['c'] && _0xe1f02a[_0x232c('0x2d', 'yert')](_0x1b9bc1['c'], 0x0)) {
                // Flattened control flow: the case labels run in the order given by a
                // '|'-separated string decoded at runtime, then `break` leaves the loop.
                var _0x3b6771 = _0x232c('0x2e', 'saz)')[_0x232c('0x2f', 'jAwS')]('|')
                  , _0x128fc0 = 0x0;
                while (!![]) {
                    switch (_0x3b6771[_0x128fc0++]) {
                    case '0':
                        k = _0xe1f02a[_0x232c('0x30', 'z#4F')](_0xff1a4d, 0x1);
                        continue;
                    case '1':
                        mod = _0xe1f02a['RboUH'](k, _0x1b9bc1['c']);
                        continue;
                    case '2':
                        if (k != 0x1 && _0xe1f02a[_0x232c('0x31', '1pZZ')](mod, 0x0)) {}
                        continue;
                    case '3':
                        if (_0xe1f02a['ymPPG'](_0x1b9bc1['c'], 0x6)) {
                            if (_0xe1f02a[_0x232c('0x32', 'zw$3')] === _0xe1f02a[_0x232c('0x33', 'PgS1')]) {
                                // `str` and `sp` are not defined in this excerpt.
                                return str[_0x232c('0x34', 'T]C8')](sp);
                            } else {
                                if (_0xe1f02a[_0x232c('0x35', 'PgS1')](k, 0x1) || k != 0x1 && mod == 0x1) {
                                    w = 0x64;
                                } else {
                                    w = 0x14;
                                }
                            }
                        }
                        continue;
                    case '4':
                        s += _0xe1f02a['TWFcO'](_0xe1f02a['WngaM'](_0xe1f02a['mTPxd'](_0xe1f02a['osuEz'](_0xe1f02a[_0x232c('0x36', '4of&')] + w + _0xe1f02a[_0x232c('0x37', 'I0J#')], imgs[_0xff1a4d]), _0xe1f02a[_0x232c('0x38', '#5gG')]), _0x1b9bc1['l']), '\x22>');
                        continue;
                    case '5':
                        w = _0xe1f02a['WPfTk'](0x64, _0x1b9bc1['c']);
                        continue;
                    }
                    break;
                }
            } else {
                // Default rendering: a full-width lazy-loaded <img> per entry.
                s += _0xe1f02a[_0x232c('0x39', 'TX#a')](_0xe1f02a[_0x232c('0x3a', 'zw$3')](_0xe1f02a['qkGRr'](_0xe1f02a[_0x232c('0x3b', 'aS*w')]('<img\x20style=\x22width:100%;\x22\x20class=\x22lazy\x22\x20data-original=\x22', imgs[_0xff1a4d]), _0xe1f02a[_0x232c('0x3c', 'I0J#')]), _0x1b9bc1['l']), '\x22>');
            }
        }
        // Hands the generated markup to the element selected by `_0x1b9bc1.f`.
        _0xe1f02a[_0x232c('0x3d', 'jAA%')]($, _0x1b9bc1['f'])[_0x232c('0x3e', 'u5iv')](s);
    }
}
-
分析这个函数,在关键变量 imgs 处再打一个断点
-
输出一个imgs, 发现这就是我们想要的结果
-
分析到这里基本已经完成,下面继续编写代码,完成爬取图片、入库等操作
-
总结整理一下,得出以下代码
-
12345678910111213141516
var
loadConf = {
i:
"2fwFfyil4wHJqrgEtXgpFAfgoiD47DksIXZNdbrHtA4C+iN5hH3rK3ZohZoz/tBeXkzqlFDtVhqHdceI/Lo7jUBW2z9JRmAWORxfrfO/fCP1E8jjGI4bpLDzisIaOi1X/lA0rv+pUieoftsDVSOq9hclmcV38tsTghaxT0Tqx0Z28sXK8PX93UjdLrdnqj1ESng8x25FAz9d4B5SANBO+NqKanBZ/kyYZ7q96OygRc+Qf7k29A792SQMtu20ZpA+/1PGgC4vpOZyS8No7CN7dSkfC+0tfqDCU3I6Bhixq13uJ114ryF8Cod+0d7WO5GakDr7mIjlemugfT3jprKlSFZKoNLlDt07M6MRT73QPZPIZxGkKiZlGAgYuIIWtvGXNy8wtsI7Olwkk9YIBD7TduUmMiWhNEvSqfxeVEsk1f6/r2/U/qPYJiWGgWKLwl4M0CXLaU2NV8htYyyLAA/6bSP7dTm6+hFmF/ktcJ6ow8bHsQpoVjjlIgERtptARrUjHlg567Mqk3IZRf3zQE3hqoN4iN9DvPlpKez8a8fBuPbdPB0jUj3xpCr8yoggXW9Sb9SxTXB1/yxKG2OhoboqoOK9rjxEZucp5P+AEae+UKpiN5j7SaOW8SEZ48wl6Ln0kRBmpfbodDronlR/vIXFhWZiHTEgLAifWk1fckwUEQg9IQPd1CeTlAGwgUm+1zYqKiziAs34arPp+faLL25RYrGkhU6OldrYStQsE7TDj1p1pWbDNiZBzA3+H7en2wXIBEqSvC+FXL9ODBooB9DSaGbjIUWrQ6Q+QToUVdU8uFs5siQGFQ1cpI8GAyqUD/NQPMF4mjG2IqYJUvCyJj8ZEgXG0FUAj24H1FMAWn4W3h4D+zrmGFHR+Q5jecg+tZSrsyYG0tpTJZ59lwi/+Iw8bcXALVMD1QXfkgosN1M2gMl5sBGgkGMFl2hivRs37RmUMVic2PceW9pLQO1DQLyWMfP8YhYSdjegKM7m/wEwcu9FhN3DszNgkCGhWCqIuiJfzwRIMrSozs9RSl7CcLaekWWF+08IPFcLVWCuiOTKKNXjsOZ/4VtgLCkBMfDQwVmI1pwgYwXyeOcE37PBgGqy229hafx+KPkGPBtGXMCEE1SG/9GBEU1JdvQthGmtMkMWFQ9UZS00VvwGdYArZNPXEOgjZEQRlKwRvZ/dtVRpH6T3VSPudjNxaiLVjYvhP4lXtuCHSXQ1glWIFMM+14ZBl+7VQEOAJ11+Yggqskbv/WEu0PxpK8EvnHx4QTlo0KHXwTNzz29CpejJ5LZwDBKogCsaAlkKDflfNRkhaxpJavkqi2SOX5q3R1CU1bhsPyx00c7mRnv1LIY5fXqNLLoDjlzq91tE9FdqudOuJWR/GciSCQnaXzd+Y0OgTDBN2Szach9bjr2uzW2JuoN945vHfHvKUxdcBPy1eVSqRwjkXA8zpsgETxkRutWBeW74ZQGnlDb4QgHxsxTFJd4nHJydV2W1YZd6lOosO7C6Ryl34b1MLq8qL/zgwArt/xe1qHuY2PMKIpC+zBOX/WHjxWsZs9c4RU1akfnkcl5tCxnjl1pI4NyDpEEjE2RHhXHVAQayr84tAMtNcdLoVdl9cJWRKJ87wfXfgCED5zZLUxGbg7CXk8iQZHE+RZnEQ3m979Xipn0sbT1wtqB4y5B2oFAGzX5CfrAMj3Z8tOXMftj9EZBB+Ms8Lfz1Fr0wvcT2NUwUdvdf4ZXk99r2Z2gNrEJEG9yU6lFOLONVCwkDBGHqD3J5FL7P6xHwUXTb8mXILtB2h9+hdu4s6wrHJ1y0THBM2G42DE8DXf2Ca0sztlvFvxAOOWqYuT4ENev9ows0lkXDclchGIiQ+LVGpBwBWPpWhFiZeM16UGzC0C/nSL4irO4SXvDltdhcSEuQRAxM4mQyJB0pQs3k8WDi1fi0qN8lUjnPszkun4PImxZEiVw2KLKActzPqVW+LTT2R9KkD6SbHNRBXyMdt5FSx9U
kLZVa0urCweWQvKe73xmcp1S6Jkr5Ifmi21hxiCbMx50sOm6EkIRiHVhhzIEftTKVXH/ioDzUY43ROxeqTkmHc7fdpu9l0esNGnTMF+emucq9G9IsoiWPvLEnHURMlbNeKtHS5Y8K8G9cyGHe15+KsqZWv0OObys7WMzZuoKl+AtbaJCixzxdX/cHNuDPpEXvRbLVvicUjfPzt1sk6SYjd9pzyDjR5tcIPIRSoz87iJJUzH+yqTDREJKURmbIq8Pjfn+a8RU+LVyL3xFap3jSyCVPi0LbQzcGrg2E0d457I4RLTzj0JtjPnn7DnElzD+WAUNdnaKPfs6tgej47pczPTVf2TE6apwBk1joC1JICsCPN6QEm1CokvGWQgis+1rpPi2hEuC+FPPNqOfE4BTpbxBfyl/QsGEwu1VBvGXJmcgsuh7ogvGXUmx2Xo00TMTgoLrc+2t9cWtuMq1T9ralJ1wxnKFH29ZOVVvPDHZw/uzZ98f4u5L1wpQQ4PxzzwJp0DLOwgxV9vRbznZnQBWT2ABQ6z8786WdkJ6srjtzUNVKesplg+aOLeoiRLAE9UrjUTmICzS0B1u4FVAQAIldDGOmtLNTfIP7TALmyLzKnxKfCnQSwWf3ZXVEBrZugzbTHSh0uU+oiwHz9Nemgsw/HW6qwSoFii2hIRS+EKWvkIgsnCZfZP8CHZss7Cy6DEeL5GZA6jMNMNbnnafJxyqM4K6rNljP9dUFsDeWfvCFYpoOwG4JuiCSR/O9cYed05wl0Qk88pEhRm5VkmUoBmM1AnBgki4W3OEFZaALhhzI78EgfpG+Pj5NfvetvVCcwLzAIZZhWKX1pK/P6Z0fxF9vvVcqqNr0vMY4h6x2oCzK0FdeBN4lZrjnveXMDYti1nVT5mgfgmJt9xJmYpvWd7/we/zyoBESkkpPaNlyZMzBoBQrrXIzxQ2dUsi/dQVf/hfajXRKn4FdK9Upz9ky9BU9HYZR/1AyOI0LIlPc0ve2ZnB/2ZsOuoR/eWkSHuQukOlf7eG2fXUXV9gHwOqtyNgGvymOQk4rHi4hfUns8LExCjwcgb9nDQiZgZvqcaSqPB3quqN1zSTIhk12Was2C8QbbPo9hhe4i6cK4hec9VWm4sf9OKa8A7PCQgSG/dywoS+LTHaQ0vZjFqREE3Z6rKp/0Mbrffpfd2p25lxKDX7oVtmJBLmHwu8AEZWzijT1H495tz+2b8/r1RhKfgaeV1wyXVSb/AYSJAMvHu3RMnMQFkLoz+60ltuOM4HBXXUZyYlYARMyXV6PFTNnnfb8aJfldHB7CJyVBcUtSpbtybDL4+tLfWMXFfnILszUrOkQaM1SXw+6Rw2KFvkMwkSryeeVDI9j+4vovfL1I9iBt5wh/F2AF/phA08vxvRWcTcjM8RYowEE2uhdnk4q6ur2Ev00N8ggOKhnLG56yuRm2j+T5D8exgfbeiiN/iz4YP16wAvVcWX96PC/eezKNvnpu8NcenId6B4UkB2qwiP2/UuDJUpkxSCh2WAovk8mGYpXDLl4Ev1Aby5m51ePG9WGK57Rm4QQouJ45mYeZUZtf3pNllMBONTfLPtutp/TwTNJ2Y5/OqnLoXsNgo5L0Gpv4/ZeGu6oA0pmWy2Db/Jk/n0PjjLhLPtvI75Ew7H7jGtwvtKcwydsmhzjJL86X9p82piAtDUEtIC0014n3inh9/6HMJQyh2ItjSxlBxvicuhiGUf5xl0tRaiEKEchH+7R9k/s/w3iveUhGJmVBQbFDyxjtqLh8jbEjwkuD1rgihq0gDVEVcBJ8FAtz2jXNXXPlLe7FFhEebryiNnDswA8wMLMyZ5xJgnUyfxqsT5oeUVfEEkqWtTUjzRY5xCfDqgAWjvFZQg7pDkhlHSPlvsePreNGFjPpphxgjRWURKhZXXT0j6VT5PDAmlPy03pRS3i8k64WeWlOkIqb7Evp4aDTjZh/ZTVaKzeXnQ4iGeKcJTNTnN/LLQUO3Y8nhLio18
M8S79rR5/4sG+zP6yO10ThOyuRlTDc/9weWEoQQEt+4TbqfB4ORHAix7S/IpCGdzV2O8ifku5/v34KMjaRWLn0UgeqfwDO2y1P7W08jOr7vkYIyzcnUUpLV/5xJ76UBiXcVbGMCg/f/Uv9dz/RENKaOEnnB1J5uvrZ4tFHc6eatNLFO9622mgGiTI6MSs6Hse9zyBE76qhDZbiDH2ENEwrTGZzXZ5YKolWlCsIau7iH/a3r/LA6iOJp28QJckUM6MFY1L8kQO/qay81528M+8Bg3U0ebGDzccsg8FmLRCNH5OBoStmLkcEKVQjBvKmKKafxbxwEz3jyW8zPkli6LqItp9Oy+Pf2NYpMSEh9r67GL2GClmrz0FfLczrHoj80M68oyRqt+EF4gzFfMMjipzzOnuVFTzwhFsyjFkEKKDY7UDCzko32pAKgd2YkqmurVa4A8/cYeat+ugcheKjkzWx3KQ1ttkXZa+gqEp1wNGCyosQVxiwdN/3SNi7ra0NGvMahLMIByJmGOidoO/efc/1kUJ7fqtVYYOJab2TLPzTAaerkMBW8WLCsFWpet05drHspv+nO3heo+mN7EF3oG6COEmJ8RdWcvDVqLQ8QPY3phg75ksqGqDYExRUZoJGsbax/2tXo8bQx5WaZMNGEXPZMeQoDvSDyxLRdIRv4k4TXRWccxSg9QNR+PZqCQZsp7bYZl/4NZ/GEU="
,
c:
"0"
,
k:
"fd946a640a65eb1d"
,
d:
"momomh.com"
,
l:
"https://ae01.alicdn.com/kf/Uf8692d06f3694b03b1881ded2b087438H.png"
,
f:
"#cp_img"
};
// Decrypt the chapter payload carried by `loadConf`.
// `loadConf.k` is parsed as UTF-8 and used as BOTH the AES key and the IV;
// `loadConf.i` is the ciphertext string handed to CryptoJS.AES.decrypt.
const a = CryptoJS.enc.Utf8.parse(loadConf.k);
console.log(a);

const b = CryptoJS.AES.decrypt(loadConf.i, a, {
  iv: a,
  padding: CryptoJS.pad.Pkcs7,
});

// Decode the decrypted bytes as UTF-8 text. The obfuscated page code splits this
// text on '|' to obtain the image list.
const c = b.toString(CryptoJS.enc.Utf8);
console.log(c);
编写代码爬取漫画
-
新建一个_momomh.js 文件, 把上面的js代码稍微整理下copy进去
-
新建个momomh_com.py 文件 编写爬虫逻辑
-
1234567891011121314151617181920212223242526272829303132
# !/usr/bin/python3
# -*- coding: utf-8 -*-
# Time : 2020/10/28 15:35
# Author : Amd794
# Email : 2952277346@qq.com
# Github : https://github.com/Amd794
import ast
import re

import execjs

from threading_download_images import get_response
class Momomh(object):
    """Resolver that turns a chapter page URL into the chapter's image URLs."""

    @staticmethod
    def _momomh(detail_url):
        """Fetch a chapter page and return its image URLs.

        The page embeds a ``var loadConf = {...}`` JavaScript object literal. It is
        extracted with a regex, its bare keys are quoted so it can be parsed as a
        Python literal, and the resulting dict is passed to the ``getArr`` function
        of ``../js/_momomh.js`` (run through ``execjs``).

        :param detail_url: URL of the chapter page.
        :return: list of image URLs with a trailing ``_w_720`` marker removed.
        :raises IndexError: if the page contains no ``var loadConf = {...}`` block.
        """
        header = {
            'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 11_0 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) Version/11.0 Mobile/15A5341f Safari/604.1 Edg/85.0.4183.83',
        }
        response = get_response(detail_url, header=header)
        load_conf = re.findall('var loadConf = ({.*?})', response.text, re.S)[0].strip('\n')

        # Quote the bare JS keys (i, c, k, d, l, f). Only a key that directly follows
        # '{' or ',' is rewritten, so the same letters inside a value are left alone.
        load_conf = re.sub(r'([{,]\s*)([ickdlf])\s*:', r'\1"\2":', load_conf)

        # Close the script file instead of leaking the handle.
        with open('../js/_momomh.js') as js_file:
            js_source = js_file.read()
        ctx = execjs.get().compile(js_source, cwd='../js/node_modules')

        # SECURITY: load_conf comes from a remote page, so it must not go through
        # eval(). ast.literal_eval accepts the same dict-of-strings literal without
        # executing code.
        data = ctx.call('getArr', ast.literal_eval(load_conf))

        # str.strip('_w_720') would strip any of the characters "_", "w", "7", "2", "0"
        # from both ends, which can eat part of the URL itself. Remove exactly one
        # trailing marker instead.
        marker = '_w_720'
        image_url = [url[:-len(marker)] if url.endswith(marker) else url for url in data]
        return image_url
# Manual check: resolve one chapter page and print the resulting image URL list.
if __name__ == '__main__':
    print(Momomh._momomh('https://m.momomh.com/view/ZJBBO.html'))
4. 最后整合到主程序中测试, 没问题后就可以部署到服务器上进行爬取入库。
5. 稍微配置一下
6. 运行看最终结果
7.
图片合并
8. 下载完成后,会发现一个问题。下载下来的图片被切割了
所以,还要做合并一下。代码如下:
# !/usr/bin/python3
# -*- coding: utf-8 -*-
# Time    : 2020/11/22 19:35
# Author  : Amd794
# Email   : 2952277346@qq.com
# Github  : https://github.com/Amd794
"""Merge sliced comic images in the current directory back into whole pages.

Every file whose name contains ``_w_144`` is treated as one slice. Slices are
ordered by the first number in their file name, grouped ``page`` at a time, and
pasted side by side into a new image. Afterwards the slices are deleted,
``error_urls.txt`` is emptied, ``try_to_fix.py`` is run and this script deletes
itself.
"""
import os
import re
from shutil import copyfile

from PIL import Image


def f(s):
    """Sort key: the first run of digits in ``s`` as an int, or 999 if there is none."""
    try:
        # Raw string: '\d' in a normal string literal is an invalid escape sequence.
        return int(re.findall(r'\d+', s)[0])
    except IndexError:
        return 999


suffix = ['jpg', 'png', 'jpeg']
page = 5  # number of consecutive slices pasted into one output image
file_list = [imgFileName for imgFileName in os.listdir('.')
             if imgFileName.endswith(tuple(suffix)) and '_w_144' in imgFileName]
file_list.sort(key=f)
file_groups = [file_list[i:i + page] for i in range(0, len(file_list), page)]
file_name = ''
for group in file_groups:
    print(f'-----正在操作{group}分组-----')
    # Only the size of the first slice is needed; release the file right away.
    with Image.open(group[0]) as image:
        width, height = image.size
    to_image = Image.new('RGB', (width * page, height))  # blank canvas for the merged page
    for index, pic in enumerate(group):
        file_name = pic.replace('_w_144', '')
        # Close every slice after pasting. An open handle keeps the file locked on
        # Windows and makes the os.remove() below fail with PermissionError.
        with Image.open(pic) as piece:
            to_image.paste(piece, (int(width) * index, 0))
    # Saved once per group, under the name of the group's last slice without the marker.
    to_image.save(file_name)
    to_image.close()

for i in file_list:
    try:
        os.remove(i)
    except PermissionError:
        print(f'-----{i} PermissionError-----')

# Create or truncate the error list.
with open('error_urls.txt', 'w'):
    pass

# NOTE(review): source and destination are the same path when run from this
# directory, and shutil.copyfile raises SameFileError in that case. Presumably
# the destination is meant to be another directory; confirm before relying on it.
copyfile('try_to_fix.py', os.path.join('./', 'try_to_fix.py'))
os.system("python try_to_fix.py")
# The script removes itself once it has finished.
os.remove(__file__)
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· .NET开发智能桌面机器人:用.NET IoT库编写驱动控制两个屏幕
· 用纯.NET开发并制作一个智能桌面机器人:从.NET IoT入门开始
· 一个超经典 WinForm,WPF 卡死问题的终极反思
· ASP.NET Core - 日志记录系统(二)
· .NET 依赖注入中的 Captive Dependency
· 在外漂泊的这几年总结和感悟,展望未来
· 博客园 & 1Panel 联合终身会员上线
· 支付宝事故这事儿,凭什么又是程序员背锅?有没有可能是这样的...
· https证书一键自动续期,帮你解放90天限制
· 在 ASP.NET Core WebAPI如何实现版本控制?