沫沫漫画网Js逆向分析爬取全站资源入库处理图片合并

网站分析


  1. 打开目标网站:https://www.momomh.com/

  2. 选择一部漫画作为分析对象:《渴望:爱火难耐》

  3. 进到漫画详情页这里,发现并没有需要逆向分析。直接可以获取漫画信息。随便点击一章进去:渴望:爱火难耐-第1话

  4. F12 打开开发者工具,选择 Elements 查看源码。找到突破口:在某个 script 标签下,有一段加密的字符串

  5. cbedc3ebece0a64a3a9b6761aa4a4276.png

  6. 8eae5ae16634e0f35c3fa4bcbd60d1ae.png

  7. 由里到外对这一行代码进行分析,其中 _0x232c('0x7', 'T]C8') 的意思就是往 _0x232c 函数传入两个固定不变的字符串,得到一个固定的返回结果。

    换句话说, _0x232c('0x7', 'T]C8') 的结果就是一个固定值 EReVr

  8. d6c44dcdf76c79418db08bf547ebd76a.png

  9. 向外扩散分析 ,其中_0xe1f02a[_0x232c('0x7', 'T]C8')]和上面分析一个套路,得到一个固定返回值

  10. fd1e00c5fd71cfb9495567003237f3c7.png

  11. 点击输出的函数,跳转过去分析代码,其实它就在我们断点的上面

  12. 6c5b182b5f49d85b156a78361a666563.png

  13. 稍微看下就能得出,其实这个函数就是把第一个参数作为函数, 第二个参数作为第一个参数函数的参数。

    所以总结得出打断点处的这段代码就是

  14. 该函数(混淆后)的完整代码如下:

        function _0x317597(_0x1b9bc1) {
            var _0x42f71a = CryptoJS[_0x232c('0x11''e*R8')]['Utf8'][_0x232c('0x12''e*R8')](_0x1b9bc1['k']);
            var _0x2ea3c6 = CryptoJS[_0x232c('0x13''O3X#')][_0x232c('0x14''e*R8')](_0x1b9bc1['i'], _0x42f71a, {
                'iv': _0x42f71a,
                'padding': CryptoJS['pad'][_0x232c('0x15''oEHH')]
            });
            _0x2ea3c6 = _0x2ea3c6[_0x232c('0x16''Plzz')](CryptoJS[_0x232c('0x17''fWan')][_0x232c('0x18''H8Db')]);
            if (_0xe1f02a['wmyOd'](_0x2ea3c6, '')) {
                if (_0xe1f02a[_0x232c('0x19''(tyc')](_0xe1f02a[_0x232c('0x1a''RcQ4')], _0xe1f02a[_0x232c('0x1b''fWan')])) {
                    return '';
                else {
                    return '';
                }
            }
            imgs = _0x2ea3c6[_0x232c('0x1c''88oY')]('|');
            if (_0xe1f02a[_0x232c('0x1d''O3X#')](imgs[_0x232c('0x1e''oEHH')], 0x0)) {
                s = '';
                len = imgs[_0x232c('0x1f''fyE)')];
                for (var _0xff1a4d = 0x0; _0xe1f02a[_0x232c('0x20''e*R8')](_0xff1a4d, len); _0xff1a4d++) {
                    if (_0xe1f02a[_0x232c('0x21''jAwS')](imgs[_0xff1a4d][_0x232c('0x22''4of&')](_0xe1f02a[_0x232c('0x23''jAwS')]), -0x1)) {
                        info = _0xe1f02a[_0x232c('0x24''fyE)')](_0x2ca615, imgs[_0xff1a4d]);
                        w = _0xe1f02a[_0x232c('0x25''oEHH')](info[0x1], 0x96) ? 0x14 : 0x64;
                        s += _0xe1f02a[_0x232c('0x26''Plzz')](_0xe1f02a[_0x232c('0x27''zw$3')](_0xe1f02a[_0x232c('0x28''wrg$')](_0xe1f02a[_0x232c('0x29''Vsp#')](_0xe1f02a['qcRQe'](_0xe1f02a[_0x232c('0x2a''Q]pH')](_0x232c('0x2b''saz)'), w), _0xe1f02a['sSLrn']), info[0x0]), _0xe1f02a[_0x232c('0x2c''H8Db')]), _0x1b9bc1['l']), '\x22>');
                        continue;
                    }
                    if (_0x1b9bc1['c'] && _0xe1f02a[_0x232c('0x2d''yert')](_0x1b9bc1['c'], 0x0)) {
                        var _0x3b6771 = _0x232c('0x2e''saz)')[_0x232c('0x2f''jAwS')]('|')
                          , _0x128fc0 = 0x0;
                        while (!![]) {
                            switch (_0x3b6771[_0x128fc0++]) {
                            case '0':
                                k = _0xe1f02a[_0x232c('0x30''z#4F')](_0xff1a4d, 0x1);
                                continue;
                            case '1':
                                mod = _0xe1f02a['RboUH'](k, _0x1b9bc1['c']);
                                continue;
                            case '2':
                                if (k != 0x1 && _0xe1f02a[_0x232c('0x31''1pZZ')](mod, 0x0)) {}
                                continue;
                            case '3':
                                if (_0xe1f02a['ymPPG'](_0x1b9bc1['c'], 0x6)) {
                                    if (_0xe1f02a[_0x232c('0x32''zw$3')] === _0xe1f02a[_0x232c('0x33''PgS1')]) {
                                        return str[_0x232c('0x34''T]C8')](sp);
                                    else {
                                        if (_0xe1f02a[_0x232c('0x35''PgS1')](k, 0x1) || k != 0x1 && mod == 0x1) {
                                            w = 0x64;
                                        else {
                                            w = 0x14;
                                        }
                                    }
                                }
                                continue;
                            case '4':
                                s += _0xe1f02a['TWFcO'](_0xe1f02a['WngaM'](_0xe1f02a['mTPxd'](_0xe1f02a['osuEz'](_0xe1f02a[_0x232c('0x36''4of&')] + w + _0xe1f02a[_0x232c('0x37''I0J#')], imgs[_0xff1a4d]), _0xe1f02a[_0x232c('0x38''#5gG')]), _0x1b9bc1['l']), '\x22>');
                                continue;
                            case '5':
                                w = _0xe1f02a['WPfTk'](0x64, _0x1b9bc1['c']);
                                continue;
                            }
                            break;
                        }
                    else {
                        s += _0xe1f02a[_0x232c('0x39''TX#a')](_0xe1f02a[_0x232c('0x3a''zw$3')](_0xe1f02a['qkGRr'](_0xe1f02a[_0x232c('0x3b''aS*w')]('<img\x20style=\x22width:100%;\x22\x20class=\x22lazy\x22\x20data-original=\x22', imgs[_0xff1a4d]), _0xe1f02a[_0x232c('0x3c''I0J#')]), _0x1b9bc1['l']), '\x22>');
                    }
                }
                _0xe1f02a[_0x232c('0x3d''jAA%')]($, _0x1b9bc1['f'])[_0x232c('0x3e''u5iv')](s);
            }
  15. 分析这个函数,在关键的 imgs 变量处再打一个断点

  16. d357fb68fa7499dc2a5415e43602e4c2.png

  17. 输出一个imgs, 发现这就是我们想要的结果

  18. 61f2b39c35ff61d1cd2b50686a0c29d1.png

  19. 分析到这里基本已经完成, 下面继续编写代码, 完成图片爬取、入库等操作

  20. 总结整理一下,得出以下代码

  21. 整理后的代码如下:

    var loadConf = {
      i: "2fwFfyil4wHJqrgEtXgpFAfgoiD47DksIXZNdbrHtA4C+iN5hH3rK3ZohZoz/tBeXkzqlFDtVhqHdceI/Lo7jUBW2z9JRmAWORxfrfO/fCP1E8jjGI4bpLDzisIaOi1X/lA0rv+pUieoftsDVSOq9hclmcV38tsTghaxT0Tqx0Z28sXK8PX93UjdLrdnqj1ESng8x25FAz9d4B5SANBO+NqKanBZ/kyYZ7q96OygRc+Qf7k29A792SQMtu20ZpA+/1PGgC4vpOZyS8No7CN7dSkfC+0tfqDCU3I6Bhixq13uJ114ryF8Cod+0d7WO5GakDr7mIjlemugfT3jprKlSFZKoNLlDt07M6MRT73QPZPIZxGkKiZlGAgYuIIWtvGXNy8wtsI7Olwkk9YIBD7TduUmMiWhNEvSqfxeVEsk1f6/r2/U/qPYJiWGgWKLwl4M0CXLaU2NV8htYyyLAA/6bSP7dTm6+hFmF/ktcJ6ow8bHsQpoVjjlIgERtptARrUjHlg567Mqk3IZRf3zQE3hqoN4iN9DvPlpKez8a8fBuPbdPB0jUj3xpCr8yoggXW9Sb9SxTXB1/yxKG2OhoboqoOK9rjxEZucp5P+AEae+UKpiN5j7SaOW8SEZ48wl6Ln0kRBmpfbodDronlR/vIXFhWZiHTEgLAifWk1fckwUEQg9IQPd1CeTlAGwgUm+1zYqKiziAs34arPp+faLL25RYrGkhU6OldrYStQsE7TDj1p1pWbDNiZBzA3+H7en2wXIBEqSvC+FXL9ODBooB9DSaGbjIUWrQ6Q+QToUVdU8uFs5siQGFQ1cpI8GAyqUD/NQPMF4mjG2IqYJUvCyJj8ZEgXG0FUAj24H1FMAWn4W3h4D+zrmGFHR+Q5jecg+tZSrsyYG0tpTJZ59lwi/+Iw8bcXALVMD1QXfkgosN1M2gMl5sBGgkGMFl2hivRs37RmUMVic2PceW9pLQO1DQLyWMfP8YhYSdjegKM7m/wEwcu9FhN3DszNgkCGhWCqIuiJfzwRIMrSozs9RSl7CcLaekWWF+08IPFcLVWCuiOTKKNXjsOZ/4VtgLCkBMfDQwVmI1pwgYwXyeOcE37PBgGqy229hafx+KPkGPBtGXMCEE1SG/9GBEU1JdvQthGmtMkMWFQ9UZS00VvwGdYArZNPXEOgjZEQRlKwRvZ/dtVRpH6T3VSPudjNxaiLVjYvhP4lXtuCHSXQ1glWIFMM+14ZBl+7VQEOAJ11+Yggqskbv/WEu0PxpK8EvnHx4QTlo0KHXwTNzz29CpejJ5LZwDBKogCsaAlkKDflfNRkhaxpJavkqi2SOX5q3R1CU1bhsPyx00c7mRnv1LIY5fXqNLLoDjlzq91tE9FdqudOuJWR/GciSCQnaXzd+Y0OgTDBN2Szach9bjr2uzW2JuoN945vHfHvKUxdcBPy1eVSqRwjkXA8zpsgETxkRutWBeW74ZQGnlDb4QgHxsxTFJd4nHJydV2W1YZd6lOosO7C6Ryl34b1MLq8qL/zgwArt/xe1qHuY2PMKIpC+zBOX/WHjxWsZs9c4RU1akfnkcl5tCxnjl1pI4NyDpEEjE2RHhXHVAQayr84tAMtNcdLoVdl9cJWRKJ87wfXfgCED5zZLUxGbg7CXk8iQZHE+RZnEQ3m979Xipn0sbT1wtqB4y5B2oFAGzX5CfrAMj3Z8tOXMftj9EZBB+Ms8Lfz1Fr0wvcT2NUwUdvdf4ZXk99r2Z2gNrEJEG9yU6lFOLONVCwkDBGHqD3J5FL7P6xHwUXTb8mXILtB2h9+hdu4s6wrHJ1y0THBM2G42DE8DXf2Ca0sztlvFvxAOOWqYuT4ENev9ows0lkXDclchGIiQ+LVGpBwBWPpWhFiZeM16UGzC0C/nSL4irO4SXvDltdhcSEuQRAxM4mQyJB0pQs3k8WDi1fi0qN8lUjnPszkun4PImxZEiVw2KLKActzPqVW+LTT2R9KkD6SbHNRBXy
Mdt5FSx9UkLZVa0urCweWQvKe73xmcp1S6Jkr5Ifmi21hxiCbMx50sOm6EkIRiHVhhzIEftTKVXH/ioDzUY43ROxeqTkmHc7fdpu9l0esNGnTMF+emucq9G9IsoiWPvLEnHURMlbNeKtHS5Y8K8G9cyGHe15+KsqZWv0OObys7WMzZuoKl+AtbaJCixzxdX/cHNuDPpEXvRbLVvicUjfPzt1sk6SYjd9pzyDjR5tcIPIRSoz87iJJUzH+yqTDREJKURmbIq8Pjfn+a8RU+LVyL3xFap3jSyCVPi0LbQzcGrg2E0d457I4RLTzj0JtjPnn7DnElzD+WAUNdnaKPfs6tgej47pczPTVf2TE6apwBk1joC1JICsCPN6QEm1CokvGWQgis+1rpPi2hEuC+FPPNqOfE4BTpbxBfyl/QsGEwu1VBvGXJmcgsuh7ogvGXUmx2Xo00TMTgoLrc+2t9cWtuMq1T9ralJ1wxnKFH29ZOVVvPDHZw/uzZ98f4u5L1wpQQ4PxzzwJp0DLOwgxV9vRbznZnQBWT2ABQ6z8786WdkJ6srjtzUNVKesplg+aOLeoiRLAE9UrjUTmICzS0B1u4FVAQAIldDGOmtLNTfIP7TALmyLzKnxKfCnQSwWf3ZXVEBrZugzbTHSh0uU+oiwHz9Nemgsw/HW6qwSoFii2hIRS+EKWvkIgsnCZfZP8CHZss7Cy6DEeL5GZA6jMNMNbnnafJxyqM4K6rNljP9dUFsDeWfvCFYpoOwG4JuiCSR/O9cYed05wl0Qk88pEhRm5VkmUoBmM1AnBgki4W3OEFZaALhhzI78EgfpG+Pj5NfvetvVCcwLzAIZZhWKX1pK/P6Z0fxF9vvVcqqNr0vMY4h6x2oCzK0FdeBN4lZrjnveXMDYti1nVT5mgfgmJt9xJmYpvWd7/we/zyoBESkkpPaNlyZMzBoBQrrXIzxQ2dUsi/dQVf/hfajXRKn4FdK9Upz9ky9BU9HYZR/1AyOI0LIlPc0ve2ZnB/2ZsOuoR/eWkSHuQukOlf7eG2fXUXV9gHwOqtyNgGvymOQk4rHi4hfUns8LExCjwcgb9nDQiZgZvqcaSqPB3quqN1zSTIhk12Was2C8QbbPo9hhe4i6cK4hec9VWm4sf9OKa8A7PCQgSG/dywoS+LTHaQ0vZjFqREE3Z6rKp/0Mbrffpfd2p25lxKDX7oVtmJBLmHwu8AEZWzijT1H495tz+2b8/r1RhKfgaeV1wyXVSb/AYSJAMvHu3RMnMQFkLoz+60ltuOM4HBXXUZyYlYARMyXV6PFTNnnfb8aJfldHB7CJyVBcUtSpbtybDL4+tLfWMXFfnILszUrOkQaM1SXw+6Rw2KFvkMwkSryeeVDI9j+4vovfL1I9iBt5wh/F2AF/phA08vxvRWcTcjM8RYowEE2uhdnk4q6ur2Ev00N8ggOKhnLG56yuRm2j+T5D8exgfbeiiN/iz4YP16wAvVcWX96PC/eezKNvnpu8NcenId6B4UkB2qwiP2/UuDJUpkxSCh2WAovk8mGYpXDLl4Ev1Aby5m51ePG9WGK57Rm4QQouJ45mYeZUZtf3pNllMBONTfLPtutp/TwTNJ2Y5/OqnLoXsNgo5L0Gpv4/ZeGu6oA0pmWy2Db/Jk/n0PjjLhLPtvI75Ew7H7jGtwvtKcwydsmhzjJL86X9p82piAtDUEtIC0014n3inh9/6HMJQyh2ItjSxlBxvicuhiGUf5xl0tRaiEKEchH+7R9k/s/w3iveUhGJmVBQbFDyxjtqLh8jbEjwkuD1rgihq0gDVEVcBJ8FAtz2jXNXXPlLe7FFhEebryiNnDswA8wMLMyZ5xJgnUyfxqsT5oeUVfEEkqWtTUjzRY5xCfDqgAWjvFZQg7pDkhlHSPlvsePreNGFjPpphxgjRWURKhZXXT0j6VT5PDAmlPy03pRS3i8k64WeWlOkIqb7Evp4aDTjZh/ZTVaKzeXnQ4iGeKcJTNTnN/LLQUO3
Y8nhLio18M8S79rR5/4sG+zP6yO10ThOyuRlTDc/9weWEoQQEt+4TbqfB4ORHAix7S/IpCGdzV2O8ifku5/v34KMjaRWLn0UgeqfwDO2y1P7W08jOr7vkYIyzcnUUpLV/5xJ76UBiXcVbGMCg/f/Uv9dz/RENKaOEnnB1J5uvrZ4tFHc6eatNLFO9622mgGiTI6MSs6Hse9zyBE76qhDZbiDH2ENEwrTGZzXZ5YKolWlCsIau7iH/a3r/LA6iOJp28QJckUM6MFY1L8kQO/qay81528M+8Bg3U0ebGDzccsg8FmLRCNH5OBoStmLkcEKVQjBvKmKKafxbxwEz3jyW8zPkli6LqItp9Oy+Pf2NYpMSEh9r67GL2GClmrz0FfLczrHoj80M68oyRqt+EF4gzFfMMjipzzOnuVFTzwhFsyjFkEKKDY7UDCzko32pAKgd2YkqmurVa4A8/cYeat+ugcheKjkzWx3KQ1ttkXZa+gqEp1wNGCyosQVxiwdN/3SNi7ra0NGvMahLMIByJmGOidoO/efc/1kUJ7fqtVYYOJab2TLPzTAaerkMBW8WLCsFWpet05drHspv+nO3heo+mN7EF3oG6COEmJ8RdWcvDVqLQ8QPY3phg75ksqGqDYExRUZoJGsbax/2tXo8bQx5WaZMNGEXPZMeQoDvSDyxLRdIRv4k4TXRWccxSg9QNR+PZqCQZsp7bYZl/4NZ/GEU=",
      c: "0",
      k: "fd946a640a65eb1d",
      d: "momomh.com",
      l: "https://ae01.alicdn.com/kf/Uf8692d06f3694b03b1881ded2b087438H.png",
      f: "#cp_img"
    };
    // Decrypt the chapter payload: `loadConf.k` is parsed as UTF-8 bytes and
    // used as both the AES key and the IV; `loadConf.i` is the ciphertext.
    const aesKey = CryptoJS.enc.Utf8.parse(loadConf.k);
    console.log(aesKey);
    const decrypted = CryptoJS.AES.decrypt(loadConf.i, aesKey, {
      iv: aesKey,
      padding: CryptoJS.pad.Pkcs7,
    });
    // Presumably a '|'-separated list of image URLs (the site's own loader cuts
    // the decrypted text on '|') -- confirm against the console output.
    const imageList = decrypted.toString(CryptoJS.enc.Utf8);
    console.log(imageList);

5f9e57fdafc7aba49f2e1a91ef91abeb.png

编写代码爬取漫画


  1. 新建一个_momomh.js 文件, 把上面的js代码稍微整理下copy进去

  2. 新建个momomh_com.py 文件 编写爬虫逻辑

  3. momomh_com.py 的代码如下:

    # !/usr/bin/python3
    # -*- coding: utf-8 -*-
    # Time    : 2020/10/28 15:35
    # Author  : Amd794
    # Email   : 2952277346@qq.com
    # Github  : https://github.com/Amd794
    import re
     
    import execjs
     
    from threading_download_images import get_response
     
     
    class Momomh(object):
        """Helpers for extracting the image URLs of one momomh.com chapter page."""

        # Size marker removed from the end of each image URL.
        _SIZE_SUFFIX = '_w_720'

        @staticmethod
        def _parse_load_conf(page_source):
            """Extract the page's ``var loadConf = {...}`` literal as a dict.

            :param page_source: HTML/JS text of the chapter page.
            :return: dict built from the object literal.
            :raises IndexError: if no ``loadConf`` literal is present (the same
                exception type the previous ``findall(...)[0]`` lookup raised).
            :raises ValueError: if the literal cannot be parsed as JSON after its
                keys are quoted.
            """
            import json  # local import so the file-level import block is untouched

            matches = re.findall('var loadConf = ({.*?})', page_source, re.S)
            if not matches:
                raise IndexError('loadConf object not found in page source')
            literal = matches[0].strip('\n')
            # Quote bare JS keys, but only where a key can legally start (after
            # "{" or ","), so a colon inside a value such as "https://" is safe.
            literal = re.sub(r'([{,]\s*)([A-Za-z_]\w*)\s*:', r'\1"\2":', literal)
            # JSON, unlike a JS object literal, rejects a trailing comma.
            literal = re.sub(r',\s*}', '}', literal)
            # The text comes from a remote page, so parse it as data instead of
            # executing it with eval().
            # NOTE(review): assumes the values are double-quoted strings, as in
            # the sample page shown in this article.
            return json.loads(literal)

        @staticmethod
        def _strip_suffix(url, suffix):
            """Return ``url`` without a trailing ``suffix``; unchanged if absent.

            ``str.strip(chars)`` treats its argument as a set of characters and
            trims both ends, so it can eat legitimate trailing characters of the
            URL; only an exact trailing match is removed here.
            """
            if suffix and url.endswith(suffix):
                return url[:-len(suffix)]
            return url

        @staticmethod
        def _momomh(detail_url):
            """Return the list of image URLs for the chapter at ``detail_url``.

            Downloads the page, reads its ``loadConf`` object, and passes it to
            the ``getArr`` function that ``../js/_momomh.js`` is expected to
            define (run through execjs).
            """
            header = {
                'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 11_0 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) Version/11.0 Mobile/15A5341f Safari/604.1 Edg/85.0.4183.83',
            }
            response = get_response(detail_url, header=header)
            load_conf = Momomh._parse_load_conf(response.text)
            # Read the script through a context manager so the handle is closed.
            with open('../js/_momomh.js') as js_file:
                js_source = js_file.read()
            ctx = execjs.get().compile(js_source, cwd='../js/node_modules')
            data = ctx.call('getArr', load_conf)
            image_url = [Momomh._strip_suffix(url, Momomh._SIZE_SUFFIX) for url in data]
            return image_url
     
     
    if __name__ == '__main__':
        # Manual smoke test: print the image URLs of one sample chapter.
        sample_chapter_url = 'https://m.momomh.com/view/ZJBBO.html'
        print(Momomh._momomh(sample_chapter_url))

5a11b0f3ce5aaf46fccfae66c136decd.png

4. 最后整合到主程序中测试, 没问题后就可以部署到服务器上进行爬取入库。

5. 稍微配置一下

99caac512633c92ea8ceff3b879334c7.png

6. 运行看最终结果

7.2bf7d9a93f9b779592092239cce025b7.png

图片合并


8. 下载完成后,会发现一个问题。下载下来的图片被切割了

93a6c7045490da7ffe8fd70a859ec73a.png

所以,还需要把被切割的图片合并一下。代码如下:

# !/usr/bin/python3
# -*- coding: utf-8 -*-
# Time    : 2020/11/22 19:35
# Author  : Amd794
# Email   : 2952277346@qq.com
# Github  : https://github.com/Amd794
import os
import re
from shutil import copyfile
 
from PIL import Image
 
 
def f(s):
    """Sort key: the first run of digits in ``s`` as an int, or 999 if none.

    Names without any digit get 999 so they sort after the numbered slices.
    """
    # Raw string: '\d' in a normal string literal is an invalid escape sequence.
    match = re.search(r'\d+', s)
    return int(match.group()) if match else 999
 
 
# Extensions of the downloaded image slices.
suffix = ['jpg', 'png', 'jpeg']
# Number of slices pasted side by side to form one full page.
page = 5
# Only files marked '_w_144' are slices; order them by the number in the name.
file_list = [imgFileName for imgFileName in os.listdir('.')
             if imgFileName.endswith(tuple(suffix)) and '_w_144' in imgFileName]
file_list.sort(key=f)
file_groups = [file_list[i:i + page] for i in range(0, len(file_list), page)]
file_name = ''
for group in file_groups:
    print(f'-----正在操作{group}分组-----')
    # Every slice in a group is assumed to share the first slice's size.
    with Image.open(group[0]) as first_slice:
        width, height = first_slice.size
    # Size the canvas by the actual group length so a short final group does
    # not leave blank columns on the right.
    to_image = Image.new('RGB', (width * len(group), height))
    try:
        for position, pic in enumerate(group):
            # Close each slice right after pasting; a handle left open makes the
            # os.remove() below fail with PermissionError on Windows.
            with Image.open(pic) as piece:
                to_image.paste(piece, (width * position, 0))
        # The merged page is named after the last slice, minus the marker.
        file_name = group[-1].replace('_w_144', '')
        to_image.save(file_name)
    finally:
        to_image.close()
for i in file_list:
    try:
        os.remove(i)
    except PermissionError:
        print(f'-----{i} PermissionError-----')

# Create (or truncate) the error log; the context manager closes the file.
with open('error_urls.txt', 'w'):
    pass

fix_script = 'try_to_fix.py'
fix_target = os.path.join('./', 'try_to_fix.py')
# NOTE(review): as written, source and destination are the same path, and
# shutil.copyfile raises SameFileError in that case. The source was presumably
# meant to live elsewhere -- confirm. Copy only when the two paths differ.
if os.path.abspath(fix_script) != os.path.abspath(fix_target):
    copyfile(fix_script, fix_target)
os.system("python try_to_fix.py")
# The script deletes itself once it has finished.
os.remove(__file__)

da8b1eb45a968d8a62328ce1bd19b438.png

 

posted @   Amd794  阅读(10539)  评论(0编辑  收藏  举报
编辑推荐:
· .NET开发智能桌面机器人:用.NET IoT库编写驱动控制两个屏幕
· 用纯.NET开发并制作一个智能桌面机器人:从.NET IoT入门开始
· 一个超经典 WinForm,WPF 卡死问题的终极反思
· ASP.NET Core - 日志记录系统(二)
· .NET 依赖注入中的 Captive Dependency
阅读排行:
· 在外漂泊的这几年总结和感悟,展望未来
· 博客园 & 1Panel 联合终身会员上线
· 支付宝事故这事儿,凭什么又是程序员背锅?有没有可能是这样的...
· https证书一键自动续期,帮你解放90天限制
· 在 ASP.NET Core WebAPI如何实现版本控制?
点击右上角即可分享
微信分享提示