java爬虫(八)使用node.js获取network中api接口内信息并用java的jsoup重写该方法

1.电脑安装node.js  点击官网传送门

2.在浏览器中(我用了一个谷歌内核的浏览器)找到自己url api 右键-->copy-->copy as nodejs fetch

(打开网页的审查元素后如果找不到url,尝试刷新页面)

 

 

 3.将代码粘贴进js文件中(我用的记事本)需要对代码进行简单的修改

修改的内容为:头部定义fetch变量,尾部输出结果,代码如下:

粘贴出来的代码:

// Request exactly as produced by the browser's "Copy as Node.js fetch".
// Note: this snippet declares no `fetch` binding and discards the returned
// promise, so nothing is printed; the modified version further down in the
// article adds both.
fetch("http://ehall.tjut.edu.cn/publicapp/sys/zxzxapp/consult/queryConsultingList.do", {
  "headers": {
    "accept": "*/*",
    "accept-language": "zh-CN,zh;q=0.9",
    "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
    "x-requested-with": "XMLHttpRequest",
    "cookie": "EMAP_LANG=zh; _WEU=0lGU9nUPyZ9qx*Rn4K9rs02ZG7l70bbzRFO5mUgwGPNPYxv1E*sNledwwPblS1lfd5Ik_YiYW3vIpD3LYICxSVT8oAgUTnE3MSHiJQzRFU7hbwhsp2gIy0OWEnvRY2eX8lSq0pRdz_2.; iPlanetDirectoryPro=kAXrTrPpkhEvxffxPsHPss; amp.locale=undefined; route=8da53839b22816a2e9746dc2f57870c1; MOD_AUTH_CAS=MOD_AUTH_ST-1453563-6C0UWnfbrqXCCeeFLV9u1609300648527-21wH-cas; zg_did=%7B%22did%22%3A%20%22174575a6eca420-0b1ea05958cdee-51a2f73-1fa400-174575a6ecb376%22%7D; zg_=%7B%22sid%22%3A%201609300649292%2C%22updated%22%3A%201609300649297%2C%22info%22%3A%201608798758280%2C%22superProperty%22%3A%20%22%7B%7D%22%2C%22platform%22%3A%20%22%7B%7D%22%2C%22utm%22%3A%20%22%7B%7D%22%2C%22referrerDomain%22%3A%20%22ehall.tjut.edu.cn%22%2C%22cuid%22%3A%20%22203128301%22%2C%22zs%22%3A%200%2C%22sc%22%3A%200%2C%22firstScreen%22%3A%201609300649292%7D; asessionid=f025e83f-cdc2-4206-8bef-d521c5dfb7d2; JSESSIONID=8nexyyMRuvuF9w5FdY-wnfhPlVhKKLLKtmAHz5m3FQur3psQlbvJ!1969776676"
  },
  "referrer": "http://ehall.tjut.edu.cn/publicapp/sys/zxzxapp/index.do",
  "referrerPolicy": "no-referrer-when-downgrade",
  "body": "consultZone=ALL&search=&consultState=0&pageNumber=1&pageSize=10",
  "method": "POST",
  "mode": "cors"
});

修改后的代码:

// Replays the request copied from the browser and prints the JSON reply.
// Two additions relative to the pasted snippet: `fetch` is bound to the
// node-fetch package at the top, and the response is consumed at the bottom.
const fetch = require('node-fetch');

fetch("http://ehall.tjut.edu.cn/publicapp/sys/zxzxapp/consult/queryConsultingList.do", {
  "headers": {
    "accept": "*/*",
    "accept-language": "zh-CN,zh;q=0.9",
    "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
    "x-requested-with": "XMLHttpRequest",
    "cookie": "EMAP_LANG=zh; _WEU=0lGU9nUPyZ9qx*Rn4K9rs02ZG7l70bbzRFO5mUgwGPNPYxv1E*sNledwwPblS1lfd5Ik_YiYW3vIpD3LYICxSVT8oAgUTnE3MSHiJQzRFU7hbwhsp2gIy0OWEnvRY2eX8lSq0pRdz_2.; iPlanetDirectoryPro=kAXrTrPpkhEvxffxPsHPss; amp.locale=undefined; route=8da53839b22816a2e9746dc2f57870c1; MOD_AUTH_CAS=MOD_AUTH_ST-1453563-6C0UWnfbrqXCCeeFLV9u1609300648527-21wH-cas; zg_did=%7B%22did%22%3A%20%22174575a6eca420-0b1ea05958cdee-51a2f73-1fa400-174575a6ecb376%22%7D; zg_=%7B%22sid%22%3A%201609300649292%2C%22updated%22%3A%201609300649297%2C%22info%22%3A%201608798758280%2C%22superProperty%22%3A%20%22%7B%7D%22%2C%22platform%22%3A%20%22%7B%7D%22%2C%22utm%22%3A%20%22%7B%7D%22%2C%22referrerDomain%22%3A%20%22ehall.tjut.edu.cn%22%2C%22cuid%22%3A%20%22203128301%22%2C%22zs%22%3A%200%2C%22sc%22%3A%200%2C%22firstScreen%22%3A%201609300649292%7D; asessionid=f025e83f-cdc2-4206-8bef-d521c5dfb7d2; JSESSIONID=8nexyyMRuvuF9w5FdY-wnfhPlVhKKLLKtmAHz5m3FQur3psQlbvJ!1969776676"
  },
  "referrer": "http://ehall.tjut.edu.cn/publicapp/sys/zxzxapp/index.do",
  "referrerPolicy": "no-referrer-when-downgrade",
  "body": "consultZone=ALL&search=&consultState=0&pageNumber=1&pageSize=10",
  "method": "POST",
  "mode": "cors"
})
  .then((res) => {
    // Fail with a readable message on a non-2xx reply instead of letting
    // res.json() choke on a body that is not JSON.
    if (!res.ok) {
      throw new Error(`Request failed: HTTP ${res.status} ${res.statusText}`);
    }
    return res.json();
  })
  .then((json) => console.log(json))
  .catch((err) => {
    // Network errors, bad status codes and JSON parse failures all end up here
    // rather than as an unhandled promise rejection.
    console.error(err);
    process.exitCode = 1;
  });

4.运行代码

(初次使用会报没有node-fetch这个包,直接在cmd中用命令安装即可:npm install node-fetch@2。注意:node-fetch 3.x 只支持ES模块,不能用require引入,所以这里要安装2.x版本)

cmd命令行中有两种node.js的运行方式

第一种:用node命令进入环境 然后逐句编写运行

第二种:直接运行写好的node.js文件

在cmd中输入 node 文件名.js 即可(node与文件名之间用空格分隔)

 5.只用jsoup重写该方法

通过不停地注释代码查看运行结果,我们发现了很多冗余参数,经过删减后的node.js代码如下

// Minimal form of the request: only the content-type and cookie headers and
// the three form fields below are sent.
const fetch = require('node-fetch');

fetch("http://ehall.tjut.edu.cn/publicapp/sys/zxzxapp/consult/queryConsultingList.do", {
  "headers": {
    "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
    "cookie": "EMAP_LANG=zh; _WEU=0lGU9nUPyZ9qx*Rn4K9rs02ZG7l70bbzRFO5mUgwGPNPYxv1E*sNledwwPblS1lfd5Ik_YiYW3vIpD3LYICxSVT8oAgUTnE3MSHiJQzRFU7hbwhsp2gIy0OWEnvRY2eX8lSq0pRdz_2.; iPlanetDirectoryPro=kAXrTrPpkhEvxffxPsHPss; amp.locale=undefined; route=8da53839b22816a2e9746dc2f57870c1; MOD_AUTH_CAS=MOD_AUTH_ST-1453563-6C0UWnfbrqXCCeeFLV9u1609300648527-21wH-cas; zg_did=%7B%22did%22%3A%20%22174575a6eca420-0b1ea05958cdee-51a2f73-1fa400-174575a6ecb376%22%7D; zg_=%7B%22sid%22%3A%201609300649292%2C%22updated%22%3A%201609300649297%2C%22info%22%3A%201608798758280%2C%22superProperty%22%3A%20%22%7B%7D%22%2C%22platform%22%3A%20%22%7B%7D%22%2C%22utm%22%3A%20%22%7B%7D%22%2C%22referrerDomain%22%3A%20%22ehall.tjut.edu.cn%22%2C%22cuid%22%3A%20%22203128301%22%2C%22zs%22%3A%200%2C%22sc%22%3A%200%2C%22firstScreen%22%3A%201609300649292%7D; asessionid=f025e83f-cdc2-4206-8bef-d521c5dfb7d2; JSESSIONID=8nexyyMRuvuF9w5FdY-wnfhPlVhKKLLKtmAHz5m3FQur3psQlbvJ!1969776676"
  },
  "body": "consultState=0&pageNumber=1&pageSize=10",
  "method": "POST",
})
  .then((res) => {
    // Fail with a readable message on a non-2xx reply instead of letting
    // res.json() choke on a body that is not JSON.
    if (!res.ok) {
      throw new Error(`Request failed: HTTP ${res.status} ${res.statusText}`);
    }
    return res.json();
  })
  .then((json) => console.log(json))
  .catch((err) => {
    // Network errors, bad status codes and JSON parse failures all end up here
    // rather than as an unhandled promise rejection.
    console.error(err);
    process.exitCode = 1;
  });

使用jsoup转写后如下:

转写过程中遇到的问题:

1.请求中form data(表单数据)里的每个字段都需要用.data( )逐一赋值

2.报错:Jsoup Unhandled content type 原因是该接口返回的是JSON,响应的Content-Type不是jsoup默认接受的类型(如text/*、application/xml等)

解决方法:在连接上调用 .ignoreContentType(true)(这是jsoup的连接设置,并不是请求头)

 

package debug;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

public class Myhttpclient {
    
    public static void querryhtml(String loginUrl) throws Exception{

        Document document = Jsoup.connect(loginUrl)
                // 手动设置cookies
                .header("Content-Type","application/x-www-form-urlencoded; charset=UTF-8")
                .ignoreContentType(true)
                .header("Cookie","EMAP_LANG=zh; _WEU=0lGU9nUPyZ9qx*Rn4K9rs02ZG7l70bbzRFO5mUgwGPNPYxv1E*sNledwwPblS1lfd5Ik_YiYW3vIpD3LYICxSVT8oAgUTnE3MSHiJQzRFU7hbwhsp2gIy0OWEnvRY2eX8lSq0pRdz_2.; iPlanetDirectoryPro=kAXrTrPpkhEvxffxPsHPss; amp.locale=undefined; route=8da53839b22816a2e9746dc2f57870c1; MOD_AUTH_CAS=MOD_AUTH_ST-1453563-6C0UWnfbrqXCCeeFLV9u1609300648527-21wH-cas; zg_did=%7B%22did%22%3A%20%22174575a6eca420-0b1ea05958cdee-51a2f73-1fa400-174575a6ecb376%22%7D; zg_=%7B%22sid%22%3A%201609300649292%2C%22updated%22%3A%201609300649297%2C%22info%22%3A%201608798758280%2C%22superProperty%22%3A%20%22%7B%7D%22%2C%22platform%22%3A%20%22%7B%7D%22%2C%22utm%22%3A%20%22%7B%7D%22%2C%22referrerDomain%22%3A%20%22ehall.tjut.edu.cn%22%2C%22cuid%22%3A%20%22203128301%22%2C%22zs%22%3A%200%2C%22sc%22%3A%200%2C%22firstScreen%22%3A%201609300649292%7D; asessionid=f025e83f-cdc2-4206-8bef-d521c5dfb7d2; JSESSIONID=8nexyyMRuvuF9w5FdY-wnfhPlVhKKLLKtmAHz5m3FQur3psQlbvJ!1969776676")
//                .header("User-Agent","Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198Safari/537.36")
                .data("consultState","0")
                .data("pageSize","10")
                .data("pageNumber","1")
                .post();    
        
        System.out.println(document);    
    }
}

运行结果:

 

 6.目前唯一的问题就是cookies的有效时间问题,解决方法:通过定时登录主页获取新的cookies,再传递给该api

 

posted @ 2020-12-30 12:32  StarZhai  阅读(835)  评论(0编辑  收藏  举报