package com.xazhxc.htjcom.back.controller.base;

import cn.hutool.core.util.StrUtil;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.xazhxc.htjcom.entity.Citys;
import com.xazhxc.htjcom.init.HttpServerInit;
import com.xazhxc.htjcom.kit.Kits;
import com.xazhxc.htjcom.kit.PropsKit;
import com.xazhxc.htjcom.kit.UploadKit;
import com.xazhxc.htjcom.service.CitysService;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.tio.core.ChannelContext;
import org.tio.core.GroupContext;
import org.tio.http.common.HttpRequest;
import org.tio.http.common.HttpResponse;
import org.tio.http.common.UploadFile;
import org.tio.http.server.annotation.RequestPath;
import org.tio.http.server.mvc.Routes;
import org.tio.http.server.util.Resps;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

/**
 * Common controller.
 *
 * <p>Exposes {@code /base/pro}, which starts a background crawl of the administrative-division
 * index pages under {@link #ROOT_URL} and stores the scraped rows through {@link CitysService}.
 *
 * <p>As the code stands, only the rows found on the third-level ("countytr") pages are inserted:
 * the province insert and the city-level {@code printInfo} call are disabled.
 *
 * @author leizhen.wang
 */
@RequestPath(value = "/base")
@Slf4j
public class BaseController {

    /** Index page the crawl starts from; province links are resolved relative to it. */
    private static final String ROOT_URL = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2017/";

    /** Pause before every page request, in milliseconds. */
    private static final long REQUEST_DELAY_MILLIS = 500L;

    static CitysService citysService = Kits.getBean(CitysService.class);

    // NOTE(review): neither Mapper nor ProductService appears in the visible imports and the
    // field is never read in this class - confirm it is still needed.
    @Mapper
    ProductService productService;

    /** Maps the crawl depth to the CSS class of the table rows on the page at that depth. */
    private static final Map<Integer, String> cssMap = new HashMap<Integer, String>();

    // Shared by every crawl started through /base/pro; nothing is written to it at the moment.
    private static BufferedWriter bufferedWriter = null;

    static {
        cssMap.put(1, "provincetr"); // province
        cssMap.put(2, "citytr"); // city
        cssMap.put(3, "countytr"); // county
    }

    /**
     * Starts the crawl on a new thread and answers immediately with an OK result.
     *
     * @param request the incoming HTTP request
     * @return a JSON response built from {@code Kits.result().ok()}
     * @throws IOException declared for interface compatibility; crawl failures happen on the
     *     background thread and are logged there
     */
    @RequestPath(value = "/pro")
    public HttpResponse product(HttpRequest request) throws IOException {
        new Thread(BaseController::crawl).start();
        return Resps.json(request, Kits.result().ok());
    }

    /**
     * Walks every province link on the index page and descends into it.
     *
     * <p>A failure inside one province is logged and the crawl moves on to the next one. The
     * output writer is closed in {@code finally} so it is released on failure as well.
     */
    private static void crawl() {
        try {
            initFile();
            Document index = Jsoup.connect(ROOT_URL).get();
            int level = 1;
            Elements provinceRows = index.select("tr." + cssMap.get(level));
            for (Element provinceRow : provinceRows) {
                for (Element province : provinceRow.select("a")) {
                    if (Thread.currentThread().isInterrupted()) {
                        // An interrupt was recorded by pause(); stop instead of failing per province.
                        return;
                    }
                    try {
                        parseNextLevel(province, level + 1, null, null);
                    } catch (IOException e) {
                        log.error("Failed to crawl province link {}", province.attr("abs:href"), e);
                    }
                }
            }
        } catch (IOException e) {
            log.error("Failed to start crawl from {}", ROOT_URL, e);
        } finally {
            closeStream();
        }
    }

    /** Closes the shared writer if one was opened; a close failure is logged, not rethrown. */
    private static void closeStream() {
        if (bufferedWriter != null) {
            try {
                bufferedWriter.close();
            } catch (IOException e) {
                log.error("Failed to close output file", e);
            }
        }
    }

    /**
     * Loads the page behind {@code parentElement} and, for every row of the given level that
     * carries a link, descends one level further via {@link #parseNextLevel2}.
     *
     * @param parentElement link element whose absolute {@code href} is the page to load
     * @param level depth of the rows to select on that page (key into {@code cssMap})
     * @param code code of the parent row; when empty it is derived from the page URL
     * @param area when non-empty, rows of this page are stored with {@code code} as parent id
     * @throws IOException if a page cannot be fetched or the crawl is interrupted
     */
    private static void parseNextLevel(Element parentElement, int level, String code, String area) throws IOException {
        pause();
        String attr = parentElement.attr("abs:href");
        if (StrUtil.isEmpty(code)) {
            code = provinceCode(attr);
        }
        Document doc = Jsoup.connect(attr).get();
        Elements rows = doc.select("tr." + cssMap.get(level));
        for (Element row : rows) {
            // Rows are only stored here when an area marker is given; the only visible caller
            // passes none.
            if (!StrUtil.isEmpty(area)) {
                printInfo2(row, level + 1, code);
            }
            // Rows without a link have no child page to descend into.
            Elements links = row.select("a");
            if (links.size() != 0) {
                // Keep the row's own code in a local so the parent code above stays intact
                // for the remaining rows.
                String rowCode = row.select("td").first().text();
                parseNextLevel2(links.last(), level + 1, rowCode, "area");
            }
        }
    }

    /**
     * Loads the page behind {@code parentElement} and stores every row of the given level with
     * {@code code} as its parent id. Does not descend further.
     *
     * @param parentElement link element whose absolute {@code href} is the page to load
     * @param level depth of the rows to select on that page (key into {@code cssMap})
     * @param code code of the parent row; when empty it is derived from the page URL
     * @param area unused
     * @throws IOException if the page cannot be fetched or the crawl is interrupted
     */
    private static void parseNextLevel2(Element parentElement, int level, String code, String area) throws IOException {
        pause();
        String attr = parentElement.attr("abs:href");
        if (StrUtil.isEmpty(code)) {
            code = provinceCode(attr);
        }
        Document doc = Jsoup.connect(attr).get();
        Elements rows = doc.select("tr." + cssMap.get(level));
        for (Element row : rows) {
            printInfo2(row, level + 1, code);
        }
    }

    /**
     * Sleeps between page requests.
     *
     * @throws InterruptedIOException if the thread is interrupted; the interrupt flag is restored
     */
    private static void pause() throws InterruptedIOException {
        try {
            Thread.sleep(REQUEST_DELAY_MILLIS);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            InterruptedIOException aborted = new InterruptedIOException("Crawl interrupted while waiting between requests");
            aborted.initCause(e);
            throw aborted;
        }
    }

    /**
     * Builds a code from the two characters that follow {@link #ROOT_URL} in {@code url},
     * padded with {@code "0000"}.
     *
     * @throws IOException if {@code url} is too short to contain those two characters
     */
    private static String provinceCode(String url) throws IOException {
        int start = ROOT_URL.length();
        if (url.length() < start + 2) {
            throw new IOException("Unexpected page URL: '" + url + "'");
        }
        return url.substring(start, start + 2) + "0000";
    }

    /**
     * Opens the output file in append mode.
     *
     * @throws IOException if the file cannot be opened
     */
    private static void initFile() throws IOException {
        bufferedWriter = new BufferedWriter(new FileWriter(new File("d:\\CityInfo.txt"), true));
    }

    /**
     * Stores one table row: the text of its first and last {@code td} cells, with {@code pid} as
     * parent id. Rows without any {@code td} are skipped.
     *
     * @param element table row to store
     * @param level unused
     * @param pid parent id to store with the row
     */
    private static void printInfo(Element element, int level, String pid) {
        Elements cells = element.select("td");
        if (cells.isEmpty()) {
            log.warn("Skipping row without cells (parent {})", pid);
            return;
        }
        // NOTE(review): SnowFlakeUtil is not among the visible imports - confirm it resolves.
        String id = String.valueOf(SnowFlakeUtil.getFlowIdInstance().nextId());
        Citys citys = new Citys(id, cells.first().text(), cells.last().text(), pid);
        citysService.insert(citys);
    }

    /** Same as {@link #printInfo}; kept as a separate entry point for the existing call sites. */
    private static void printInfo2(Element element, int level, String pid) {
        printInfo(element, level, pid);
    }
}
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· Manus爆火,是硬核还是营销?
· 终于写完轮子一部分:tcp代理 了,记录一下
· 别再用vector<bool>了!Google高级工程师:这可能是STL最大的设计失误
· 单元测试从入门到精通
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了