Java HtmlUnit 抓取网页数据
// Loads the page at this.path in a headless HtmlUnit browser, waits for its
// scripts to fill the element with id "forecast", and then computes one
// integer average per forecast table from the cells of that table's 4th row.
//
// Propagates whatever getPage(...) and Thread.sleep(...) throw; the enclosing
// method is expected to declare or handle those exceptions.

// The page is only processed when it contains exactly this many tables.
final int expectedTableCount = 8;

// Declared outside the try block so the values stay readable after the client
// has been closed. Entries remain 0 when the page could not be parsed or the
// table count does not match.
// NOTE(review): the name suggests air-quality-index values - confirm.
int[] aqis = new int[expectedTableCount];

// try-with-resources closes the client (its windows and background JavaScript
// processing) even when loading or parsing throws.
try (WebClient webClient = new WebClient(BrowserVersion.CHROME)) {
    webClient.setJavaScriptTimeout(5000);
    // NOTE(review): accepts any TLS certificate; acceptable only for scraping
    // a known host.
    webClient.getOptions().setUseInsecureSSL(true);
    webClient.getOptions().setJavaScriptEnabled(true);
    webClient.getOptions().setCssEnabled(false);
    // Script errors on the page must not abort the scrape.
    webClient.getOptions().setThrowExceptionOnScriptError(false);
    webClient.getOptions().setTimeout(100000);
    webClient.getOptions().setDoNotTrackEnabled(false);

    HtmlPage page = webClient.getPage(this.path);

    // Give the page's scripts time to replace the loading placeholder: first
    // wait on HtmlUnit's background jobs, then an additional fixed pause.
    webClient.waitForBackgroundJavaScript(20000);
    Thread.sleep(5000);

    HtmlDivision div = (HtmlDivision) page.getElementById("forecast");

    // A missing element is treated the same as a panel that still shows the
    // "forecast-data-loading" placeholder: the data never arrived.
    if (div == null || div.asXml().contains("forecast-data-loading")) {
        System.out.println("htmlUnit解析页面失败");
    } else {
        System.out.println("htmlUnit解析页面成功");

        // Unchecked casts: getByXPath is not typed to the element class here.
        List<HtmlTable> tables = (List<HtmlTable>) div.getByXPath("./div[2]/center[1]/table");

        // Any other table count is silently ignored and aqis stays all zeros.
        if (tables.size() == expectedTableCount) {
            int i = 0;
            for (HtmlTable table : tables) {
                // Only the 4th row of each table is read. A missing row makes
                // get(0) throw, and a row without cells makes the division
                // below throw, exactly as before.
                List<HtmlTableRow> trs = (List<HtmlTableRow>) table.getByXPath("./tbody/tr[4]");
                HtmlTableRow tr = trs.get(0);
                List<HtmlTableCell> cells = (List<HtmlTableCell>) tr.getByXPath("./td");

                int sum = 0;
                for (HtmlTableCell cell : cells) {
                    // Each cell is expected to hold two integers on separate
                    // lines. Accept both "\r\n" and "\n" as the separator and
                    // ignore padding around the numbers.
                    String[] values = cell.asText().split("\\r?\\n");
                    int first = Integer.parseInt(values[0].trim());
                    int second = Integer.parseInt(values[1].trim());
                    sum += (first + second) / 2;
                }

                // Integer mean of the per-cell midpoints for this table.
                aqis[i] = sum / cells.size();
                i++;
            }
        }
    }
}