HtmlUnit入门二
由于在WebClient中,默认支持对CSS,JavaScript的解析,因此总是会出现很多错误信息,并且执行速度也很慢。
因此,我们可以选择关闭掉WebClient对CSS,JavaScript的解析。
使用WebClient#getOptions()方法,可以得到一个WebClientOptions对象,通过它可以对WebClient进行很多设置,比如:使用代理,设置连接的超时时间之类的。
◇关闭对CSS,JavaScript的支持
package com.fuwh; import com.gargoylesoftware.htmlunit.BrowserVersion; import com.gargoylesoftware.htmlunit.WebClient; import com.gargoylesoftware.htmlunit.html.HtmlPage; public class Demo04 { public static void main(String[] args) { WebClient webClient=null; try { webClient= new WebClient(BrowserVersion.FIREFOX_45); //定义一个WebClient webClient.getOptions().setCssEnabled(false); //设置CSS解析无效 webClient.getOptions().setJavaScriptEnabled(false);//设置JavaScript解析无效 final HtmlPage page=webClient.getPage("http://www.tuicool.com/"); //从指定URL获取HtmlPage System.out.println(page.asXml()); } catch (Exception e) { // TODO: handle exception e.printStackTrace(); }finally { webClient.close(); //关闭客户端 } } }
◇使用代理ip
package com.fuwh; import com.gargoylesoftware.htmlunit.BrowserVersion; import com.gargoylesoftware.htmlunit.ProxyConfig; import com.gargoylesoftware.htmlunit.WebClient; import com.gargoylesoftware.htmlunit.WebClientOptions; import com.gargoylesoftware.htmlunit.html.HtmlPage; public class Demo05 { public static void main(String[] args) { WebClient webClient=null; try { webClient= new WebClient(BrowserVersion.FIREFOX_45); //定义一个WebClient WebClientOptions webClientOptions=webClient.getOptions(); webClientOptions.setCssEnabled(false); //设置CSS解析无效 webClientOptions.setJavaScriptEnabled(false);//设置JavaScript解析无效 webClientOptions.setProxyConfig(new ProxyConfig("49.212.39.221", 3129)); final HtmlPage page=webClient.getPage("http://www.tuicool.com/"); //从指定URL获取HtmlPage System.out.println(page.asXml()); } catch (Exception e) { // TODO: handle exception e.printStackTrace(); }finally { webClient.close(); //关闭客户端 } } }
◇模拟提交表单
package com.fuwh; import com.gargoylesoftware.htmlunit.BrowserVersion; import com.gargoylesoftware.htmlunit.WebClient; import com.gargoylesoftware.htmlunit.html.HtmlButton; import com.gargoylesoftware.htmlunit.html.HtmlButtonInput; import com.gargoylesoftware.htmlunit.html.HtmlDivision; import com.gargoylesoftware.htmlunit.html.HtmlForm; import com.gargoylesoftware.htmlunit.html.HtmlInput; import com.gargoylesoftware.htmlunit.html.HtmlObject; import com.gargoylesoftware.htmlunit.html.HtmlPage; import com.gargoylesoftware.htmlunit.html.HtmlSpan; import com.gargoylesoftware.htmlunit.html.HtmlSubmitInput; import com.gargoylesoftware.htmlunit.html.HtmlTextInput; public class Demo06 { public static void main(String[] args) { try(final WebClient webClient= new WebClient(BrowserVersion.CHROME)) { final HtmlPage page=webClient.getPage("https://mvnrepository.com/"); webClient.getOptions().setCssEnabled(false); webClient.getOptions().setJavaScriptEnabled(false); HtmlDivision htmlDiv=(HtmlDivision) page.getByXPath("//div[@id='search']").get(0); HtmlForm form=(HtmlForm) htmlDiv.getElementsByTagName("form").get(0); // System.out.println(form.asXml()); HtmlTextInput textInput=form.getInputByName("q"); HtmlSubmitInput submit=form.getInputByValue("Search"); textInput.setValueAttribute("junit"); HtmlPage resultPage=submit.click(); // System.out.println(resultPage.asXml()); HtmlDivision htmlDiv2=resultPage.getFirstByXPath("//div[@class='im']"); System.out.println(htmlDiv2.asXml()); // webClient.close(); } catch (Exception e) { // TODO: handle exception e.printStackTrace(); } } }