htmlunit模拟登录

htmlunit jar 项目路径:http://sourceforge.net/projects/htmlunit/files/htmlunit/

demo代码如下

/**
 * Demo that logs in to a web site with HtmlUnit and then fetches a second
 * page through the same {@code WebClient}.
 *
 * <p>The flow is: load the login page, fill in the user name and password
 * fields of the form named {@code form1}, trigger the login button through
 * JavaScript, then request the task list page and print its raw content
 * together with the cookies held by the client.
 */
public class AutoLogin {

	/** Login page. */
	private static final String LOGIN_URL = "http://website/login.aspx";
	/** Task list page. */
	private static final String TASK_LIST_URL = "http://website/Banli.aspx";

	/**
	 * Program entry point; runs {@link #testHomePage()}.
	 *
	 * @param args command-line arguments (unused)
	 * @throws Exception if {@link #testHomePage()} fails
	 */
	public static void main(String[] args) throws Exception {
		testHomePage();
	}

	/**
	 * Performs the login and prints the task list page and the client's cookies
	 * to standard output.
	 *
	 * @throws Exception if a page cannot be loaded or an expected form/input
	 *                   is not present on the login page
	 */
	public static void testHomePage() throws Exception {
		final WebClient webClient = new WebClient(BrowserVersion.INTERNET_EXPLORER_8);
		try {
			// Must be set: otherwise a JavaScript error raised by the page is
			// rethrown as a ScriptException and aborts the login click.
			webClient.getOptions().setThrowExceptionOnScriptError(false);
			webClient.getOptions().setCssEnabled(false);
//			webClient.getOptions().setJavaScriptEnabled(true);
//			webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
			webClient.getOptions().setTimeout(300000);

			// Load the login page.
			final HtmlPage page = webClient.getPage(LOGIN_URL);

			// Look the form up by name; it could also be fetched by index:
			// page.getForms().get(0)
			final HtmlForm form = page.getFormByName("form1");

			// User name / password.
			final HtmlTextInput textUserName = form.getInputByName("txtUserName");
			textUserName.setText("username");
			final HtmlPasswordInput txtPwd = form.getInputByName("txtPwd");
			txtPwd.setText("pass");

			// Trigger the login button through JavaScript. The page produced by
			// the click is not needed: the next request goes through the same
			// WebClient and therefore uses the same cookie manager.
			page.executeJavaScript("$('#btn').click()");

			final Page page1 = webClient.getPage(TASK_LIST_URL);

			System.out.println("*************************************************************************************");
			System.out.println(page1.getWebResponse().getContentAsString());
			System.out.println("*************************************************************************************");
			System.out.println("");
			System.out.println("Cookies : " + webClient.getCookieManager().getCookies().toString());
		} finally {
			// Release all windows held by the client even when a step above
			// throws; without this the client is never cleaned up.
			webClient.closeAllWindows();
		}
	}
}

  搞不清 ASP.NET 内部是什么逻辑,试了很多方法都不行,查看了无数网站,最后无意中看到了下面这个配置:http://stackoverflow.com/questions/20352284/scraping-aspx-page-using-htmlunit

 1 import java.net.MalformedURLException;
 2 
 3 import com.gargoylesoftware.htmlunit.BrowserVersion;
 4 import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
 5 import com.gargoylesoftware.htmlunit.WebClient;
 6 import com.gargoylesoftware.htmlunit.html.HtmlElement;
 7 import com.gargoylesoftware.htmlunit.html.HtmlPage;
 8 
 9 public class teste {
10 
11     public static void main(String args[]) throws FailingHttpStatusCodeException, MalformedURLException, IOException
12     {
13        HtmlPage page = null;
14        String url = "http://www.bmfbovespa.com.br/cias-listadas/empresas-listadas/BuscaEmpresaListada.aspx?Idioma=pt-br";
15 
16        WebClient webClient = new WebClient(BrowserVersion.FIREFOX_17);
17 
18        webClient.getOptions().setThrowExceptionOnScriptError(false);
19        webClient.getOptions().setCssEnabled(false);
20        webClient.getOptions().setJavaScriptEnabled(false);
21        webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
22        webClient.getOptions().setTimeout(30000);
23 
24        page = webClient.getPage( url );
25 
26        System.out.println("Current page: Empresas Listadas | BM&FBOVESPA");
27 
28        HtmlElement theElement1 = (HtmlElement) page.getElementById("ctl00_contentPlaceHolderConteudo_BuscaNomeEmpresa1_btnTodas");
29        page = theElement1.click();
30 
31        System.out.println(page.asText());
32 
33        System.out.println("Test has completed successfully");
34     }
35 
36 }

最后测试下来,如果不加 webClient.getOptions().setThrowExceptionOnScriptError(false); 这一行,就会一直报下面这个错误:

  1 Exception in thread "main" ======= EXCEPTION START ========
  2 Exception class=[java.lang.RuntimeException]
  3 com.gargoylesoftware.htmlunit.ScriptException: Exception invoking click
  4     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:954)
  5     at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:628)
  6     at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:513)
  7     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:836)
  8     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:812)
  9     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:800)
 10     at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScriptIfPossible(HtmlPage.java:910)
 11     at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScript(HtmlPage.java:878)
 12     at com.suypower.AutoLogin12345.testHomePage(AutoLogin12345.java:48)
 13     at com.suypower.AutoLogin12345.main(AutoLogin12345.java:23)
 14 Caused by: java.lang.RuntimeException: Exception invoking click
 15     at net.sourceforge.htmlunit.corejs.javascript.MemberBox.invoke(MemberBox.java:181)
 16     at net.sourceforge.htmlunit.corejs.javascript.FunctionObject.call(FunctionObject.java:449)
 17     at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpretLoop(Interpreter.java:1536)
 18     at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpret(Interpreter.java:798)
 19     at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.call(InterpretedFunction.java:105)
 20     at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.doTopCall(ContextFactory.java:411)
 21     at com.gargoylesoftware.htmlunit.javascript.HtmlUnitContextFactory.doTopCall(HtmlUnitContextFactory.java:309)
 22     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.doTopCall(ScriptRuntime.java:3286)
 23     at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.exec(InterpretedFunction.java:115)
 24     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$3.doRun(JavaScriptEngine.java:827)
 25     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:939)
 26     ... 9 more
 27 Caused by: com.gargoylesoftware.htmlunit.ScriptException: TypeError: Cannot read property "nodeName" from null (http://xxxx/305000772#7)
 28     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:954)
 29     at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:628)
 30     at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:513)
 31     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:836)
 32     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:812)
 33     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:800)
 34     at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScriptIfPossible(HtmlPage.java:910)
 35     at com.gargoylesoftware.htmlunit.html.HtmlScript.executeInlineScriptIfNeeded(HtmlScript.java:354)
 36     at com.gargoylesoftware.htmlunit.html.HtmlScript.executeScriptIfNeeded(HtmlScript.java:415)
 37     at com.gargoylesoftware.htmlunit.html.HtmlScript$3.execute(HtmlScript.java:271)
 38     at com.gargoylesoftware.htmlunit.html.HtmlScript.onAllChildrenAddedToPage(HtmlScript.java:293)
 39     at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:799)
 40     at org.apache.xerces.parsers.AbstractSAXParser.endElement(Unknown Source)
 41     at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:756)
 42     at org.cyberneko.html.HTMLTagBalancer.callEndElement(HTMLTagBalancer.java:1170)
 43     at org.cyberneko.html.HTMLTagBalancer.endElement(HTMLTagBalancer.java:1072)
 44     at org.cyberneko.html.filters.DefaultFilter.endElement(DefaultFilter.java:206)
 45     at org.cyberneko.html.filters.NamespaceBinder.endElement(NamespaceBinder.java:330)
 46     at org.cyberneko.html.HTMLScanner$ContentScanner.scanEndElement(HTMLScanner.java:3126)
 47     at org.cyberneko.html.HTMLScanner$ContentScanner.scan(HTMLScanner.java:2093)
 48     at org.cyberneko.html.HTMLScanner.scanDocument(HTMLScanner.java:920)
 49     at org.cyberneko.html.HTMLConfiguration.parse(HTMLConfiguration.java:499)
 50     at org.cyberneko.html.HTMLConfiguration.parse(HTMLConfiguration.java:452)
 51     at org.apache.xerces.parsers.XMLParser.parse(Unknown Source)
 52     at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.parse(HTMLParser.java:1039)
 53     at com.gargoylesoftware.htmlunit.html.HTMLParser.parse(HTMLParser.java:252)
 54     at com.gargoylesoftware.htmlunit.html.HTMLParser.parseHtml(HTMLParser.java:198)
 55     at com.gargoylesoftware.htmlunit.DefaultPageCreator.createHtmlPage(DefaultPageCreator.java:271)
 56     at com.gargoylesoftware.htmlunit.DefaultPageCreator.createPage(DefaultPageCreator.java:159)
 57     at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient.java:478)
 58     at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:352)
 59     at com.gargoylesoftware.htmlunit.html.BaseFrameElement.loadInnerPageIfPossible(BaseFrameElement.java:183)
 60     at com.gargoylesoftware.htmlunit.html.BaseFrameElement.loadInnerPage(BaseFrameElement.java:121)
 61     at com.gargoylesoftware.htmlunit.html.HtmlPage.loadFrames(HtmlPage.java:1893)
 62     at com.gargoylesoftware.htmlunit.html.HtmlPage.initialize(HtmlPage.java:227)
 63     at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient.java:485)
 64     at com.gargoylesoftware.htmlunit.WebClient.loadDownloadedResponses(WebClient.java:2135)
 65     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.doProcessPostponedActions(JavaScriptEngine.java:982)
 66     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.processPostponedActions(JavaScriptEngine.java:1072)
 67     at com.gargoylesoftware.htmlunit.html.DomElement.click(DomElement.java:789)
 68     at com.gargoylesoftware.htmlunit.html.HtmlImageInput.click(HtmlImageInput.java:152)
 69     at com.gargoylesoftware.htmlunit.javascript.host.html.HTMLInputElement.click(HTMLInputElement.java:477)
 70     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
 71     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
 72     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
 73     at java.lang.reflect.Method.invoke(Method.java:606)
 74     at net.sourceforge.htmlunit.corejs.javascript.MemberBox.invoke(MemberBox.java:153)
 75     ... 19 more
 76 Caused by: net.sourceforge.htmlunit.corejs.javascript.EcmaError: TypeError: Cannot read property "nodeName" from null (http://xxxx/305000772#7)
 77     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:3935)
 78     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:3919)
 79     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.typeError(ScriptRuntime.java:3944)
 80     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.typeError2(ScriptRuntime.java:3960)
 81     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.undefReadError(ScriptRuntime.java:3971)
 82     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.getObjectProp(ScriptRuntime.java:1519)
 83     at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpretLoop(Interpreter.java:1243)
 84     at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpret(Interpreter.java:798)
 85     at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.exec(InterpretedFunction.java:118)
 86     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$3.doRun(JavaScriptEngine.java:827)
 87     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:939)
 88     ... 65 more
 89 Enclosed exception: 
 90 java.lang.RuntimeException: Exception invoking click
 91     at net.sourceforge.htmlunit.corejs.javascript.MemberBox.invoke(MemberBox.java:181)
 92     at net.sourceforge.htmlunit.corejs.javascript.FunctionObject.call(FunctionObject.java:449)
 93     at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpretLoop(Interpreter.java:1536)
 94     at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpret(Interpreter.java:798)
 95     at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.call(InterpretedFunction.java:105)
 96     at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.doTopCall(ContextFactory.java:411)
 97     at com.gargoylesoftware.htmlunit.javascript.HtmlUnitContextFactory.doTopCall(HtmlUnitContextFactory.java:309)
 98     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.doTopCall(ScriptRuntime.java:3286)
 99     at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.exec(InterpretedFunction.java:115)
100     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$3.doRun(JavaScriptEngine.java:827)
101     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:939)
102     at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:628)
103     at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:513)
104     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:836)
105     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:812)
106     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:800)
107     at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScriptIfPossible(HtmlPage.java:910)
108     at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScript(HtmlPage.java:878)
109     at com.suypower.AutoLogin12345.testHomePage(AutoLogin12345.java:48)
110     at com.suypower.AutoLogin12345.main(AutoLogin12345.java:23)
111 Caused by: com.gargoylesoftware.htmlunit.ScriptException: TypeError: Cannot read property "nodeName" from null (http://xxx/305000772#7)
112     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:954)
113     at net.sourceforge.htmlunit.corejs.javascript.Context.call(Context.java:628)
114     at net.sourceforge.htmlunit.corejs.javascript.ContextFactory.call(ContextFactory.java:513)
115     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:836)
116     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:812)
117     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.execute(JavaScriptEngine.java:800)
118     at com.gargoylesoftware.htmlunit.html.HtmlPage.executeJavaScriptIfPossible(HtmlPage.java:910)
119     at com.gargoylesoftware.htmlunit.html.HtmlScript.executeInlineScriptIfNeeded(HtmlScript.java:354)
120     at com.gargoylesoftware.htmlunit.html.HtmlScript.executeScriptIfNeeded(HtmlScript.java:415)
121     at com.gargoylesoftware.htmlunit.html.HtmlScript$3.execute(HtmlScript.java:271)
122     at com.gargoylesoftware.htmlunit.html.HtmlScript.onAllChildrenAddedToPage(HtmlScript.java:293)
123     at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:799)
124     at org.apache.xerces.parsers.AbstractSAXParser.endElement(Unknown Source)
125     at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.endElement(HTMLParser.java:756)
126     at org.cyberneko.html.HTMLTagBalancer.callEndElement(HTMLTagBalancer.java:1170)
127     at org.cyberneko.html.HTMLTagBalancer.endElement(HTMLTagBalancer.java:1072)
128     at org.cyberneko.html.filters.DefaultFilter.endElement(DefaultFilter.java:206)
129     at org.cyberneko.html.filters.NamespaceBinder.endElement(NamespaceBinder.java:330)
130     at org.cyberneko.html.HTMLScanner$ContentScanner.scanEndElement(HTMLScanner.java:3126)
131     at org.cyberneko.html.HTMLScanner$ContentScanner.scan(HTMLScanner.java:2093)
132     at org.cyberneko.html.HTMLScanner.scanDocument(HTMLScanner.java:920)
133     at org.cyberneko.html.HTMLConfiguration.parse(HTMLConfiguration.java:499)
134     at org.cyberneko.html.HTMLConfiguration.parse(HTMLConfiguration.java:452)
135     at org.apache.xerces.parsers.XMLParser.parse(Unknown Source)
136     at com.gargoylesoftware.htmlunit.html.HTMLParser$HtmlUnitDOMBuilder.parse(HTMLParser.java:1039)
137     at com.gargoylesoftware.htmlunit.html.HTMLParser.parse(HTMLParser.java:252)
138     at com.gargoylesoftware.htmlunit.html.HTMLParser.parseHtml(HTMLParser.java:198)
139     at com.gargoylesoftware.htmlunit.DefaultPageCreator.createHtmlPage(DefaultPageCreator.java:271)
140     at com.gargoylesoftware.htmlunit.DefaultPageCreator.createPage(DefaultPageCreator.java:159)
141     at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient.java:478)
142     at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:352)
143     at com.gargoylesoftware.htmlunit.html.BaseFrameElement.loadInnerPageIfPossible(BaseFrameElement.java:183)
144     at com.gargoylesoftware.htmlunit.html.BaseFrameElement.loadInnerPage(BaseFrameElement.java:121)
145     at com.gargoylesoftware.htmlunit.html.HtmlPage.loadFrames(HtmlPage.java:1893)
146     at com.gargoylesoftware.htmlunit.html.HtmlPage.initialize(HtmlPage.java:227)
147     at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient.java:485)
148     at com.gargoylesoftware.htmlunit.WebClient.loadDownloadedResponses(WebClient.java:2135)
149     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.doProcessPostponedActions(JavaScriptEngine.java:982)
150     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine.processPostponedActions(JavaScriptEngine.java:1072)
151     at com.gargoylesoftware.htmlunit.html.DomElement.click(DomElement.java:789)
152     at com.gargoylesoftware.htmlunit.html.HtmlImageInput.click(HtmlImageInput.java:152)
153     at com.gargoylesoftware.htmlunit.javascript.host.html.HTMLInputElement.click(HTMLInputElement.java:477)
154     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
155     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
156     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
157     at java.lang.reflect.Method.invoke(Method.java:606)
158     at net.sourceforge.htmlunit.corejs.javascript.MemberBox.invoke(MemberBox.java:153)
159     ... 19 more
160 Caused by: net.sourceforge.htmlunit.corejs.javascript.EcmaError: TypeError: Cannot read property "nodeName" from null (http://xxx/305000772#7)
161     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:3935)
162     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.constructError(ScriptRuntime.java:3919)
163     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.typeError(ScriptRuntime.java:3944)
164     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.typeError2(ScriptRuntime.java:3960)
165     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.undefReadError(ScriptRuntime.java:3971)
166     at net.sourceforge.htmlunit.corejs.javascript.ScriptRuntime.getObjectProp(ScriptRuntime.java:1519)
167     at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpretLoop(Interpreter.java:1243)
168     at net.sourceforge.htmlunit.corejs.javascript.Interpreter.interpret(Interpreter.java:798)
169     at net.sourceforge.htmlunit.corejs.javascript.InterpretedFunction.exec(InterpretedFunction.java:118)
170     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$3.doRun(JavaScriptEngine.java:827)
171     at com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine$HtmlUnitContextAction.run(JavaScriptEngine.java:939)
172     ... 65 more
173 ======= EXCEPTION END ========

希望能帮助到你,晚安!

 

posted @ 2016-01-17 00:49  易木  阅读(14970)  评论(0编辑  收藏  举报