WebClient基本操作

 

/**   
* @Title: webclientTest.java 
* @Package webclient 
* @Description: Basic HtmlUnit WebClient usage: fetch a page and parse it with Jsoup
* @author A18ccms A18ccms_gmail_com   
* @date 2016年8月30日 下午3:52:41 
* @version V1.0   
*/
package webclient;

import java.io.IOException;
import java.net.MalformedURLException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlButton;
import com.gargoylesoftware.htmlunit.html.HtmlForm;
import com.gargoylesoftware.htmlunit.html.HtmlPage;

/**
 * Basic HtmlUnit {@link WebClient} operations: configures a client with
 * browser-like request headers, downloads one page, parses it with Jsoup and
 * prints the parsed document. If the page is titled "Robot Check", the first
 * form on the page is submitted (a bounded number of times) until the title
 * changes.
 *
 * @author zeze
 * @date 2016-08-30 15:52:41
 */
public class webclientTest {

    /** Page downloaded by {@link #main(String[])}. */
    private static final String TARGET_URL = "http://www.cnblogs.com/zeze";

    /** Title of the interstitial page that triggers the form-submission loop. */
    private static final String ROBOT_CHECK_TITLE = "Robot Check";

    /** Upper bound on form submissions so the loop cannot spin forever. */
    private static final int MAX_ROBOT_CHECK_ATTEMPTS = 5;

    /**
     * Fetches {@link #TARGET_URL}, prints the Jsoup-parsed document to standard
     * output and tries to get past a "Robot Check" page if one is served.
     * Failures are reported on standard error instead of being swallowed.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // try-with-resources closes the client (and its windows) on every path.
        try (WebClient webClient = new WebClient(BrowserVersion.CHROME)) {
            configure(webClient);

            HtmlPage page = webClient.getPage(TARGET_URL);
            Document doc = Jsoup.parse(page.asXml());
            System.out.println(doc);

            int attempts = 0;
            while (ROBOT_CHECK_TITLE.equals(doc.select("title").text())) {
                if (attempts >= MAX_ROBOT_CHECK_ATTEMPTS) {
                    System.err.println("Still on \"" + ROBOT_CHECK_TITLE + "\" after "
                            + attempts + " attempts, giving up");
                    break;
                }
                attempts++;
                page = submitRobotCheckForm(page);
                // Re-parse AFTER the click so the loop condition sees the new page.
                doc = Jsoup.parse(page.asXml());
            }
        } catch (FailingHttpStatusCodeException e) {
            System.err.println("HTTP request failed with status " + e.getStatusCode()
                    + ": " + e.getMessage());
        } catch (MalformedURLException e) {
            System.err.println("Invalid URL " + TARGET_URL + ": " + e.getMessage());
        } catch (IOException e) {
            System.err.println("I/O error while fetching " + TARGET_URL + ": " + e.getMessage());
        }
    }

    /**
     * Applies the client options and the fixed request headers used by this demo.
     *
     * @param webClient the client to configure
     */
    private static void configure(WebClient webClient) {
        webClient.getOptions().setTimeout(20000);
        webClient.getCookieManager().setCookiesEnabled(true);
        webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
        webClient.getOptions().setThrowExceptionOnScriptError(false);
        webClient.getOptions().setCssEnabled(false);
        webClient.getOptions().setJavaScriptEnabled(false);

        webClient.addRequestHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) "
                + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/43.0.2357.134 Safari/537.36");
        webClient.addRequestHeader("Accept",
                "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
        webClient.addRequestHeader("Accept-Encoding", "gzip, deflate, sdch");
        webClient.addRequestHeader("Accept-Language", "zh-CN,zh;q=0.8");
        webClient.addRequestHeader("Connection", "keep-alive");
        // NOTE(review): this fixed Host value is attached to every request made
        // by the client, including requests to other hosts - confirm it is needed.
        webClient.addRequestHeader("Host", "www.cnblogs.com");
    }

    /**
     * Clears the "field-keywords" input of the first form on the page and clicks
     * that form's first {@code <button>}.
     *
     * @param page the page currently showing the robot check
     * @return the page returned by the button click
     * @throws IOException if the page has no form or button to submit, or if
     *                     the click itself fails
     */
    private static HtmlPage submitRobotCheckForm(HtmlPage page) throws IOException {
        if (page.getForms().isEmpty()) {
            throw new IOException("Robot check page contains no form");
        }
        HtmlForm form = page.getForms().get(0);
        if (form.getElementsByTagName("button").isEmpty()) {
            throw new IOException("Robot check form contains no button");
        }
        HtmlButton button = (HtmlButton) form.getElementsByTagName("button").get(0);
        form.getInputByName("field-keywords").setValueAttribute("");
        return button.click();
    }

}

 

posted @ 2016-08-30 16:08  陈泽泽  阅读(323)  评论(0)  编辑  收藏  举报