使用ExecutorService实现简单的多线程爬虫

import java.io.File;
import java.io.IOException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

import org.apache.commons.io.FileUtils;
import org.jsoup.Connection;
import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.sqr.base.util.LocalUtil;

/**
 * Demo crawler that fetches a fixed number of search-result pages concurrently
 * on a fixed-size {@link ExecutorService} thread pool and saves each page as an
 * HTML file on disk.
 */
public class ExecutorsDemo {
    private static final Logger logger = LoggerFactory.getLogger(ExecutorsDemo.class);

    /** Number of worker threads in the pool. */
    private static final int THREAD_COUNT = 8;

    /**
     * Submits one download task per result page, waits for all of them to
     * finish, and logs the total elapsed time.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT);
        long t0 = System.currentTimeMillis();
        int sum = 100;
        String listUrl = "http://s.wanfangdata.com.cn/Paper.aspx";
        String key = "日";
        logger.debug("start pk");
        for (int i = 1; i <= sum; i++) {
            // Lambdas may only capture effectively-final locals, so copy the loop index.
            final int page = i;
            executor.execute(() -> fetchPage(listUrl, key, page));
        }
        // Stop accepting new tasks; the tasks already queued still run to completion.
        executor.shutdown();
        try {
            // Block until every task has finished instead of spinning on
            // isTerminated(), which would burn a full CPU core while waiting.
            executor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
        } catch (InterruptedException e) {
            // Cancel outstanding work and restore the interrupt flag for the caller.
            executor.shutdownNow();
            Thread.currentThread().interrupt();
            logger.warn("interrupted while waiting for crawl tasks to finish", e);
            return;
        }
        logger.debug("完毕!用时{}", LocalUtil.formatTime(System.currentTimeMillis() - t0));
    }

    /**
     * Downloads a single result page and writes its HTML to
     * {@code d:/输出/wanfangPk1/<page>.html} using the GBK charset.
     *
     * @param listUrl URL of the search endpoint
     * @param key     search keyword sent as the {@code q} parameter
     * @param page    page number, sent as the {@code p} parameter and used as the file name
     */
    private static void fetchPage(String listUrl, String key, int page) {
        Connection con = LocalUtil.connect(listUrl).data("q", key, "f", "top", "p", page + "");
        // NOTE(review): the meaning of the boolean flag is defined by LocalUtil.getDoc,
        // which is not visible here — confirm before changing it.
        Document doc = LocalUtil.getDoc(con, false);
        try {
            FileUtils.write(new File("d:/输出/wanfangPk1/" + page + ".html"), doc.html(), "gbk");
        } catch (IOException e) {
            // Log through SLF4J (with the failing page and the cause) rather than printStackTrace().
            logger.error("failed to write page {}", page, e);
        }
        logger.debug("日第{}页", page);
    }
}

开了八个线程,运行时间大约是单线程的1/8.

posted @ 2017-01-17 11:17  豆苗稀  阅读(296)  评论(0)  编辑  收藏  举报