人生第一个java脚本-jsoup实例

目的:

  获取如下信息:

   

 

制作流程图

 

 

 该方法的缺点:会抓取到多个重复的货号。

解决办法:导出成 Excel 表格 -> 选中"货号"列 -> 删除重复值。

代码结构如下:

ToMain.java

 


package com.lnthz.main;



import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.lnthz.cookie.CookieUtil;
import com.lnthz.jdbc.JdbcMain;
import com.lnthz.pojo.ItemCas;
import com.lnthz.pojo.TargetData;
import com.lnthz.pojo.XDocDataPojo;


/**
 * Main class of the crawler.
 *
 * <p>For every numeric search key in a range it loads the product search page,
 * resolves the product's absolute detail URL, and stores the item, its
 * specification table and its detail HTML through {@link JdbcMain}.
 *
 * @author lnthz
 */
public class ToMain {

    /** Search URL prefix; the numeric search key is appended to it. */
    private static final String SEARCH_URL_PREFIX = "https://www.xfnano.com/Product/?1=1&key=";

    /** Link the original author treats as the "empty" result; search keys resolving to it are skipped. */
    private static final String EMPTY_RESULT_URL =
            "https://www.xfnano.com/Product/comment.aspx?fk=0&kind=0&width=520&height=350&TB_iniframe=true&KeepThis=true&TB_iframe=true&modal=false";

    /** Offset added to the loop index to build the numeric search key. */
    private static final int ITEM_NUMBER_BASE = 100001;

    public static void main(String[] args) throws Exception {
        // The two bounds exist to ease debugging (a few target pages follow different
        // rules); they could also be used to split the range across threads.
        ToMain.JueDDZ(441, 1000);
    }

    /**
     * Finds the absolute detail URL for each search key in {@code [aa, bb)} and
     * persists everything scraped from it.
     *
     * <p>Search keys whose result page has no result container, no link, or only
     * the known "empty" link are skipped. The database connection is closed when
     * the loop ends, whether it finished normally or failed.
     *
     * @param aa first loop index (inclusive); the search key is {@code 100001 + index}
     * @param bb last loop index (exclusive)
     * @throws Exception if a page cannot be fetched or a database insert fails
     */
    public static void JueDDZ(int aa, int bb) throws Exception {
        try {
            for (int i = aa; i < bb; ++i) {
                int itemNumber = ITEM_NUMBER_BASE + i;
                Document searchPage = Jsoup.connect(SEARCH_URL_PREFIX + itemNumber).get();

                // first() returns null when the page has no result container at all.
                Element container = searchPage.select("div.pro_list_container").first();
                if (container == null) {
                    continue;
                }

                Elements links = container.select("a[href]");
                String casName = links.text();
                // attr() yields "" when no link matched; connecting to "" would fail.
                String absHref = links.attr("abs:href");
                if (absHref.isEmpty() || absHref.equals(EMPTY_RESULT_URL)) {
                    continue;
                }

                System.out.println("当前i值:" + i + "当前地址:" + absHref);

                ItemCas itemCas = new ItemCas();
                itemCas.setItem(itemNumber);
                itemCas.setCasName(casName);
                itemCas.setMaincasurl(absHref);
                JdbcMain.addItemCas(itemCas);

                // Both extractors read the same page, so download it only once.
                Document detailPage = fetchDetailPage(absHref);
                xTableData(detailPage);
                xDocData(itemNumber, detailPage);
            }
            System.out.println("最后");
        } finally {
            JdbcMain.jdbcClose();
        }
    }

    /**
     * Downloads a product detail page, attaching the cookies supplied by
     * {@link CookieUtil} when there are any.
     *
     * @param absHref absolute URL of the detail page
     * @return the parsed page
     * @throws Exception if the page cannot be fetched
     */
    private static Document fetchDetailPage(String absHref) throws Exception {
        Connection conn = Jsoup.connect(absHref);
        HashMap<String, String> cookies = CookieUtil.getCookies();
        // jsoup rejects a null cookie map, so only attach cookies that exist.
        if (cookies != null) {
            conn.cookies(cookies);
        }
        return conn.get();
    }

    /**
     * Stores the detailed description HTML of one product.
     *
     * @param aHH        numeric item key the description belongs to
     * @param detailPage parsed product detail page
     * @throws Exception if the database insert fails
     */
    private static void xDocData(int aHH, Document detailPage) throws Exception {
        String detailHtml = detailPage
                .select("div.pro_contbox")
                .select("div.other_r")
                .select("div.pro_detail")
                .html();

        XDocDataPojo xd = new XDocDataPojo();
        xd.setItem(aHH);
        xd.setXdoc(detailHtml);
        JdbcMain.addXDocDataPojo(xd);
    }

    /**
     * Stores the rows of the product's specification table.
     *
     * <p>Each data row becomes a map from zero-based column index to cell text.
     * The first {@code <tr>} is skipped as the header row.
     *
     * @author lnthz
     * @param detailPage parsed product detail page
     * @throws Exception if the database insert fails
     */
    private static void xTableData(Document detailPage) throws Exception {
        Elements rows = detailPage.select("div.pro_contbox div.tablelist").select("tr");

        List<HashMap<Integer, String>> tableData = new ArrayList<HashMap<Integer, String>>();
        // Start at 1 to leave out the header row (0 would include it).
        for (int i = 1; i < rows.size(); ++i) {
            Elements cells = rows.get(i).select("td");
            HashMap<Integer, String> cellsByColumn = new HashMap<Integer, String>();
            for (int j = 0; j < cells.size(); j++) {
                cellsByColumn.put(j, cells.get(j).text());
            }
            tableData.add(cellsByColumn);
        }
        JdbcMain.insertCas(tableData);
    }

}

JdbcMain.java


package com.lnthz.jdbc;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import com.lnthz.pojo.ItemCas;
import com.lnthz.pojo.XDocDataPojo;




/**
 * JDBC helper that owns the single MySQL connection used by the crawler and
 * offers one insert method per target table.
 *
 * <p>The connection is opened once in the static initializer. If that fails the
 * stack trace is printed and the connection stays {@code null}; the insert
 * methods then fail with an {@link IllegalStateException}.
 */
public class JdbcMain {
    // NOTE(review): connection settings and credentials are hard-coded here;
    // consider loading them from configuration instead of source code.
    public static final String URL = "jdbc:mysql://localhost:3307/webCas?useUnicode=true&characterEncoding=utf8";
    public static final String USER = "root";
    public static final String PASSWORD = "123456";
    private static Connection conn = null;

    static {
        try {
            // 1. Load the driver.
            Class.forName("com.mysql.jdbc.Driver");
            // 2. Open the database connection.
            conn = DriverManager.getConnection(URL, USER, PASSWORD);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Closes the shared connection. Does nothing when the connection was never
     * opened; a failure while closing is printed, not thrown.
     */
    public static void jdbcClose() {
        if (conn == null) {
            return;
        }
        try {
            conn.close();
            // Report success only after the close actually happened.
            System.out.println("数据库已关闭(* ̄︶ ̄)");
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * @return the shared connection, or {@code null} if it could not be opened
     */
    public static Connection getConnection() {
        return conn;
    }

    /**
     * Inserts table-detail rows into {@code TargetData}.
     *
     * <p>Each list element must be a {@code Map<Integer, String>} whose key is the
     * zero-based column index; key {@code k} is bound to SQL parameter
     * {@code k + 1}. Parameters are cleared before every row so that a row with
     * fewer cells can never silently reuse values left over from the previous row.
     *
     * @param list rows to insert
     * @throws Exception if preparing or executing the insert fails
     */
    public static void insertCas(List<?> list) throws Exception {
        String sql = "insert into TargetData(id,itemnumber,casnumber,packnumber,parameter,instock,period,price,shu) values(?,?,?,?,?,?,?,?,?)";
        PreparedStatement pstat = requireConnection().prepareStatement(sql);
        try {
            for (Object row : list) {
                Map<?, ?> cells = (Map<?, ?>) row;
                pstat.clearParameters();
                for (Map.Entry<?, ?> cell : cells.entrySet()) {
                    int parameterIndex = ((Integer) cell.getKey()) + 1;
                    pstat.setString(parameterIndex, (String) cell.getValue());
                }
                pstat.executeUpdate();
            }
        } finally {
            closeStatement(pstat);
        }
        System.out.println("TargetData插入成功(* ̄︶ ̄)");
    }

    /**
     * Inserts one row into {@code ItemCas}. SQL failures are printed, not thrown.
     *
     * @param i item to store
     */
    public static void addItemCas(ItemCas i) {
        String sql = "insert into ItemCas(item,casName,maincasurl) values (?,?,?)";
        PreparedStatement ptmt = null;
        try {
            ptmt = requireConnection().prepareStatement(sql);
            ptmt.setInt(1, i.getItem());
            ptmt.setString(2, i.getCasName());
            ptmt.setNString(3, i.getMaincasurl());
            ptmt.executeUpdate();
            // Report success only after the row was actually written.
            System.out.println("ItemCas插入成功(* ̄︶ ̄)");
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            closeStatement(ptmt);
        }
    }

    /**
     * Inserts one row into {@code XDocDataPojo}.
     *
     * @param xd detail record to store
     * @throws SQLException if preparing or executing the insert fails
     */
    public static void addXDocDataPojo(XDocDataPojo xd) throws SQLException {
        String sql = "insert into XDocDataPojo(item,xdoc) values(?,?)";
        PreparedStatement ptmt = requireConnection().prepareStatement(sql);
        try {
            ptmt.setInt(1, xd.getItem());
            ptmt.setString(2, xd.getXdoc());
            ptmt.executeUpdate();
            System.out.println("XDocDataPojo插入成功(* ̄︶ ̄)");
        } finally {
            closeStatement(ptmt);
        }
    }

    /** Returns the shared connection, failing with a clear message if it was never opened. */
    private static Connection requireConnection() {
        if (conn == null) {
            throw new IllegalStateException(
                    "Database connection is not available; see the startup stack trace for the cause");
        }
        return conn;
    }

    /** Closes a statement, printing (not throwing) any failure so it cannot mask an earlier error. */
    private static void closeStatement(Statement statement) {
        if (statement == null) {
            return;
        }
        try {
            statement.close();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }
}

ItemCas.java

package com.lnthz.pojo;

/**
 * Mutable data holder for one crawled product: its numeric item key, the link
 * text found for it, and the absolute URL of its detail page.
 */
public class ItemCas {

    /** Numeric item key. */
    public int item;

    /** Text of the product link(s). */
    public String casName;

    /** Absolute URL of the product detail page. */
    public String maincasurl;

    /** @return the numeric item key */
    public int getItem() {
        return this.item;
    }

    /** @param item the numeric item key */
    public void setItem(int item) {
        this.item = item;
    }

    /** @return the product link text */
    public String getCasName() {
        return this.casName;
    }

    /** @param casName the product link text */
    public void setCasName(String casName) {
        this.casName = casName;
    }

    /** @return the absolute detail-page URL */
    public String getMaincasurl() {
        return this.maincasurl;
    }

    /** @param maincasurl the absolute detail-page URL */
    public void setMaincasurl(String maincasurl) {
        this.maincasurl = maincasurl;
    }
}

XDocDataPojo.java

package com.lnthz.pojo;

/**
 * Mutable data holder pairing a numeric item key with the detail HTML scraped
 * for that item.
 */
public class XDocDataPojo {

    /** Numeric item key. */
    public int item;

    /** Detail HTML of the item. */
    public String xdoc;

    /** @return the numeric item key */
    public int getItem() {
        return this.item;
    }

    /** @param aHH the numeric item key */
    public void setItem(int aHH) {
        this.item = aHH;
    }

    /** @return the detail HTML */
    public String getXdoc() {
        return this.xdoc;
    }

    /** @param xdoc the detail HTML */
    public void setXdoc(String xdoc) {
        this.xdoc = xdoc;
    }
}

CookieUtil.java

package com.lnthz.cookie;

import java.util.HashMap;

/**
 * Holds the fixed set of HTTP cookies sent with requests to the target site.
 */
public class CookieUtil {
    /** Cookie name to cookie value; populated once by the static initializer. */
    static HashMap<String, String> cookies;

    static {
        HashMap<String, String> cookie = new HashMap<String, String>();
        // The target site requires a login; supply your own cookie values here
        // by adding further put(...) calls.
        cookie.put("Hm_lvt_d4e9a2b5f76697fc95880ee989b6b944", "1543460799,1543894953,1543987988,1543992054");
        cookie.put("LXB_REFER", "www.baidu.com");
        // Publish the populated map; without this assignment getCookies()
        // would always return null.
        cookies = cookie;
    }

    /**
     * @return the cookie map (name to value) built at class initialization
     */
    public static HashMap<String, String> getCookies() {
        return cookies;
    }

}

 

 

 

posted @ 2018-12-05 15:37  流年拓荒者  阅读(2438)  评论(0编辑  收藏  举报
   
在博客右侧,浮出一个微博浮动框