java爬取当前疫情数据项目总结

代码

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.net.ssl.HttpsURLConnection;

import com.alibaba.fastjson.JSONArray;

import net.sf.json.JSON;
import net.sf.json.JSONObject;

/**
 * Downloads the DXY (Ding Xiang Yuan) epidemic page, extracts the province-level
 * statistics that are embedded in the page as a JSON array, and stores one row
 * per province in the {@code sheng} table.
 */
public class ggg {

    /** Page whose inline script contains the {@code window.getAreaStat = [...]} assignment. */
    private static final String AREA_STAT_URL = "https://ncov.dxy.cn/ncovh5/view/pneumonia";

    /**
     * Captures the JSON array assigned to {@code window.getAreaStat}. Group 1 is the array
     * text; the match stops at the closing brace that is directly followed by {@code catch}.
     * Compiled once because the expression never changes.
     */
    private static final Pattern AREA_STAT_PATTERN =
            Pattern.compile("window.getAreaStat = (.*?)\\}(?=catch)");

    /** Insert statement for one province row: name, confirmed count, cured count, date. */
    private static final String INSERT_PROVINCE_SQL = "insert into sheng values(?,?,?,?) ";

    /**
     * Upper bound on the number of provinces stored per run. The original loop processed
     * the first 31 entries of the array; the cap is kept so the stored data set is unchanged.
     */
    private static final int MAX_PROVINCES = 31;

    public static void main(String[] args) throws IOException, SQLException {
        getAreaStat();
    }

    /**
     * Performs an HTTPS GET request and returns the response body as a single string.
     *
     * <p>Line terminators are dropped while reading, so the result is the concatenation
     * of all response lines.
     *
     * @param requesturl the HTTPS URL to fetch
     * @return the response body decoded as UTF-8, without line terminators
     * @throws IOException if the URL is malformed, the connection fails, or reading fails
     */
    private static String httpRequset(String requesturl) throws IOException {
        // A malformed URL now propagates as an IOException instead of being printed and
        // followed by a NullPointerException on the never-initialised buffer.
        URL url = new URL(requesturl);
        HttpsURLConnection connection = (HttpsURLConnection) url.openConnection();
        try {
            connection.setDoInput(true);
            connection.setRequestMethod("GET");

            StringBuilder body = new StringBuilder();
            // try-with-resources closes the reader and the underlying stream on every path.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), "utf-8"))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    body.append(line);
                }
            }
            return body.toString();
        } finally {
            connection.disconnect();
        }
    }

    /**
     * Fetches the epidemic page, extracts the province statistics array and inserts
     * province name, confirmed count, cured count and today's date into {@code sheng}.
     *
     * <p>Each province object also carries a {@code cities} array (with {@code cityName},
     * {@code confirmedCount}, ...); city-level data is not persisted by this method.
     *
     * @return the raw JSON array text captured from the page, or an empty string when the
     *         page could not be downloaded or the expected script assignment was not found
     * @throws SQLException if obtaining the connection or inserting a row fails
     */
    public static String getAreaStat() throws SQLException {
        String htmlResult = "";
        try {
            htmlResult = httpRequset(AREA_STAT_URL);
        } catch (IOException e) {
            // Best effort: a failed download leaves the page text empty, which yields "" below.
            e.printStackTrace();
        }
        System.out.println(htmlResult);

        Matcher areaStatMatcher = AREA_STAT_PATTERN.matcher(htmlResult);
        if (!areaStatMatcher.find()) {
            return "";
        }

        String result = areaStatMatcher.group(1);
        System.out.println(result);

        JSONArray provinces = JSONArray.parseArray(result);
        if (provinces == null) {
            // Nothing parseable was captured; there are no rows to store.
            return result;
        }

        String time = new SimpleDateFormat("yyyy年MM月dd日").format(new Date());

        // Never read past the end of the array, even if fewer provinces are returned.
        int provinceCount = Math.min(provinces.size(), MAX_PROVINCES);

        // One connection and one prepared statement are reused for all rows and are
        // closed automatically, instead of preparing (and leaking) a statement per row.
        try (Connection conn = DBUtil.DBUtil.getConnection();
             PreparedStatement pst = conn.prepareStatement(INSERT_PROVINCE_SQL)) {
            for (int i = 0; i < provinceCount; i++) {
                com.alibaba.fastjson.JSONObject province = provinces.getJSONObject(i);
                String provinceName = province.getString("provinceName");
                String confirmed = province.getString("confirmedCount");
                String cured = province.getString("curedCount");
                System.out.println(provinceName);

                pst.setString(1, provinceName);
                pst.setString(2, confirmed);
                pst.setString(3, cured);
                pst.setString(4, time);
                pst.executeUpdate();
            }
        }
        return result;
    }
}

数据爬取自丁香医生的疫情页面。页面中的数据以JSON数组的形式内嵌在脚本里(window.getAreaStat = [...]),所以先用正则表达式把这段文本提取出来,再转换成JSONArray,之后就可以直接使用了。

日志:

 

日期 编号 类型 引入阶段 排除阶段 修复时间 修复缺陷
 3.10    编码 编码  3.10  JSONArray无法使用
描述:JSONArray无法使用,原因是没有把jsoup-1.7.2.jar和fastjson-1.2.66.jar导入构建路径。

 

日期 编号 类型 引入阶段 排除阶段 修复时间 修复缺陷
 3.10   编码   编码  3.10 找不到数据 
描述:正则表达式使用不熟练,导致匹配不到数据;正则表达式中"()"括起来的部分才是要截取的内容,需要用group(1)取出。

posted @ 2020-03-15 18:09  溜了溜  阅读(1792)  评论(0编辑  收藏  举报