Java爬取丁香医生疫情数据并存储至数据库

1、通过页面的url获取html代码

/**
	 * Sends a GET request to the given HTTPS URL and returns the response body
	 * decoded as UTF-8.
	 * <p>
	 * The body is read line by line and the line terminators are dropped, so the
	 * returned text is the plain concatenation of all lines.
	 *
	 * @param requesturl absolute URL to request; the connection is cast to
	 *                   {@link HttpsURLConnection}, so a non-https URL fails with a
	 *                   {@link ClassCastException}
	 * @return the response body with line breaks removed, never {@code null}
	 * @throws IOException if the URL is malformed (reported as
	 *                     {@code MalformedURLException}) or the request or read fails
	 */
	private static String httpRequset(String requesturl) throws IOException {
		// A malformed URL is propagated to the caller instead of being printed and
		// then followed by a NullPointerException on the never-created buffer.
		URL url = new URL(requesturl);
		HttpsURLConnection httpsURLConnection = (HttpsURLConnection) url.openConnection();
		httpsURLConnection.setDoInput(true);
		httpsURLConnection.setRequestMethod("GET");

		StringBuilder buffer = new StringBuilder();
		// try-with-resources closes the reader chain (and with it the network
		// stream) on both the normal and the exceptional path.
		try (InputStream inputStream = httpsURLConnection.getInputStream();
				InputStreamReader inputStreamReader = new InputStreamReader(inputStream, "utf-8");
				BufferedReader bufferedReader = new BufferedReader(inputStreamReader)) {
			String str;
			while ((str = bufferedReader.readLine()) != null) {
				buffer.append(str);
			}
		}
		return buffer.toString();
	}

  2、获取省市疫情数据

/**
	 * Downloads the DXY epidemic page, extracts the per-province statistics that
	 * the page assigns to {@code window.getAreaStat}, and stores them in the
	 * {@code province} and {@code city} tables of the "VData" database.
	 * <p>
	 * Every province in the extracted array is inserted, followed by each entry
	 * of its {@code cities} array (if present). All rows are stamped with the
	 * current date. Values are bound through prepared statements, so a JSON field
	 * that is absent is stored as SQL NULL. I/O and SQL failures are printed to
	 * standard error and do not propagate.
	 *
	 * @return the extracted JSON array text, or an empty string when the page
	 *         could not be fetched or the data was not found in it
	 */
	public static String getAreaStat() {
		String url = "https://ncov.dxy.cn/ncovh5/view/pneumonia";
		String htmlResult = "";
		try {
			htmlResult = httpRequset(url);
		} catch (IOException e) {
			e.printStackTrace();
		}

		// The data is embedded in the HTML as a JSON literal inside a script, so
		// it is cut out with a regular expression.
		String reg = "window.getAreaStat = (.*?)\\}(?=catch)";
		Pattern totalPattern = Pattern.compile(reg);
		Matcher totalMatcher = totalPattern.matcher(htmlResult);
		if (!totalMatcher.find()) {
			return "";
		}

		String result = totalMatcher.group(1);
		System.out.println(result);
		JSONArray array = JSONArray.parseArray(result);

		String provinceSql = "insert into province values(?,?,?,?,?,?,?)";
		String citySql = "insert into city values(?,?,?,?,?,?,?,?)";
		// try-with-resources closes the statements and the connection even when an
		// insert fails part-way through.
		try (Connection con = BaseConnection.getConnection("VData");
				java.sql.PreparedStatement provinceStmt = con.prepareStatement(provinceSql);
				java.sql.PreparedStatement cityStmt = con.prepareStatement(citySql)) {
			// Same text the date produced when it was concatenated into the SQL.
			String date = String.valueOf(new Date(System.currentTimeMillis()));

			// Iterate over what the page actually returned rather than a fixed
			// count of 31 entries.
			for (int i = 0; i < array.size(); i++) {
				com.alibaba.fastjson.JSONObject jsonObject = com.alibaba.fastjson.JSONObject
						.parseObject(array.getString(i));
				String provinceName = jsonObject.getString("provinceName");

				provinceStmt.setString(1, provinceName);
				provinceStmt.setString(2, jsonObject.getString("confirmedCount"));
				provinceStmt.setString(3, jsonObject.getString("suspectedCount"));
				provinceStmt.setString(4, jsonObject.getString("curedCount"));
				provinceStmt.setString(5, jsonObject.getString("deadCount"));
				provinceStmt.setString(6, jsonObject.getString("currentConfirmedCount"));
				provinceStmt.setString(7, date);
				provinceStmt.executeUpdate();

				JSONArray array2 = jsonObject.getJSONArray("cities");
				if (array2 == null) {
					// Province entry without a city breakdown.
					continue;
				}
				for (int j = 0; j < array2.size(); j++) {
					com.alibaba.fastjson.JSONObject jsonObject2 = com.alibaba.fastjson.JSONObject
							.parseObject(array2.getString(j));
					cityStmt.setString(1, jsonObject2.getString("cityName"));
					cityStmt.setString(2, jsonObject2.getString("confirmedCount"));
					cityStmt.setString(3, jsonObject2.getString("suspectedCount"));
					cityStmt.setString(4, jsonObject2.getString("curedCount"));
					cityStmt.setString(5, jsonObject2.getString("deadCount"));
					cityStmt.setString(6, jsonObject2.getString("currentConfirmedCount"));
					cityStmt.setString(7, provinceName);
					cityStmt.setString(8, date);
					cityStmt.executeUpdate();
				}
			}
		} catch (SQLException e) {
			e.printStackTrace();
		}
		return result;
	}

  3、获取世界疫情数据

/**
	 * Downloads the DXY epidemic page, extracts the per-country statistics that
	 * the page assigns to {@code window.getListByCountryTypeService2true}, and
	 * stores them in the {@code contury} table of the "VData" database.
	 * <p>
	 * Entries whose {@code provinceName} is "中国" are skipped. Each row is
	 * stamped with the date derived from the entry's {@code createTime}
	 * (epoch milliseconds). Values are bound through a prepared statement, so a
	 * JSON field that is absent is stored as SQL NULL. Failures while fetching or
	 * storing are printed to standard error and do not propagate.
	 *
	 * @return the extracted JSON array text, or an empty string when the page
	 *         could not be fetched or the data was not found in it
	 */
	public static String getCountryData() {
		String url = "https://ncov.dxy.cn/ncovh5/view/pneumonia";
		String htmlResult = "";
		try {
			htmlResult = httpRequset(url);
		} catch (IOException e) {
			e.printStackTrace();
		}

		String reg = "window.getListByCountryTypeService2true = (.*?)\\}(?=catch)";
		Pattern totalPattern = Pattern.compile(reg);
		Matcher totalMatcher = totalPattern.matcher(htmlResult);
		if (!totalMatcher.find()) {
			return "";
		}

		String result = totalMatcher.group(1);
		System.out.println(result);
		JSONArray array = JSONArray.parseArray(result);

		String insertSql = "insert into contury values(?,?,?,?,?,?,?,?)";
		// try-with-resources closes the statement and the connection even when an
		// insert fails part-way through.
		try (Connection con = BaseConnection.getConnection("VData");
				java.sql.PreparedStatement stmt = con.prepareStatement(insertSql)) {
			for (int i = 0; i < array.size(); i++) {
				com.alibaba.fastjson.JSONObject jsobj = com.alibaba.fastjson.JSONObject
						.parseObject(array.getString(i));
				String countryName = jsobj.getString("provinceName");
				// Constant-first comparison so an entry without a name cannot
				// raise a NullPointerException.
				if ("中国".equals(countryName)) {
					continue;
				}
				Date date = new Date(Long.parseLong(jsobj.getString("createTime")));
				stmt.setString(1, jsobj.getString("continents"));
				stmt.setString(2, countryName);
				stmt.setString(3, jsobj.getString("currentConfirmedCount"));
				stmt.setString(4, jsobj.getString("confirmedCount"));
				stmt.setString(5, jsobj.getString("suspectedCount"));
				stmt.setString(6, jsobj.getString("curedCount"));
				stmt.setString(7, jsobj.getString("deadCount"));
				// Same text the date produced when it was concatenated into the SQL.
				stmt.setString(8, String.valueOf(date));
				stmt.executeUpdate();
			}
		} catch (Exception e) {
			// Still best-effort (nothing propagates), but the failure is now
			// reported instead of being silently discarded.
			e.printStackTrace();
		}
		return result;
	}

  

posted @ 2020-03-26 19:14  XiaoGao128  阅读(2443)  评论(1编辑  收藏  举报