团队项目-第一阶段冲刺-4
一、说在前面
1、昨天完成了:
1)新闻详情页的实现。
2)实现评论区。
2、今天完成了:
1)脱离对现成的api的依赖,自己独立爬取、处理和存储数据,实现数据初步审核去除“坏数据”。
2)在web端,自己编写自己的“api”接口,灵活地组织和传递自己所需的数据。
3)重构前三天的代码,加强了app的稳定性(比如说:现成的api接口中,个别图片地址的形式和其他大多数不同,按照统一的方法进行渲染时会出现系统崩溃的现象)。
4)新闻类别数据项展示时换用了卡片视图,使界面更加美观。
3、明天的计划:
1)实现查看历史记录的功能。(5h)
4、遇到的问题:
1)使用Gson工具将json数组转换成java中的list时出现异常。
解决方法:自定义一个相应的list的数据类型: new TypeToken<List<News>>(){}.getType(),并把它作为函数fromJson的第二个参数。
list = gson.fromJson(s, new TypeToken<List<News>>(){}.getType());
二、冲刺成果
原来和现在的对比:
三、代码
1、新api接口的数据结构
{ "id": 1626, "priority": 81, "commentCount": 43958, "source": "星视频#", "title": "深圳一大学生醉驾逆行 20岁外卖小哥被撞当场死亡", "url": "https://3g.163.com/news/20/0418/00/FAF3MI0T0001899O.html", "digest": "近日,深圳。一名即将毕业的大学生酒驾深夜发生事故,逃离现场再", "imgsrc": "http://cms-bucket.ws.126.net/2020/0418/b0d761cbp00q8xxlc00ezc000s600e3c.png", "ptime": "2020-04-18 00:24:55", "zw": "精彩弹幕,尽在客户端 近日,深圳。一名即将毕业的大学生酒驾深夜发生事故,逃离现场再返回,被撞外卖员当场死亡。当时车上还有三名同乘人员,均为醉酒状态,车行驶到事发路段时,为逆向行驶状态。目前肇事车辆车速待进一步鉴定,该案仍在进一步调查处理中。 (原标题:深圳一大学生深夜醉驾逆行,20岁外卖小哥被撞当场死亡) (责任编辑:李超_NB12814)", "type": "新闻" }
2、对应的新实体:
package com.me.domain; public class News { private int id; private int priority; private int commentCount; private String source; private String title; private String url; private String digest; private String imgsrc; private String ptime; private String zw; private String type; public String getType() { return type; } public void setType(String type) { this.type = type; } @Override public String toString() { return "News{" + "id=" + id + ", priority=" + priority + ", commentCount=" + commentCount + ", source='" + source + '\'' + ", title='" + title + '\'' + ", url='" + url + '\'' + ", digest='" + digest + '\'' + ", imgsrc='" + imgsrc + '\'' + ", ptime='" + ptime + '\'' + ", zw='" + zw + '\'' + '}'; } public String getZw() { return zw; } public void setZw(String zw) { this.zw = zw; } public int getId() { return id; } public void setId(int id) { this.id = id; } public int getPriority() { return priority; } public void setPriority(int priority) { this.priority = priority; } public int getCommentCount() { return commentCount; } public void setCommentCount(int commentCount) { this.commentCount = commentCount; } public String getSource() { return source; } public void setSource(String source) { this.source = source; } public String getTitle() { return title; } public void setTitle(String title) { this.title = title; } public String getUrl() { return url; } public void setUrl(String url) { this.url = url; } public String getDigest() { return digest; } public void setDigest(String digest) { this.digest = digest; } public String getImgsrc() { return imgsrc; } public void setImgsrc(String imgsrc) { this.imgsrc = imgsrc; } public String getPtime() { return ptime; } public void setPtime(String ptime) { this.ptime = ptime; } }
3、数据爬取的业务逻辑代码
package com.me.service; import com.google.gson.Gson; import com.me.dao.NewsDao; import com.me.domain.News; import com.me.domain.NewsListData; import com.me.utils.HttpUtil; import com.me.utils.JsoupNewsUtil; import java.sql.SQLException; import java.util.List; public class NewsList { static NewsDao dao = new NewsDao(); public void deleteBad(){ try { dao.deleteBad(); } catch (SQLException e) { e.printStackTrace(); } } public static void main(String[] args) throws SQLException { NewsList newsList = new NewsList(); /*String url = "https://3g.163.com/touch/reconstruct/article/list/"; String [] typeArray = {"BBM54PGAwangning","BA10TA81wangning","BA8E6OEOwangning" ,"BA8EE5GMwangning","BAI67OGGwangning","BA8D4A3Rwangning","BAI6I0O5wangning" ,"BAI6JOD9wangning","BA8F6ICNwangning","BAI6RHDKwangning","BA8FF5PRwangning" ,"BDC4QSV3wangning","BEO4GINLwangning"}; for (int i = 0; i < typeArray.length; i++) { }*/ newsList.deleteAll(); newsList.addXW("https://3g.163.com/touch/reconstruct/article/list/BBM54PGAwangning/0-20.html"); newsList.addYL("https://3g.163.com/touch/reconstruct/article/list/BA10TA81wangning/0-20.html"); newsList.addTY("https://3g.163.com/touch/reconstruct/article/list/BA8E6OEOwangning/0-20.html"); newsList.addCJ("https://3g.163.com/touch/reconstruct/article/list/BA8EE5GMwangning/0-20.html"); newsList.addJS("https://3g.163.com/touch/reconstruct/article/list/BAI67OGGwangning/0-20.html"); newsList.addKJ("https://3g.163.com/touch/reconstruct/article/list/BA8D4A3Rwangning/0-20.html"); newsList.addSJ("https://3g.163.com/touch/reconstruct/article/list/BAI6I0O5wangning/0-20.html"); newsList.addSM("https://3g.163.com/touch/reconstruct/article/list/BAI6JOD9wangning/0-20.html"); newsList.addSS("https://3g.163.com/touch/reconstruct/article/list/BA8F6ICNwangning/0-20.html"); newsList.addYX("https://3g.163.com/touch/reconstruct/article/list/BAI6RHDKwangning/0-20.html"); newsList.addJY("https://3g.163.com/touch/reconstruct/article/list/BA8FF5PRwangning/0-20.html"); 
newsList.addJK("https://3g.163.com/touch/reconstruct/article/list/BDC4QSV3wangning/0-20.html"); newsList.addLY("https://3g.163.com/touch/reconstruct/article/list/BEO4GINLwangning/0-20.html"); newsList.zw(); newsList.deleteBad(); // newsList.test(); } /** * type : 军事 * @param url * @throws SQLException */ public void addJS(String url) throws SQLException { String data = HttpUtil.setUrl(url); Gson gson = new Gson(); String ss = data.substring(9,data.length()-1); NewsListData newsListData = gson.fromJson(ss, NewsListData.class); for (int i = 0; i < newsListData.getBAI67OGGwangning().size(); i++) { News n = newsListData.getBAI67OGGwangning().get(i); n.setType("军事"); dao.add(n); } } /** * type : 旅游 * @param url * @throws SQLException */ public void addLY(String url) throws SQLException { String data = HttpUtil.setUrl(url); Gson gson = new Gson(); String ss = data.substring(9,data.length()-1); NewsListData newsListData = gson.fromJson(ss, NewsListData.class); for (int i = 0; i < newsListData.getBEO4GINLwangning().size(); i++) { News n = newsListData.getBEO4GINLwangning().get(i); n.setType("旅游"); dao.add(n); } } /** * type : 健康 * @param url * @throws SQLException */ public void addJK(String url) throws SQLException { String data = HttpUtil.setUrl(url); Gson gson = new Gson(); String ss = data.substring(9,data.length()-1); NewsListData newsListData = gson.fromJson(ss, NewsListData.class); for (int i = 0; i < newsListData.getBDC4QSV3wangning().size(); i++) { News n = newsListData.getBDC4QSV3wangning().get(i); n.setType("健康"); dao.add(n); } }/** * type : 教育 * @param url * @throws SQLException */ public void addJY(String url) throws SQLException { String data = HttpUtil.setUrl(url); Gson gson = new Gson(); String ss = data.substring(9,data.length()-1); NewsListData newsListData = gson.fromJson(ss, NewsListData.class); for (int i = 0; i < newsListData.getBA8FF5PRwangning().size(); i++) { News n = newsListData.getBA8FF5PRwangning().get(i); n.setType("教育"); dao.add(n); } }/** * 
type : 游戏 * @param url * @throws SQLException */ public void addYX(String url) throws SQLException { String data = HttpUtil.setUrl(url); Gson gson = new Gson(); String ss = data.substring(9,data.length()-1); NewsListData newsListData = gson.fromJson(ss, NewsListData.class); for (int i = 0; i < newsListData.getBAI6RHDKwangning().size(); i++) { News n = newsListData.getBAI6RHDKwangning().get(i); n.setType("游戏"); dao.add(n); } }/** * type : 时尚 * @param url * @throws SQLException */ public void addSS(String url) throws SQLException { String data = HttpUtil.setUrl(url); Gson gson = new Gson(); String ss = data.substring(9,data.length()-1); NewsListData newsListData = gson.fromJson(ss, NewsListData.class); for (int i = 0; i < newsListData.getBA8F6ICNwangning().size(); i++) { News n = newsListData.getBA8F6ICNwangning().get(i); n.setType("时尚"); dao.add(n); } }/** * type : 数码 * @param url * @throws SQLException */ public void addSM(String url) throws SQLException { String data = HttpUtil.setUrl(url); Gson gson = new Gson(); String ss = data.substring(9,data.length()-1); NewsListData newsListData = gson.fromJson(ss, NewsListData.class); for (int i = 0; i < newsListData.getBAI6JOD9wangning().size(); i++) { News n = newsListData.getBAI6JOD9wangning().get(i); n.setType("数码"); dao.add(n); } }/** * type : 手机 * @param url * @throws SQLException */ public void addSJ(String url) throws SQLException { String data = HttpUtil.setUrl(url); Gson gson = new Gson(); String ss = data.substring(9,data.length()-1); NewsListData newsListData = gson.fromJson(ss, NewsListData.class); for (int i = 0; i < newsListData.getBAI6I0O5wangning().size(); i++) { News n = newsListData.getBAI6I0O5wangning().get(i); n.setType("手机"); dao.add(n); } } /** * type : 科技 * @param url * @throws SQLException */ public void addKJ(String url) throws SQLException { String data = HttpUtil.setUrl(url); Gson gson = new Gson(); String ss = data.substring(9,data.length()-1); NewsListData newsListData = gson.fromJson(ss, 
NewsListData.class); for (int i = 0; i < newsListData.getBA8D4A3Rwangning().size(); i++) { News n = newsListData.getBA8D4A3Rwangning().get(i); n.setType("科技"); dao.add(n); } } /** * type : 财经 * @param url * @throws SQLException */ public void addCJ(String url) throws SQLException { String data = HttpUtil.setUrl(url); Gson gson = new Gson(); String ss = data.substring(9,data.length()-1); NewsListData newsListData = gson.fromJson(ss, NewsListData.class); for (int i = 0; i < newsListData.getBA8EE5GMwangning().size(); i++) { News n = newsListData.getBA8EE5GMwangning().get(i); n.setType("财经"); dao.add(n); } } /** * type : 体育 * @param url * @throws SQLException */ public void addTY(String url) throws SQLException { String data = HttpUtil.setUrl(url); Gson gson = new Gson(); String ss = data.substring(9,data.length()-1); NewsListData newsListData = gson.fromJson(ss, NewsListData.class); for (int i = 0; i < newsListData.getBA8E6OEOwangning().size(); i++) { News n = newsListData.getBA8E6OEOwangning().get(i); n.setType("体育"); dao.add(n); } } /** * type : 娱乐 * @param url * @throws SQLException */ public void addYL(String url) throws SQLException { String data = HttpUtil.setUrl(url); Gson gson = new Gson(); String ss = data.substring(9,data.length()-1); NewsListData newsListData = gson.fromJson(ss, NewsListData.class); for (int i = 0; i < newsListData.getBA10TA81wangning().size(); i++) { News n = newsListData.getBA10TA81wangning().get(i); n.setType("娱乐"); dao.add(n); } } /** * 新闻 * @param url * @throws SQLException */ public void addXW(String url) throws SQLException { String data = HttpUtil.setUrl(url); Gson gson = new Gson(); String ss = data.substring(9,data.length()-1); NewsListData newsListData = gson.fromJson(ss, NewsListData.class); for (int i = 0; i < newsListData.getBBM54PGAwangning().size(); i++) { News n = newsListData.getBBM54PGAwangning().get(i); n.setType("新闻"); dao.add(n); } } /** * 正文 * @throws SQLException */ public void zw() throws SQLException { List<News> 
news = dao.newsList(); for (int i = 0; i < news.size(); i++) { News n = news.get(i); String s = HttpUtil.setUrl(n.getUrl()); String zw = JsoupNewsUtil.zw(s); dao.zw(zw,n.getId()); } } /** * 清空 * @throws SQLException */ public void deleteAll() throws SQLException { dao.deleteAll(); } /** * 测试 */ public void test (){ Gson gson = new Gson(); News news = gson.fromJson("{\n" + " \"imgextra\": [\n" + " {\n" + " \"imgsrc\": \"http://cms-bucket.ws.126.net/2020/0410/4ef9af5aj00q8jdsh00bpc000sg00sgc.jpg\"\n" + " },\n" + " {\n" + " \"imgsrc\": \"http://cms-bucket.ws.126.net/2020/0410/6f7bd38bj00q8jdsh003xc000sg00dic.jpg\"\n" + " }\n" + " ],\n" + " \"liveInfo\": null,\n" + " \"docid\": \"17KK0006|2145433\",\n" + " \"source\": \"极客鲜疯队\",\n" + " \"title\": \"宅家赏美丽高原 一生中值得一看的美景\",\n" + " \"priority\": 150,\n" + " \"url\": \"17KK0006|2145433\",\n" + " \"skipURL\": \"http://3g.163.com/touch/photoview.html?channelid=0006&setid=2145433\",\n" + " \"commentCount\": 15,\n" + " \"imgsrc3gtype\": \"2\",\n" + " \"stitle\": \"17KK0006|2145433\",\n" + " \"digest\": \"\",\n" + " \"skipType\": \"photoset\",\n" + " \"photosetID\": \"0006|2145433\",\n" + " \"imgsrc\": \"http://cms-bucket.ws.126.net/2020/0410/1bd79be9j00q8jdsh008tc000sg00izc.jpg\",\n" + " \"ptime\": \"2020-04-10 03:51:04\",\n" + " \"modelmode\": \"u\"\n" + " }", News.class); System.out.println(news.toString()); } }
4、自己封装的工具类:
1)数据库连接
package com.me.utils; import java.sql.Connection; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import javax.sql.DataSource; import com.mchange.v2.c3p0.ComboPooledDataSource; public class DBUtils { private static DataSource dataSource = new ComboPooledDataSource(); private static ThreadLocal<Connection> tl = new ThreadLocal<Connection>(); // 直接可以获取一个连接池 public static DataSource getDataSource() { return dataSource; } public static Connection getConnection() throws SQLException{ return dataSource.getConnection(); } // 获取连接对象 public static Connection getCurrentConnection() throws SQLException { Connection con = tl.get(); if (con == null) { con = dataSource.getConnection(); tl.set(con); } return con; } // 开启事务 public static void startTransaction() throws SQLException { Connection con = getCurrentConnection(); if (con != null) { con.setAutoCommit(false); } } // 事务回滚 public static void rollback() throws SQLException { Connection con = getCurrentConnection(); if (con != null) { con.rollback(); } } // 提交并且 关闭资源及从ThreadLocall中释放 public static void commitAndRelease() throws SQLException { Connection con = getCurrentConnection(); if (con != null) { con.commit(); // 事务提交 con.close();// 关闭资源 tl.remove();// 从线程绑定中移除 } } // 关闭资源方法 public static void closeConnection() throws SQLException { Connection con = getCurrentConnection(); if (con != null) { con.close(); } } public static void closeStatement(Statement st) throws SQLException { if (st != null) { st.close(); } } public static void closeResultSet(ResultSet rs) throws SQLException { if (rs != null) { rs.close(); } } }
2)网络请求发送
package com.me.utils; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; public class HttpUtil { /** * 返回json * @param setUrl * @return */ public static String setUrl(String setUrl){ try { URL url = new URL(setUrl); HttpURLConnection conn = (HttpURLConnection)url.openConnection(); conn.setConnectTimeout(5000); conn.setRequestMethod("GET"); int responseCode = conn.getResponseCode(); if (responseCode == HttpURLConnection.HTTP_OK){ InputStream inputStream = conn.getInputStream(); InputStreamReader inputStreamReader = new InputStreamReader(inputStream); BufferedReader reader = new BufferedReader(inputStreamReader); StringBuffer stringBuffer = new StringBuffer(); String string = reader.readLine(); while (string != null) { stringBuffer.append(string); string = reader.readLine(); } return stringBuffer.toString(); } } catch (MalformedURLException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return ""; } }
3)html代码解析
package com.me.utils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; public class JsoupNewsUtil { /** * * @param data * @return News_dg */ public static String zw(String data){ Document document = Jsoup.parse(data); //获取新闻的内容 Elements content = document.getElementsByClass("content"); return content.text().trim(); } }
5、dao层
package com.me.dao; import com.me.domain.News; import com.me.utils.DBUtils; import org.apache.commons.dbutils.QueryRunner; import org.apache.commons.dbutils.handlers.BeanListHandler; import java.sql.SQLException; import java.util.List; public class NewsDao { public boolean deleteBad() throws SQLException { QueryRunner qr =new QueryRunner(DBUtils.getDataSource()); String sql="delete from newslist where zw = null or zw=?"; int n = qr.update(sql,""); if (n > 0) { return true; } else { return false; } } public boolean deleteAll() throws SQLException { QueryRunner qr =new QueryRunner(DBUtils.getDataSource()); String sql="delete from newslist "; int n = qr.update(sql); if (n > 0) { return true; } else { return false; } } /** * * @param zw * @param id * @return * @throws SQLException */ public boolean zw(String zw,int id) throws SQLException { QueryRunner qr = new QueryRunner(DBUtils.getDataSource()); String sql = "update newslist set zw = ? where id=? "; int n = qr.update(sql, zw,id); if (n > 0) { return true; } else { return false; } } /** * * @return List<News> * @throws SQLException */ public List<News> newsList() throws SQLException { QueryRunner qr = new QueryRunner(DBUtils.getDataSource()); String sql = "select * from newslist where url != null or url != '17KK0006|2145432'or url != ?"; List<News> query = qr.query(sql, new BeanListHandler<News>(News.class),""); return query; } /** * * @param world * @return * @throws SQLException */ public List<News> search(String world) throws SQLException { QueryRunner qr = new QueryRunner(DBUtils.getDataSource()); String sql = "select * from newslist where title like '%"+world+"%' limit 0 , 5"; System.out.println(sql); List<News> query = qr.query(sql, new BeanListHandler<News>(News.class)); return query; } /** * * @param news * @return * @throws SQLException */ public boolean add(News news) throws SQLException { QueryRunner qr = new QueryRunner(DBUtils.getDataSource()); String sql = "insert into newslist 
(source,title,priority,url,commentCount,digest,imgsrc,ptime,type) " + "values(?,?,?,?,?,?,?,?,?)"; int update = qr.update(sql,news.getSource(),news.getTitle(),news.getPriority(),news.getUrl(),news.getCommentCount(), news.getDigest(),news.getImgsrc(),news.getPtime(),news.getType()); if (update > 0) { return true; } else { return false; } } }
6、servlet,编写的api数据源接口
package com.me.servlet; import java.io.IOException; import java.sql.SQLException; import java.util.ArrayList; import java.util.List; import javax.servlet.ServletException; import javax.servlet.annotation.WebServlet; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import com.google.gson.Gson; import com.me.dao.NewsDao; import com.me.domain.News; @WebServlet("/news") public class NewsServlet_ extends HttpServlet { private static final long serialVersionUID = 1L; private NewsDao dao = new NewsDao(); public NewsServlet_() { super(); } protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { response.setHeader("content-type", "text/html;charset=UTF-8"); response.setCharacterEncoding("UTF-8"); String method = request.getParameter("method"); if (method.equals("allnews")){ allnews(request,response); }else if (method.equals("search")){ search(request,response); } } protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { doGet(request, response); } private void search(HttpServletRequest request, HttpServletResponse response) throws IOException { String word = request.getParameter("word"); List<News> news = new ArrayList<News>(); try { news = dao.search(word); } catch (SQLException e) { e.printStackTrace(); } Gson gson = new Gson(); String s = gson.toJson(news); response.getWriter().write(s); } private void allnews(HttpServletRequest request, HttpServletResponse response) throws IOException { List<News> news = new ArrayList<News>(); try { news = dao.newsList(); } catch (SQLException e) { e.printStackTrace(); } Gson gson = new Gson(); String s = gson.toJson(news); response.getWriter().write(s); } }