团队项目开发冲刺日(三)
今日一整天没有学习团队项目相关知识,仅仅读了这篇博客:https://www.cnblogs.com/andzhang/p/6075814.html。
完成了昨天老师布置的词云任务,下面附上相关代码:
python:爬取,存入数据库
import re

import pymysql
import requests
from bs4 import BeautifulSoup

# Root of the open-access site; the hrefs scraped from the index are relative to it.
BASE_URL = 'http://openaccess.thecvf.com/'
# Without a timeout a stalled connection would block the crawl forever.
REQUEST_TIMEOUT = 30


def insertdata(data, n):
    """Insert one paper row into the ``lunwen`` table.

    :param data: ``(Timu, Zhaiyao, Lianjie)`` tuple, i.e. title, abstract, relative link.
    :param n: running paper number, printed after a successful commit.

    On a database error the transaction is rolled back and "ERROR" is printed
    together with the exception; the error is not re-raised so the crawl goes on.
    """
    # PyMySQL >= 1.0 accepts keyword arguments only for connect().
    # NOTE(review): credentials are hard-coded here; consider moving them to config.
    conn = pymysql.connect(host="localhost", user="root", password="Inazuma",
                           database="paqu", charset='utf8')
    sql = "INSERT INTO lunwen(Timu,Zhaiyao,Lianjie) VALUES(%s,%s,%s)"
    try:
        with conn.cursor() as cur:
            cur.execute(sql, data)
        conn.commit()
        print(n)
    except pymysql.MySQLError as exc:
        conn.rollback()
        print("ERROR", exc)
    finally:
        # Always release the connection, even when the insert failed.
        conn.close()


num = 0
# Index page of ICCV 2019: it lists every paper title with a link to its detail page.
r = requests.get(BASE_URL + 'ICCV2019.py', timeout=REQUEST_TIMEOUT)
soup = BeautifulSoup(r.text, 'lxml')
# Only anchors that point at a paper detail page are of interest.
for item in soup.find_all('a', href=re.compile('content_ICCV_2019/html/')):
    num += 1
    timu = item.string
    lianjie = item['href']
    # The href is relative, so prepend the site root to fetch the detail page,
    # which carries the abstract.
    ra = requests.get(BASE_URL + lianjie, timeout=REQUEST_TIMEOUT)
    soupa = BeautifulSoup(ra.text, 'lxml')
    diva = soupa.find(attrs={"id": "abstract"})
    # Some detail pages answer "Not Found" and have no abstract element;
    # store -1 as the abstract for those.
    if diva is None:
        zhaiyao = -1
    else:
        zhaiyao = diva.string
    insertdata((timu, zhaiyao, lianjie), num)
java代码:读库,根据关键字分类,界面显示:
后台servlet+dao层:
CVPRServlet,读库,统计分类,由于使用Eclipse创建Servlet文件都是统一样式,上面的就不粘了,只粘doPost:
1 protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { 2 // TODO Auto-generated method stub 3 request.setCharacterEncoding("utf-8"); 4 response.setContentType("text/html;charset=UTF-8"); 5 Map<String,Integer>sortMap=CloudDao.getallmax(); 6 JSONArray json=new JSONArray(); 7 int k=0; 8 for(Map.Entry<String, Integer>entry:sortMap.entrySet()) { 9 JSONObject job=new JSONObject(); 10 job.put("Kname", entry.getKey()); 11 job.put("Tvalue", entry.getValue()); 12 if(!entry.getKey().equals("for")||entry.getKey().equals("and")||entry.getKey().equals("With")||entry.getKey().equals("of") 13 ||entry.getKey().equals("in")||entry.getKey().equals("From")||entry.getKey().equals("A")||entry.getKey().equals("to") 14 ||entry.getKey().equals("a")||entry.getKey().equals("the")||entry.getKey().equals("by")) { 15 json.add(job); 16 k++; 17 } 18 if(k==30) { 19 break; 20 } 21 } 22 response.getWriter().write(json.toString()); 23 }
上面读取的Map由下面的CloudDao生成:
1 package Dao; 2 3 import java.sql.Connection; 4 import java.sql.ResultSet; 5 import java.sql.Statement; 6 import java.util.HashMap; 7 import java.util.LinkedHashMap; 8 import java.util.Map; 9 10 import util.DBUtil; 11 12 public class CloudDao { 13 public static Map<String, Integer>getallmax(){ 14 String sql="select * from lunwen"; 15 Map<String, Integer>map=new HashMap<String, Integer>(); 16 Connection conn=null; 17 Statement state=null; 18 ResultSet res=null; 19 conn=DBUtil.getConn(); 20 try { 21 state=conn.createStatement(); 22 res=state.executeQuery(sql); 23 while(res.next()) { 24 String timu=res.getString("Timu"); 25 String[]keyword=timu.split(" "); 26 for(int i=0; i<keyword.length; i++) { 27 if(map.get(keyword[i])==null) { 28 map.put(keyword[i], 0); 29 }else { 30 map.replace(keyword[i], map.get(keyword[i])+1); 31 } 32 } 33 } 34 }catch(Exception e) { 35 e.printStackTrace(); 36 } 37 DBUtil.close(res, state, conn); 38 Map<String, Integer>sorted=new LinkedHashMap<>(); 39 map.entrySet().stream() 40 .sorted(Map.Entry.<String,Integer>comparingByValue().reversed()) 41 .forEachOrdered(x->sorted.put(x.getKey(), x.getValue())); 42 return sorted; 43 } 44 }
先将库中的论文题目取出(每个题目是一句话),按空格拆分成单词,以单词作为Key统计出现次数并按次数降序排序,再传给CVPRServlet,最后取前30个。
findServlet:点击词云中的某个词时,显示所有题目中包含这个词的论文的题目以及链接,同样只粘贴doPost:
1 protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { 2 // TODO Auto-generated method stub 3 request.setCharacterEncoding("utf-8"); 4 response.setContentType("text/html;charset=UTF-8"); 5 String word=request.getParameter("word"); 6 response.getWriter().write(TitleDao.searchTitle(word)); 7 }
dao层代码:按词模糊查询,返回json数组:
1 package Dao; 2 3 import java.sql.Connection; 4 import java.sql.ResultSet; 5 import java.sql.Statement; 6 7 import net.sf.json.JSONArray; 8 import net.sf.json.JSONObject; 9 import util.DBUtil; 10 11 public class TitleDao { 12 public static String searchTitle(String word) { 13 JSONArray jsonarrayC=new JSONArray(); 14 Connection con=DBUtil.getConn(); 15 Statement state=null; 16 //读取数据库 17 String sql="select Timu from lunwen where Timu like '%"+word+"%'"; 18 String timustr=""; 19 ResultSet res=null; 20 try { 21 JSONObject jsonobC=new JSONObject(); 22 state=con.createStatement(); 23 res=state.executeQuery(sql); 24 while(res.next()) { 25 timustr=timustr+res.getString("Timu")+","; 26 } 27 res.close(); 28 String str[]=timustr.split(","); 29 for(int i=0;i<str.length;i++) { 30 sql="select Lianjie from lunwen where Timu='"+str[i]+"'"; 31 res=state.executeQuery(sql); 32 res.next(); 33 String lianjie=res.getString("Lianjie"); 34 String Rlianjie="http://openaccess.thecvf.com/"+lianjie; 35 jsonobC.put("Title", str[i]); 36 jsonobC.put("Link", Rlianjie); 37 res.close(); 38 //写入JSONArray数组 39 jsonarrayC.add(jsonobC); 40 } 41 }catch(Exception e) { 42 e.printStackTrace(); 43 } 44 //类型转换,返回 45 return jsonarrayC.toString(); 46 } 47 }
最后是界面代码,参考博客:https://www.cnblogs.com/xiaofengzai/p/12702136.html
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>词云</title>
<script type="text/javascript" src="js/jquery-1.11.0.min.js"></script>
<script src="js/echarts.js"></script>
<script src="js/echarts-wordcloud.js"></script>
</head>
<body>
    <!-- Left: list of papers for the clicked word. Right: the word cloud itself. -->
    <div id='show' style="display:block;overflow:auto;width:700px;height:1000px;float:left"></div>
    <div id='main' style="width:700px;height:1000px;float:left"></div>
<script type="text/javascript">
    $(function(){
        echartsCloud();
    });

    // Click handler for the chart: asks findServlet for all papers whose title
    // contains the clicked word and renders them as a table of links.
    function eConsole(param){
        if(typeof param.seriesIndex == 'undefined'){
            return;
        }
        if(param.type == 'click'){
            var word = param.name;
            $.post(
                'findServlet',
                {'word': word},
                function(papers){
                    // Build the table through DOM calls so that titles and links
                    // are inserted as text/attributes and never parsed as HTML.
                    var $table = $("<table>").css('text-align', 'center');
                    $("<caption>").css('text-align', 'center').text('论文题目与链接').appendTo($table);
                    for(var i = 0; i < papers.length; i++){
                        var $link = $("<a>")
                            .attr({target: '_blank', href: papers[i].Link})
                            .text(papers[i].Title);
                        $("<tr>").append($("<td>").append($link)).appendTo($table);
                    }
                    $("#show").empty().append($table);
                },
                // The servlet labels its output text/html, so request JSON parsing explicitly.
                'json'
            );
        }
    }

    // Loads the word counts from CVPRServlet and draws the word cloud.
    function echartsCloud(){
        $.ajax({
            url: "CVPRServlet",
            type: "post",
            // The option name is case-sensitive; the lowercase spelling was ignored.
            dataType: "json",
            async: true,
            success: function(words){
                var mydata = [];
                for(var i = 0; i < words.length; i++){
                    mydata.push({name: words[i].Kname, value: words[i].Tvalue});
                }
                var myChart = echarts.init(document.getElementById('main'));
                myChart.on('click', eConsole);
                var option = {
                    title: {
                        text: '词云'
                    },
                    tooltip: {
                        show: true
                    },
                    series: [{
                        type: 'wordCloud',
                        shape: 'smooth',
                        gridSize: 8,
                        size: ['50%', '50%'],
                        rotationRange: [-45, 0, 45, 90],
                        textStyle: {
                            normal: {
                                fontFamily: '微软雅黑',
                                // Every word gets a random RGB colour.
                                color: function(){
                                    return 'rgb(' + Math.round(Math.random()*255) + ','
                                        + Math.round(Math.random()*255) + ','
                                        + Math.round(Math.random()*255) + ')'
                                }
                            },
                            emphasis: {
                                shadowBlur: 5,
                                shadowColor: '#333'
                            }
                        },
                        left: 'center',
                        top: 'center',
                        right: null,
                        bottom: null,
                        width: '100%',
                        height: '100%',
                        data: mydata
                    }]
                };
                myChart.setOption(option);
            }
        });
    }
</script>
</body>
</html>
接下来看看效果:
明天任务:补上昨天预定的计划,接下来应该没有其他代码作业了。