Java调用Linux执行Python爬虫,并将数据存储到Elasticsearch中--(Java后台代码)
该篇博客主要是Java代码,如需相应脚本及Java连接Elasticsearch工具类代码,请移步到上一篇博客(https://www.cnblogs.com/chenyuanbo/p/9973685.html)
一、创建连接执行Linux脚本工具类
package com.yjlc.platform.utils.Elasticsearch; import ch.ethz.ssh2.Connection; import ch.ethz.ssh2.StreamGobbler; import java.io.*; /** * -------------------------------------------------------------- * CopyRights(c)2018,YJLC * All Rights Reserved * <p> * FileName: SingletonUtil.java * Description: * Author: cyb * CreateDate: 2018-11-15 * -------------------------------------------------------------- */ public class SingletonUtil { //无参构造 private SingletonUtil(){} private volatile static SingletonUtil instance; //字符编码默认是utf-8 public static String DEFAULTCHART="UTF-8"; public static Connection conn; private String ip; private String userName; private String userPwd; public static Boolean flag=false; //有参构造 public SingletonUtil(String ip, String userName, String userPwd) { this.ip = ip; this.userName = userName; this.userPwd = userPwd; } public SingletonUtil getInstance(String ip, String userName, String userPwd){ if(instance==null){ synchronized(SingletonUtil.class){ //防止多线程多次创建 if(instance==null){ instance=new SingletonUtil(ip,userName, userPwd); } } } flag= instance.login();//调用登录方法 return instance; } //登录 public Boolean login(){ boolean flg=false; try { System.out.println("进入连接"); conn = new Connection(ip); try { conn.connect();//连接 } catch (IOException e) { e.printStackTrace(); } flg=conn.authenticateWithPassword(userName, userPwd);//认证 if (flg){ System.out.println("认证成功!"); } } catch (IOException e) { e.printStackTrace(); } return flg; } /** *@description:纯文本格式返回 *@author:cyb *@date: 2018-11-15 16:56 *@param: in *@param: charset *@return: java.lang.String */ public static String processStdout(InputStream in, String charset){ InputStream stdout = new StreamGobbler(in); StringBuffer buffer = new StringBuffer();; try { BufferedReader br = new BufferedReader(new InputStreamReader(stdout,charset)); String line=null; while((line=br.readLine()) != null){ buffer.append(line+"\n"); } } catch (UnsupportedEncodingException e) { e.printStackTrace(); } catch (IOException e) 
{ e.printStackTrace(); } return buffer.toString(); } }
二、控制层
/** *@description:开启爬虫 *@author:cyb *@date: 2018-11-14 15:59 *@param: id *@param: execute *@return: java.util.Map<java.lang.String,java.lang.Object> */ @RequestMapping("openTask") @ResponseBody public Map<String,Object> openTask(String id,Boolean execute){ Map<String,Object> map = new HashMap<>(); //根据id查询任务详细信息 BsKnowledgeInfoDTO knowledgeInfoDTO= knolegeService.getDataInfoById(id); if(execute==true){ execute=false; }else { execute=true; } knowledgeInfoDTO.setExecute(execute);//修改任务的状态(开启、关闭) int k = knolegeService.updateDataInfo(knowledgeInfoDTO); // StringBuilder url = new StringBuilder(knowledgeInfoDTO.getPath()) ;//爬虫目标路径 StringBuilder url= new StringBuilder("https://mil.news.sina.com.cn/"); StringBuilder reptileMethod= new StringBuilder("http://192.168.200.8:8000/news");//爬虫方法http://192.168.200.8:8000/news StringBuilder themeid= new StringBuilder("hottopic");//存储索引名称 //http://192.168.200.8:8000/news?themeid=hottopic&url=https://mil.news.sina.com.cn/history/2018-11-15/doc-ihmutuec0443667.shtml StringBuilder path =reptileMethod.append("?").append("themid=").append(themeid).append("&").append("url=").append(url); String ip="192.168.200.8";//Linux 路径 String userName ="root"; String userPwd ="yjlc20148"; int w = knolegeService.reptile(path.toString(),ip,userName,userPwd); if(w==200){ map.put("code",200); map.put("message","爬虫成功!"); }else if(w==206){ map.put("code",206); map.put("message","连接失败!"); } return map; }
三、service层(此处省略了service接口层)
/** *@description: 爬虫 *@author:cyb *@date: 2018-11-15 20:52 *@param: path 爬虫方法路径+ES存储索引+爬虫目标url合集 *@param: ip 连接ip地址 *@param: userName :用户名 *@param: userPwd:用户密码 *@return: int */ @Override public int reptile(String path,String ip,String userName,String userPwd) { SingletonUtil singletonUtil = new SingletonUtil("192.168.200.8", "root","yjlc20148"); singletonUtil.getInstance(ip, userName,userPwd); Boolean b =SingletonUtil.flag;//看是否连接成功 if(b==true){ System.out.println("=====第一个步骤====="); Session session= null;//打开一个会话 try { session = singletonUtil.conn.openSession(); session.execCommand("sh /opt/zc/linux_sina.sh");//执行命令 } catch (IOException e) { e.printStackTrace(); } //TODO:多条命令 String result=singletonUtil.processStdout(session.getStdout(),singletonUtil.DEFAULTCHART); //如果为得到标准输出为空,说明脚本执行出错了 if(StringUtils.isBlank(result)){ System.out.println("脚本出错"); result=singletonUtil.processStdout(session.getStderr(),singletonUtil.DEFAULTCHART); } System.out.println("第一个步骤脚本运行成功"+result); ConnectNetworkUtil connectNetworkUtil = new ConnectNetworkUtil(); connectNetworkUtil.ConnectNetwork(path); System.out.println("采集成功!"); session.close();//关闭session singletonUtil.conn.close();//爬虫关闭连接 return 200;//爬虫成功 }else { return 206;//连接失败 } }
以上代码已省略了service接口层和Java连接Elasticsearch工具类(上一篇博客中已写到)。代码仅供参考,若有不合理或者不规范的地方,请各位指出,技术在于交流!