Java解析采集模块
package step3;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import bean.Porn;
import util.DBConnection;

/**
 * Resolves playable video addresses for a list of page URLs.
 *
 * <p>
 * {@link #main(String[])} reads a CSV file of records, posts each record's page
 * URL to the extraction endpoint, fetches the resulting page, takes the text of
 * one of its anchors as the play URL, and appends the record plus that URL to an
 * output file.
 *
 * <p>
 * The record currently being processed is held in static fields, so this class
 * is not safe for concurrent use.
 *
 * @author zeze (2016-06-30)
 */
public class porn91 {

	// NOTE(review): the cookies and CSRF token below are hard-coded session
	// credentials. They expire and should not live in source control; consider
	// loading them from configuration.

	/** Cookie content shared by both requests; only the session signature differs. */
	private static final String COOKIE_COMMON = "incap_ses_401_649914=31EbXVOgx0r6Ql5TmqOQBdjxdFcAAAAAu7MrrqICFZvpjsIw5VriGQ==; "
			+ "incap_ses_434_649914=wx2HcnWH7GDQCChRweAFBt/xdFcAAAAAczn9Ohl2VBPqxEd8kRi2GA==; "
			+ "incap_ses_407_649914=U4VYNM5iO1l1H0VP7/SlBWXydFcAAAAAifL73Yq/OnIgRqKWiWPqUg==; "
			+ "incap_ses_406_649914=8Ub/DfvqEGs9L9gFemeiBWEKdVcAAAAA+aBeDqKyWw37Sv+KZ4cdlA==; "
			+ "incap_ses_432_649914=bLzAYBXvVG0kSU6wyMX+BWUKdVcAAAAAZW+uykXgylzu/dZOu7IDWw==; "
			+ "_ga=GA1.2.1738858661.1466764840; _gat=1; "
			+ "visid_incap_649914=2hb3ym0OQ9C7sr1krqKCQTUObVcAAAAAQUIPAAAAAADQQCM/QP5jhCXO3+mlIKmg; "
			+ "incap_ses_199_649914=RkWbbfybyCoL2fxKs/3CAqIbdVcAAAAAOa+RJFdt35NV8xtM8MbP8Q==; "
			+ "session=eyJfZnJlc2giOmZhbHNlLCJjc3JmX3Rva2VuIjp7IiBiIjoiTkdFek9HRmtNakkxTldVM05EVXpZMkZoTldKaE5tWXpOV014TlRBNU1UZ3dPVGcyTkRNMU5BPT0ifX0";

	/** Cookie header sent with the extraction POST request. */
	private static final String cookie = COOKIE_COMMON + ".ClatMQ.INJmWYMZ8T220CgsSTcfpHhTxXI";
	/** Cookie header sent with the follow-up GET request. */
	private static final String cookie2 = COOKIE_COMMON + ".ClatMw.6MGC1jX7mgjsChpGFBd-xHTv9ZU";

	/** Value of the X-CSRFToken header sent with the extraction request. */
	private static final String Token = "1467296187##60ecf40d9328862cc6cd6a478adfc72ee0554050";

	/** Extraction endpoint that the page URL is posted to. */
	private static final String Url = "http://freeget.co/video/extraction";
	/** Input CSV file, one record per line. */
	private static final String dirfile = "F:/91porn/91url.csv";
	/** Output file that resolved records are appended to. */
	private static final String destfile = "F:/91porn/data.txt";

	/** Number of comma-separated columns a valid input line must have. */
	private static final int EXPECTED_COLUMNS = 7;
	/** Zero-based index of the anchor whose text is taken as the play URL. */
	private static final int PLAY_URL_ANCHOR_INDEX = 4;

	/** Number of records successfully appended to the output file. */
	private static int cnt0 = 0;

	// Fields of the record currently being processed (set by main, read by the
	// request and write steps).
	private static String num = null;
	private static String title = null;
	private static String time = null;
	private static String longtime = null;
	private static String viewnum = null;
	// Example value:
	// "http://www.91porn.com/view_video.php?viewkey=c5ec60d0da8c8fbdb180&page=4&viewtype=basic&category=mr"
	private static String Parurl = null;

	/**
	 * Reads the input CSV line by line and runs the resolution pipeline for every
	 * line that has exactly {@value #EXPECTED_COLUMNS} columns; other lines are
	 * skipped. Prints a summary when the whole file has been processed.
	 *
	 * @param args unused
	 * @throws InterruptedException declared for compatibility; never thrown here
	 */
	public static void main(String[] args) throws InterruptedException {
		BufferedReader br = null;
		try {
			// Closing the BufferedReader in the finally block also closes the
			// underlying FileReader.
			br = new BufferedReader(new FileReader(new File(dirfile)));
			int cnt = 0;
			String str;
			while ((str = br.readLine()) != null) {
				String[] strArr = str.split(",");
				if (strArr.length != EXPECTED_COLUMNS) {
					continue;
				}
				num = strArr[0];
				title = strArr[1];
				time = strArr[2];
				// Column 3 is not used by this tool.
				longtime = strArr[4];
				viewnum = strArr[5];
				Parurl = strArr[6];
				cnt++;
				System.out.println(num + "," + title + "," + time);
				func_step1();
			}
			System.out.println("采集结束,总共:" + cnt + "条,成功写入" + cnt0 + "条");
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			closeAndLog(br);
		}
	}

	/**
	 * Step 1: posts the current record's page URL to the extraction endpoint. On
	 * HTTP 200 the JSON response's {@code view_key} and {@code token} are combined
	 * into the address handed to {@link #func_step2(String)}.
	 */
	private static void func_step1() {
		HttpClient httpClient = new HttpClient();
		PostMethod postMethod = null;
		String resolvedUrl = null;
		try {
			postMethod = new PostMethod(Url);
			postMethod.getParams().setContentCharset("utf-8");
			// The cookie acts as the pass for URLs that require authorization.
			postMethod.setRequestHeader("cookie", cookie);
			postMethod.setRequestHeader("X-CSRFToken", Token);
			postMethod.setRequestHeader("Accept-Language", "zh-CN,zh;q=0.8");
			postMethod.setRequestHeader("Host", "freeget.co");
			postMethod.setRequestHeader("Referer", "http://freeget.co/");
			postMethod.setRequestHeader("User-Agent",
					"Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0) QQBrowser/9.2.5063.400");
			postMethod.setParameter("url", Parurl);
			// 200 means success; 500 means a server-side error occurred.
			int statusCode = httpClient.executeMethod(postMethod);
			System.out.println("返回状态码:" + statusCode);
			String result = postMethod.getResponseBodyAsString();
			if (statusCode == 200) {
				// Extraction succeeded: build the address from view_key and token.
				JSONObject a = new JSONObject(result);
				resolvedUrl = "http://freeget.co/video/" + a.get("view_key") + "/" + a.get("token");
				System.out.println("视频解析地址:" + resolvedUrl);
			}
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			// Always hand the connection back, even when the request failed.
			if (postMethod != null) {
				postMethod.releaseConnection();
			}
		}
		// Run the second request only after the first connection is released.
		if (resolvedUrl != null) {
			func_step2(resolvedUrl);
		}
	}

	/**
	 * Step 2: fetches the resolved page and, on HTTP 200, takes the text of the
	 * anchor at {@link #PLAY_URL_ANCHOR_INDEX} as the play URL and appends the
	 * current record to the output file. The success counter is incremented only
	 * when the record was actually written.
	 *
	 * @param url address built by {@link #func_step1()}
	 */
	private static void func_step2(String url) {
		HttpClient httpClient = new HttpClient();
		GetMethod getMethod = null;
		BufferedReader br = null;
		try {
			getMethod = new GetMethod(url);
			getMethod.getParams().setContentCharset("utf-8");
			getMethod.setRequestHeader("cookie", cookie2);
			getMethod.setRequestHeader("Accept-Language", "zh-cn");
			getMethod.setRequestHeader("User-Agent",
					"Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0) QQBrowser/9.2.5063.400");
			// 200 means success; 500 means a server-side error occurred.
			int statusCode = httpClient.executeMethod(getMethod);
			if (statusCode != 200) {
				return;
			}
			InputStream inputStream = getMethod.getResponseBodyAsStream();
			if (inputStream == null) {
				System.err.println("Empty response body for " + url);
				return;
			}
			// Decode explicitly as UTF-8 instead of the platform default charset.
			br = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"));
			StringBuilder html = new StringBuilder();
			String line;
			while ((line = br.readLine()) != null) {
				html.append(line);
			}
			Document doc = Jsoup.parse(html.toString());
			Elements anchors = doc.select("a");
			if (anchors.size() <= PLAY_URL_ANCHOR_INDEX) {
				System.err.println("Expected at least " + (PLAY_URL_ANCHOR_INDEX + 1) + " links but found "
						+ anchors.size() + " at " + url);
				return;
			}
			String playurl = anchors.get(PLAY_URL_ANCHOR_INDEX).text();
			System.out.println("在线播放地址:" + playurl);
			if (writefile(playurl)) {
				cnt0++;
			}
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			closeAndLog(br);
			if (getMethod != null) {
				getMethod.releaseConnection();
			}
		}
	}

	/**
	 * Appends the current record and its resolved play URL to the output file as
	 * one comma-separated line terminated by CRLF.
	 *
	 * @param url resolved play URL
	 * @return {@code true} if the line was written and flushed, {@code false} if
	 *         an I/O error occurred
	 */
	private static boolean writefile(String url) {
		BufferedWriter bw = null;
		try {
			bw = new BufferedWriter(new FileWriter(new File(destfile), true));
			bw.write(num + ',' + title + ',' + time + ',' + longtime + ',' + viewnum + ',' + url + "\r\n");
			// Flush here so that a failed write is reported as a failure.
			bw.flush();
			return true;
		} catch (IOException e) {
			e.printStackTrace();
			return false;
		} finally {
			closeAndLog(bw);
		}
	}

	/**
	 * Closes a resource if it is non-null, printing any failure instead of
	 * propagating it.
	 *
	 * @param resource resource to close; may be {@code null}
	 */
	private static void closeAndLog(java.io.Closeable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Loads every row of table {@code porn} whose {@code status} is 0.
	 *
	 * <p>
	 * Columns are read by position (1 to 7), so the result depends on the table's
	 * column order.
	 *
	 * @return the matching rows; empty (never {@code null}) when nothing matches
	 *         or the query fails
	 */
	public List<Porn> QueryAllBook() {
		java.sql.Connection connection = DBConnection.getConnection();
		String sql = "select * from porn where status=0";
		java.sql.PreparedStatement pstmt = DBConnection.getPreparedStatement(connection, sql);
		List<Porn> pornlist = new ArrayList<Porn>();
		System.out.println(sql);
		ResultSet rs = null;
		try {
			// Execute the statement that was prepared above rather than opening a
			// second, never-closed Statement.
			rs = pstmt.executeQuery();
			while (rs.next()) {
				Porn porn = new Porn();
				porn.setNum(rs.getString(1));
				porn.setTitle(rs.getString(2));
				porn.setTime(rs.getString(3));
				porn.setViewkey(rs.getString(4));
				porn.setLongtime(rs.getString(5));
				porn.setViewnum(rs.getString(6));
				porn.setParurl(rs.getString(7));
				pornlist.add(porn);
			}
		} catch (SQLException e) {
			e.printStackTrace();
		} finally {
			if (rs != null) {
				try {
					rs.close();
				} catch (SQLException e) {
					e.printStackTrace();
				}
			}
			DBConnection.close(connection, pstmt, null);
		}
		return pornlist;
	}
}