Java实现web页面内容抓取
1 package demo; 2 3 import java.io.BufferedReader; 4 import java.io.IOException; 5 import java.io.InputStream; 6 import java.io.InputStreamReader; 7 8 /** 9 * web页面内容抓取 10 * @author sy 11 * 12 */ 13 public class GrabWebHtml{ 14 15 public static void main(String[] args) { 16 String url="http://www.baidu.com"; 17 System.out.println(getWebHtml(url)); 18 } 19 20 public static String getWebHtml(String domain) { 21 StringBuffer sb = new StringBuffer(); 22 InputStream is = null; 23 InputStreamReader isr = null; 24 BufferedReader in = null; 25 try { 26 java.net.URL url = new java.net.URL(domain); 27 is = url.openStream(); 28 isr = new InputStreamReader(is,"utf-8"); 29 in = new BufferedReader(isr); 30 String line; 31 while ((line = in.readLine()) != null) { 32 sb.append(line).append("\n"); 33 } 34 in.close(); 35 36 } catch (IOException e) { 37 e.printStackTrace(); 38 }finally { 39 try { 40 if(in!=null){ 41 in.close(); 42 in=null; 43 } 44 if(isr!=null){ 45 isr.close(); 46 isr=null; 47 } 48 if(is!=null){ 49 is.close(); 50 is=null; 51 } 52 } catch (IOException e) { 53 e.printStackTrace(); 54 } 55 } 56 return sb.toString(); 57 } 58 59 }
==========================================路漫漫其修远兮========================================== ====================== 知识改变生活 ====================== ====================== 技术改变命运 ====================== ====================== 我是一名普通的程序员 ====================== ====================== 希望通过自己的努力来使自己人生更加绚烂多彩 ====================== ==========================================吾将上下而求索==========================================