爬网页获取电话号码
明天补充
1 import java.io.BufferedReader; 2 import java.io.IOException; 3 import java.io.InputStream; 4 import java.io.InputStreamReader; 5 import java.net.URL; 6 import java.net.URLConnection; 7 import java.util.regex.Matcher; 8 import java.util.regex.Pattern; 9 10 public class URLTest { 11 12 private URLConnection connection = null; 13 14 public String getDoucument(String url) throws IOException { 15 16 URL newUrl = new URL(url); 17 connection = newUrl.openConnection(); 18 InputStream is = connection.getInputStream(); 19 InputStreamReader isr = new InputStreamReader(is); 20 BufferedReader br = new BufferedReader(isr); 21 22 String line = ""; 23 StringBuffer sb = new StringBuffer(); 24 25 while( (line = br.readLine()) != null) { 26 sb.append(line +"\n"); 27 } 28 29 return sb.toString(); 30 31 } 32 33 public String MyFilter( String string ) {//内容过滤器-获取网页上的电话,没有去重 34 35 String regex = "18[\\d]{9}"; 36 Pattern pattern = Pattern.compile(regex); 37 Matcher matcher = pattern.matcher(string); 38 39 String result = ""; 40 while(matcher.find()) { 41 result += matcher.group()+"\n"; 42 } 43 44 return result; 45 } 46 47 public static void main(String[] args) throws IOException { 48 49 URLTest test = new URLTest(); 50 String page = test.getDoucument("https://power.baidu.com/question/1511959129473915060.html?qbl=relate_question_1"); 51 String result = test.MyFilter(page); 52 // System.out.println(page); 53 54 if ( result != null ) { 55 System.out.println("从该网页上找到的号码:\n"+result); 56 } 57 else { 58 System.out.println("该网页上没有电话号码"); 59 } 60 61 } 62 }