正则表达式中的多行和非贪婪模式
多行匹配就是在正则表达式最前边加"(?s)"(即 DOTALL 模式,让 . 也能匹配换行符,这样 .* 才能跨行匹配;注意 "(?m)" 才是 MULTILINE 模式,它只影响 ^ 和 $ 的含义)
非贪婪模式就是在量词 *、?、+(以及 {n,m})后边再加一个 ?,例如 .*? 会匹配尽可能少的字符
例子:
// Example: download a page, decode it as GBK, and pull values out of the HTML
// with a DOTALL ("(?s)") regex that uses reluctant (non-greedy) quantifiers.
try {
    URL url = new URL("http://money.finance.sina.com.cn/corp/go.php/vMS_MarketHistory/stockid/600000.phtml");
    URLConnection conn = url.openConnection();

    // Read the whole response body into memory.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try (InputStream is = conn.getInputStream()) {
        byte[] arr = new byte[1024];
        int len;
        while ((len = is.read(arr)) != -1) {
            // Write only the bytes actually read; writing the whole buffer
            // would append stale bytes left over from earlier reads.
            out.write(arr, 0, len);
        }
    }
    String content = out.toString("GBK");
    System.out.println(content);

    // "(?s)" lets '.' match line terminators, so ".*?" can span several lines.
    // ".*?" is reluctant: it stops at the first following "</td>" instead of the last.
    // Group 1 is the yyyy-MM-dd text inside the link; groups 2-7 are taken from the
    // six following <td> cells: "<td[^\\d]*" skips everything up to the first digit
    // and "([^<]*)" captures from there up to the next '<'.
    String regstr = "(?s)<a target='_blank'\\s+href='http://vip.stock.finance.sina.com.cn/quotes_service/view/vMS_tradehistory.php\\?symbol=\\w{2}\\d{6}&date=\\d{4}-\\d{2}-\\d{2}'>"
            + "\\s+(\\d{4}-\\d{2}-\\d{2})\\s+.*?</td>\\s+"
            + "<td[^\\d]*([^<]*).*?</td>\\s+"
            + "<td[^\\d]*([^<]*).*?</td>\\s+"
            + "<td[^\\d]*([^<]*).*?</td>\\s+"
            + "<td[^\\d]*([^<]*).*?</td>\\s+"
            + "<td[^\\d]*([^<]*).*?</td>\\s+"
            + "<td[^\\d]*([^<]*).*?</td>\\s+";
    Pattern p = Pattern.compile(regstr);
    Matcher m = p.matcher(content);
    while (m.find()) {
        // Group 0 is the whole match; groups 1..groupCount() are the captures.
        for (int i = 0; i <= m.groupCount(); i++) {
            System.out.println(m.group(i));
        }
    }
} catch (IOException e) {
    e.printStackTrace();
}