Java正则表达式获取网址和链接文字解析
很久没写正则表达式了，在网上看到一道面试题：从 <a href='提取我'/> 这样的标签中提取 href 的值，结果各种解法乱七八糟。
比较正确的写法，应该是这样：
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Small demo that extracts the {@code href} value of {@code <a>} tags with a
 * regular expression and prints every capture group of every match.
 */
public class TestReg {

    /**
     * Matches one opening {@code <a ...>} tag carrying a quoted {@code href}.
     *
     * <p>Capture groups:
     * <ol>
     *   <li>the whole opening tag (same text as group 0)</li>
     *   <li>everything between {@code <a} and {@code href}</li>
     *   <li>the href value, without its quotes</li>
     *   <li>everything between the closing quote and {@code >}</li>
     * </ol>
     *
     * <p>{@code \b} after {@code <a} keeps tags such as {@code <abbr>} or
     * {@code <area>} from being treated as anchors. The quote classes are
     * {@code ["']}: a {@code |} inside a character class is a literal pipe,
     * not alternation, so it must not appear there.
     *
     * <p>Compiled once because {@link Pattern} instances are immutable and
     * reusable.
     */
    private static final Pattern HREF_PATTERN = Pattern.compile(
            "(<a\\b([^>]*)href=[\"']([^\"']*)[\"']([^>]*)>)",
            Pattern.CASE_INSENSITIVE);

    /**
     * Prints {@code str}, then for each anchor tag found in it prints the
     * matched text followed by capture groups 0 through 4, one per line.
     *
     * @param str the HTML fragment to scan
     */
    static void ParseHref(String str) {
        System.out.println(str);
        System.out.println("开始匹配");

        Matcher matcher = HREF_PATTERN.matcher(str);
        while (matcher.find()) {
            System.out.println("group:" + matcher.group());
            // Group 0 is the full match; groupCount() excludes it, hence <=.
            for (int i = 0; i <= matcher.groupCount(); i++) {
                System.out.println("" + i + ": " + matcher.group(i));
            }
        }
    }

    /**
     * Runs {@link #ParseHref(String)} on two sample fragments: a single tag
     * followed by a bare {@code href="..."} outside any tag, and a page footer
     * containing several links in both quote styles.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String str = "<a href=\"www.google.com/sadfs/\" name=\"myname\" />123123 href=\"error.google.com/sadfs/\"55555";
        ParseHref(str);

        String s="</p><p style=height:14px><a href=\"http://jingjia.baidu.com\">企业推广</a> | <a href=\"http://top.baidu.com\">搜索风云榜</a> | <a href=\"/home.html\">关于百度</a> | <a href=\'http://ir.baidu.com\'>About Baidu</a></p><p id=b>©2008 Baidu <a href=\'http://www.baidu.com/duty\'>使用百度前必读</a> <a href=\'http://www.miibeian.gov.cn\' target=_blank>京ICP证030173号</a> <a href=\'http://www.hd315.gov.cn/beian/view.asp?bianhao=010202001092500412\'><img src=http://gimg.baidu.com/img/gs.gif></a></p></center></body></html><!--543ff95f18f36b11-->";
        ParseHref(s);
    }
}
<a href="www.google.com/sadfs/" name="myname" />123123 href="error.google.com/sadfs/"55555 开始匹配 group:<a href="www.google.com/sadfs/" name="myname" /> 0: <a href="www.google.com/sadfs/" name="myname" /> 1: <a href="www.google.com/sadfs/" name="myname" /> 2: 3: www.google.com/sadfs/ 4: name="myname" / </p><p style=height:14px><a href="http://jingjia.baidu.com">企业推广</a> | <a href="http://top.baidu.com">搜索风云榜</a> | <a href="/home.html">关于百度</a> | <a href='http://ir.baidu.com'>About Baidu</a></p><p id=b>©2008 Baidu <a href='http://www.baidu.com/duty'>使用百度前必读</a> <a href='http://www.miibeian.gov.cn' target=_blank>京ICP证030173号</a> <a href='http://www.hd315.gov.cn/beian/view.asp?bianhao=010202001092500412'><img src=http://gimg.baidu.com/img/gs.gif></a></p></center></body></html><!--543ff95f18f36b11--> 开始匹配 group:<a href="http://jingjia.baidu.com"> 0: <a href="http://jingjia.baidu.com"> 1: <a href="http://jingjia.baidu.com"> 2: 3: http://jingjia.baidu.com 4: group:<a href="http://top.baidu.com"> 0: <a href="http://top.baidu.com"> 1: <a href="http://top.baidu.com"> 2: 3: http://top.baidu.com 4: group:<a href="/home.html"> 0: <a href="/home.html"> 1: <a href="/home.html"> 2: 3: /home.html 4: group:<a href='http://ir.baidu.com'> 0: <a href='http://ir.baidu.com'> 1: <a href='http://ir.baidu.com'> 2: 3: http://ir.baidu.com 4: group:<a href='http://www.baidu.com/duty'> 0: <a href='http://www.baidu.com/duty'> 1: <a href='http://www.baidu.com/duty'> 2: 3: http://www.baidu.com/duty 4: group:<a href='http://www.miibeian.gov.cn' target=_blank> 0: <a href='http://www.miibeian.gov.cn' target=_blank> 1: <a href='http://www.miibeian.gov.cn' target=_blank> 2: 3: http://www.miibeian.gov.cn 4: target=_blank group:<a href='http://www.hd315.gov.cn/beian/view.asp?bianhao=010202001092500412'> 0: <a href='http://www.hd315.gov.cn/beian/view.asp?bianhao=010202001092500412'> 1: <a href='http://www.hd315.gov.cn/beian/view.asp?bianhao=010202001092500412'> 2: 3: 
http://www.hd315.gov.cn/beian/view.asp?bianhao=010202001092500412 4: