// Regular expressions: extracting <A HREF> links (target and link text) from HTML
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small demo that reads an HTML file and prints the target and the link text
 * of every {@code <a ... href=...>...</a>} element that the anchor pattern
 * finds in it.
 */
public class RegexDemo {

    /** File that is read when no path is passed on the command line. */
    private static final String DEFAULT_FILE_NAME =
            "D:\\Users\\Administrator\\Desktop\\forRegex.txt";

    // Alternative patterns kept for reference:
    //
    // Take everything from <a> to </a> in the HTML:
    // "(<a\\s+([^>h]|h(?!ref\\s))*href[\\s+]?=[\\s+]?('|\"))([^(\\s+|'|\")]*)([^>]*>)(.*?)</a>"
    //
    // Take only the href value inside <a></a>:
    // "(?<=[\\s+]?href[\\s+]?=[\\s+]?('|\")?)[^(\"|')>]+?(?=\"|')"
    //
    // Take both href and link text inside <a></a>, using group(4) and group(1):
    // "((?<=[\\s+]?href[\\s+]?=[\\s+]?('|\")?)[^\"|'>]+?(?=\"|'))(.+?)?((?<=>)(.+?)?(?=</a>))"
    //
    // Take only the links whose text contains '登录' ("log in"):
    // "((?<=[\\s+]?href[\\s+]?=[\\s+]?('|\")?)[^\"|'>]+?(?=\"|'))(.+?)?((?<=>).*登录.*(?=</a>))"

    /**
     * Pattern in use: takes both href and link text from the HTML.
     * {@code group(3)} is the href value and {@code group(5)} is the text
     * between the opening tag and {@code </a>}.
     */
    private static final String ANCHOR_REGEX =
            "<a\\s+([^>h]|h(?!ref\\s))*(?<=[\\s+]?href[\\s+]?=[\\s+]?('|\")?)([^\"|'>]+?(?=\"|'))(.+?)?((?<=>)(.+?)?(?=</a>))";

    /** Compiled once; {@link Pattern} instances are immutable and reusable. */
    private static final Pattern ANCHOR_PATTERN = Pattern.compile(ANCHOR_REGEX);

    /**
     * Reads the input file, prints it together with the pattern, then prints
     * one {@code href:text} line per match followed by the total match count.
     *
     * @param args optional; {@code args[0]} is the path of the file to scan.
     *             When absent, {@link #DEFAULT_FILE_NAME} is used.
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        String fileName = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE_NAME;
        String html = readWithoutLineBreaks(fileName);

        Matcher m = ANCHOR_PATTERN.matcher(html);
        System.out.println("INPUT: " + html);
        System.out.println("REGEX: " + ANCHOR_REGEX + "\r\n");

        int count = 0;
        while (m.find()) {
            System.out.println(m.group(3) + ":" + m.group(5));
            count++;
        }
        if (count == 0) {
            System.out.println("NO MATCHES: ");
        }
        System.out.println("TOTAL:" + count);
    }

    /**
     * Reads the whole file and joins its lines with nothing in between.
     * Line terminators are dropped on purpose: the pattern uses {@code .},
     * which does not match line terminators, so keeping them would stop an
     * anchor that spans several lines from matching.
     *
     * The file is decoded with the platform default charset.
     *
     * @param fileName path of the file to read
     * @return the file content with all line terminators removed
     * @throws IOException if the file cannot be opened or read
     */
    private static String readWithoutLineBreaks(String fileName) throws IOException {
        StringBuilder content = new StringBuilder(10240);
        BufferedReader br = new BufferedReader(new FileReader(fileName));
        try {
            // readLine() returning null is the end-of-stream signal; ready()
            // only says whether a read would block, so it must not be used
            // to detect the end of the file.
            String line;
            while ((line = br.readLine()) != null) {
                content.append(line);
            }
        } finally {
            // Closing the BufferedReader also closes the wrapped FileReader.
            br.close();
        }
        return content.toString();
    }
}
// Known issue: the pattern still has bugs; it took a whole evening to get this far.
// Author: 躲猫猫社团团长 http://t.sina.com.cn/coolria