用正则表达式分析网页，获得中央一台电视节目列表
// Usage example: download a TV-guide page and print every (time, text) pair
// that the regular expression extracts from it.
String strUrl="http://www.cctv.com/tvguide/11/01/20061010/1.shtml";
// Fetch the raw bytes of the page.
byte[] pageHtml = HttpUtil.getPage(strUrl);
// Decode the page bytes into a String using the GB2312 charset.
String strHtml = new String(pageHtml, "GB2312");
// One row per match: group 1 is an HH:mm:ss time, group 2 is the text
// captured inside the following <font > element.
String[][] ls = StringUtil.splitByReg(strHtml,"(\\d{2}:\\d{2}:\\d{2})</font>.*<font >(.+)</font>.*</tr>\\r\\n<tr>");
// Guard against a null result (no matches) before iterating.
if (ls != null) {
    for (String[] row : ls) {
        System.out.println(row[0]+"##"+row[1]);
    }
}
/**通用正则表达式解析函数
* splitByReg
* @param str 需要解析的字符串
* @param regExp 匹配的正则表达式
* @return 解析后字符串数组
*/
public static String[][] splitByReg(String str,String regExp) {
Pattern sp = Pattern.compile(regExp);
Matcher matcher = sp.matcher(str);
Vector<Vector<String>> colInoput= new Vector<Vector<String>>();
while (matcher.find()) {
Vector<String> v = new Vector<String>();
for(int i=1;i <= matcher.groupCount();i++)
{
v.add(matcher.group(i));
}
colInoput.add(v);
}
String[][] resultList =null;
if(colInoput.size()>0)
resultList=new String[colInoput.size()][colInoput.get(0).size()];
for(int i=0;i< colInoput.size();i++)
{
String[] kk = new String[colInoput.get(i).size()];
colInoput.get(i).copyInto(kk);
resultList[i] = kk;
}
return resultList;
}
* splitByReg
* @param str 需要解析的字符串
* @param regExp 匹配的正则表达式
* @return 解析后字符串数组
*/
public static String[][] splitByReg(String str,String regExp) {
Pattern sp = Pattern.compile(regExp);
Matcher matcher = sp.matcher(str);
Vector<Vector<String>> colInoput= new Vector<Vector<String>>();
while (matcher.find()) {
Vector<String> v = new Vector<String>();
for(int i=1;i <= matcher.groupCount();i++)
{
v.add(matcher.group(i));
}
colInoput.add(v);
}
String[][] resultList =null;
if(colInoput.size()>0)
resultList=new String[colInoput.size()][colInoput.get(0).size()];
for(int i=0;i< colInoput.size();i++)
{
String[] kk = new String[colInoput.get(i).size()];
colInoput.get(i).copyInto(kk);
resultList[i] = kk;
}
return resultList;
}