解析小例子

/**
  * Extracts the page title from HTML text.
  * <p>
  * Every {@code <title>...</title>} element found in the text is concatenated
  * in document order (tags included) and handed to {@code outTag}; whatever
  * {@code outTag} returns is the result. NOTE(review): {@code outTag} is defined
  * elsewhere in this class - presumably it strips the markup; confirm there.
  *
  * @param s the HTML text to scan; {@code null} is treated as empty text
  * @return the value of {@code outTag} applied to the concatenated title elements
  */
 public String getTitle(final String s)
 {
  final StringBuilder title = new StringBuilder();
  if (s != null)
  {
   // DOTALL lets '.' cross line terminators, so a title that is wrapped over
   // several lines is still found (matches the flags used by the sibling getters).
   final Pattern pa = Pattern.compile("<title>.*?</title>", Pattern.DOTALL);
   final Matcher ma = pa.matcher(s);
   while (ma.find())
   {
    title.append(ma.group());
   }
  }
  return outTag(title.toString());
 }
/**
  * Collects every anchor element that carries an {@code href} attribute.
  * <p>
  * Each returned entry is the complete matched text, from the opening
  * {@code <a ...>} tag through the closing {@code </a>}. The {@code href} value
  * may be double-quoted, single-quoted or unquoted. Matching is case-sensitive.
  *
  * @param s the HTML text to scan; {@code null} yields an empty list
  * @return the matched anchor elements in document order; empty if none are found
  */
 public List<String> getLink(final String s)
 {
  final List<String> list = new ArrayList<String>();
  if (s == null)
  {
   return list;
  }
  // "\\s" right after "<a" keeps other tags whose name merely starts with 'a'
  // (e.g. <area href=...>, <abbr>) from being mistaken for an anchor; with DOTALL
  // such a false start would otherwise swallow everything up to the next </a>.
  final String regex = "<a\\s[^>]*href=(\"([^\"]*)\"|'([^']*)'|([^\\s>]*))[^>]*>(.*?)</a>";
  final Pattern pa = Pattern.compile(regex, Pattern.DOTALL);
  final Matcher ma = pa.matcher(s);
  while (ma.find())
  {
   list.add(ma.group());
  }
  return list;
 }
/**
  * Collects the script elements contained in HTML text.
  * <p>
  * Each entry is the full text of one match of {@code <script.*?</script>},
  * i.e. the opening tag, the body and the closing tag. Line breaks inside the
  * element are allowed because the pattern is compiled with DOTALL.
  *
  * @param s the HTML text to scan
  * @return the matched script elements in document order; empty if none are found
  */
 public List<String> getScript(final String s)
 {
  final Matcher scriptMatcher = Pattern.compile("<script.*?</script>", Pattern.DOTALL).matcher(s);
  final List<String> scripts = new ArrayList<String>();
  for (boolean found = scriptMatcher.find(); found; found = scriptMatcher.find())
  {
   // start()/end() delimit exactly the text of the current match
   scripts.add(s.substring(scriptMatcher.start(), scriptMatcher.end()));
  }
  return scripts;
 }
/**
  * Collects the style elements contained in HTML text.
  * <p>
  * Each entry is the full text of one match of {@code <style.*?</style>},
  * i.e. the opening tag, the CSS body and the closing tag. Line breaks inside
  * the element are allowed because the pattern is compiled with DOTALL.
  *
  * @param s the HTML text to scan
  * @return the matched style elements in document order; empty if none are found
  */
 public List<String> getCSS(final String s)
 {
  final Matcher styleMatcher = Pattern.compile("<style.*?</style>", Pattern.DOTALL).matcher(s);
  final List<String> styles = new ArrayList<String>();
  for (boolean found = styleMatcher.find(); found; found = styleMatcher.find())
  {
   // start()/end() delimit exactly the text of the current match
   styles.add(s.substring(styleMatcher.start(), styleMatcher.end()));
  }
  return styles;
 }

 

posted @ 2014-09-04 15:15  GisClub  阅读(176)  评论(0编辑  收藏  举报