解析小例子
/** Matches a {@code <title>...</title>} element in any letter case, including one spanning lines. */
private static final Pattern TITLE_PATTERN =
        Pattern.compile("<title>.*?</title>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

/**
 * Gets the title of a web page.
 *
 * <p>Every {@code <title>...</title>} element found in the input is concatenated
 * (tags included) in document order, and the combined text is passed through
 * {@code outTag} before being returned. {@code outTag} is defined elsewhere in this
 * class; presumably it strips the markup -- confirm against its implementation.
 *
 * @param s the HTML source to scan
 * @return the result of {@code outTag} applied to the concatenated title elements
 *         (applied to the empty string when no title element is present)
 * @throws NullPointerException if {@code s} is {@code null}
 */
public String getTitle(final String s) {
    // A StringBuilder avoids re-copying the accumulated text on every match.
    final StringBuilder title = new StringBuilder();
    final Matcher matcher = TITLE_PATTERN.matcher(s);
    while (matcher.find()) {
        title.append(matcher.group());
    }
    return outTag(title.toString());
}
/**
 * Matches an anchor element that carries an {@code href} attribute.
 * The {@code \b} after the tag name keeps tags such as {@code <area>} or
 * {@code <article>} from being mistaken for {@code <a>}.
 */
private static final Pattern LINK_PATTERN = Pattern.compile(
        "<a\\b[^>]*href=(\"([^\"]*)\"|'([^']*)'|([^\\s>]*))[^>]*>(.*?)</a>",
        Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

/**
 * Gets the links contained in a piece of HTML.
 *
 * <p>Each returned entry is the complete text of one anchor element, from the
 * opening {@code <a ...>} tag through the closing {@code </a>} tag. The
 * {@code href} value may be double-quoted, single-quoted or unquoted. Tag and
 * attribute names are matched case-insensitively, and the link body may span
 * several lines.
 *
 * @param s the HTML source to scan
 * @return the matched anchor elements in document order; empty if there are none
 * @throws NullPointerException if {@code s} is {@code null}
 */
public List<String> getLink(final String s) {
    final List<String> links = new ArrayList<String>();
    final Matcher matcher = LINK_PATTERN.matcher(s);
    while (matcher.find()) {
        links.add(matcher.group());
    }
    return links;
}
/** Matches a {@code <script ...>...</script>} element in any letter case, across lines. */
private static final Pattern SCRIPT_PATTERN =
        Pattern.compile("<script.*?</script>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

/**
 * Gets the script blocks contained in a piece of HTML.
 *
 * <p>Each returned entry is the complete text of one script element, from the
 * opening {@code <script} through the closing {@code </script>} tag. Tag names
 * are matched case-insensitively, and the script body may span several lines.
 *
 * @param s the HTML source to scan
 * @return the matched script elements in document order; empty if there are none
 * @throws NullPointerException if {@code s} is {@code null}
 */
public List<String> getScript(final String s) {
    final List<String> scripts = new ArrayList<String>();
    final Matcher matcher = SCRIPT_PATTERN.matcher(s);
    while (matcher.find()) {
        scripts.add(matcher.group());
    }
    return scripts;
}
/** Matches a {@code <style ...>...</style>} element in any letter case, across lines. */
private static final Pattern STYLE_PATTERN =
        Pattern.compile("<style.*?</style>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

/**
 * Gets the embedded CSS blocks contained in a piece of HTML.
 *
 * <p>Each returned entry is the complete text of one style element, from the
 * opening {@code <style} through the closing {@code </style>} tag. Tag names
 * are matched case-insensitively, and the style body may span several lines.
 *
 * @param s the HTML source to scan
 * @return the matched style elements in document order; empty if there are none
 * @throws NullPointerException if {@code s} is {@code null}
 */
public List<String> getCSS(final String s) {
    final List<String> styles = new ArrayList<String>();
    final Matcher matcher = STYLE_PATTERN.matcher(s);
    while (matcher.find()) {
        styles.add(matcher.group());
    }
    return styles;
}