利用 Jsoup 高亮 HTML 页面中的关键词
代码
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.parser.Tag;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demo: wraps every occurrence of a keyword found in the text of an HTML
 * document's body in a {@code <mark>} element, using jsoup.
 */
public class Test {

    /**
     * Parses {@code test.html} from the working directory and highlights the
     * keyword {@code "hello"} in the parsed document.
     *
     * <p>The document is only modified in memory; nothing is written back or
     * printed here. Serialize it (e.g. {@code document.outerHtml()}) if the
     * highlighted markup is needed.
     *
     * @param args unused
     * @throws IOException if the file cannot be read
     */
    public static void main(String[] args) throws IOException {
        String keyword = "hello";
        Document document = Jsoup.parse(new File("test.html"));
        List<Node> childs = document.body().childNodes();
        recursion(childs, keyword);
    }

    /**
     * Highlights {@code keyword} (case-insensitively, as a literal string) in
     * every text node reachable from {@code nodes}.
     *
     * @param nodes   nodes to walk; their descendants are visited too
     * @param keyword literal text to highlight; {@code null} or empty is a no-op
     */
    private static void recursion(List<Node> nodes, String keyword) {
        // An empty pattern matches at every position and would flood the
        // document with empty <mark> elements, so treat it as "nothing to do".
        if (nodes == null || keyword == null || keyword.isEmpty()) {
            return;
        }
        // Pattern.quote: the keyword is user text, not a regular expression.
        // Compiled once here instead of once per text node.
        Pattern pattern = Pattern.compile(
                Pattern.quote(keyword),
                Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
        highlight(nodes, pattern);
    }

    /** Walks {@code nodes} depth-first and highlights matches in each text node. */
    private static void highlight(List<Node> nodes, Pattern pattern) {
        // Iterate over a snapshot: replaceWith() below mutates the parent's
        // child list, which is the list being walked.
        for (Node node : nodes.toArray(new Node[0])) {
            if (node instanceof TextNode) {
                highlightTextNode((TextNode) node, pattern);
            } else {
                highlight(node.childNodes(), pattern);
            }
        }
    }

    /**
     * Replaces {@code textNode} with a {@code <span>} whose children are the
     * original text split around the matches, each match wrapped in
     * {@code <mark>}. Leaves the node untouched when there is no match.
     */
    private static void highlightTextNode(TextNode textNode, Pattern pattern) {
        // getWholeText() keeps the original whitespace (text() normalises it).
        String text = textNode.getWholeText();
        Matcher matcher = pattern.matcher(text);
        if (!matcher.find()) {
            return;
        }

        // Build the replacement through the DOM API rather than by parsing a
        // concatenated HTML string, so characters such as '<' and '&' in the
        // text stay text instead of being re-parsed as markup.
        Element span = new Element(Tag.valueOf("span"), "");
        int last = 0;
        do {
            if (matcher.start() > last) {
                span.appendText(text.substring(last, matcher.start()));
            }
            // Use the matched text, not the keyword, to keep the original casing.
            span.appendElement("mark").text(matcher.group());
            last = matcher.end();
        } while (matcher.find());
        if (last < text.length()) {
            span.appendText(text.substring(last));
        }

        textNode.replaceWith(span);
    }
}
不积跬步无以至千里