Lucene 分析过程
2012-01-04 16:06 _9527 阅读(198) 评论(0) 编辑 收藏 举报
package analysis;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;

/**
 * Runs a fixed set of sample strings through several Lucene analyzers and
 * prints the tokens each analyzer produces, so the results can be compared.
 */
public class AnalyzerDemo {

    /** Sample texts to be analyzed. */
    private static final String[] examples = {
        "The quick brown fox jumped over the lazy dog",
        "XY&Z Corporation - xyz@example.com",
        "中华人名共和国1949年成立,从此中国人民酒陷入了水深火热之中,Fuck!"
    };

    /** Analyzers under test; every sample text is run through each of them in this order. */
    private static final Analyzer[] analyzers = new Analyzer[] {
        new WhitespaceAnalyzer(),
        new SimpleAnalyzer(),
        new StopAnalyzer(Version.LUCENE_30),
        new StandardAnalyzer(Version.LUCENE_30)
    };

    /**
     * Analyzes every sample text with every configured analyzer.
     *
     * @param args command-line arguments (not used)
     * @throws IOException if printing the tokens of a text fails
     */
    public static void main(String[] args) throws IOException {
        for (String text : examples) {
            analyze(text);
        }
    }

    /**
     * Prints the given text, then for each analyzer prints the analyzer's
     * simple class name followed by the tokens it produced for that text.
     *
     * @param text the text to analyze
     * @throws IOException if {@code AnalyzerUtils.displayTokens} fails
     */
    private static void analyze(String text) throws IOException {
        System.out.println("Analyzing \"" + text + "\"");
        for (Analyzer analyzer : analyzers) {
            String name = analyzer.getClass().getSimpleName();
            System.out.println(" " + name + ":");
            System.out.print(" ");
            // The actual analysis and token printing is delegated to the shared helper.
            AnalyzerUtils.displayTokens(analyzer, text);
            // Ends the token line and leaves one blank line before the next analyzer.
            System.out.println("\n");
        }
    }
}
package analysis;

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

/** Helpers for printing the tokens produced by a Lucene analyzer or token stream. */
public class AnalyzerUtils {

    /**
     * Analyzes {@code text} with {@code analyzer} (using the field name
     * {@code "contents"}) and prints every resulting term to standard output.
     *
     * <p>The token stream is created here, so it is also finished and closed
     * here, even when printing fails.
     *
     * @param analyzer the analyzer used to tokenize the text
     * @param text the text to analyze
     * @throws IOException if the token stream cannot be read, ended or closed
     */
    public static void displayTokens(Analyzer analyzer, String text) throws IOException {
        TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
        try {
            displayTokens(stream);
            // Signal end-of-stream only after all tokens were consumed successfully.
            stream.end();
        } finally {
            // Always release the stream (and the reader it wraps).
            stream.close();
        }
    }

    /**
     * Consumes {@code stream} and prints each term as {@code "[term] "} on the
     * current output line, without a trailing line break.
     *
     * <p>The stream is not closed by this method; that is left to the caller.
     *
     * @param stream the token stream to consume
     * @throws IOException if advancing the stream fails
     */
    public static void displayTokens(TokenStream stream) throws IOException {
        // The attribute instance is updated in place on every incrementToken() call.
        TermAttribute term = stream.addAttribute(TermAttribute.class);
        while (stream.incrementToken()) {
            System.out.print("[" + term.term() + "] ");
        }
    }
}