☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆☆AnalyzerTool分词工具.非常实用!【转】
AnalyzerTool分词工具.非常实用!
可以查看某个字符串最终被分词器切分成什么样子,这样在查询时就能清楚地明白为什么有的内容查不到、有的却能查到。
package test.main; import java.io.IOException; import java.io.StringReader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.SimpleAnalyzer; import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.util.Version; /** * 展示分词后的效果 */ public class AnalyzerTool { /** * 打印分词后的信息 * * @param str * 待分词的字符串 * @param analyzer * 分词器 */ public static void displayToken(String str, Analyzer analyzer) { TokenStream stream = null; try { // 将一个字符串创建成Token流 stream = analyzer.tokenStream("content", new StringReader(str)); CharTermAttribute cta = stream.addAttribute(CharTermAttribute.class); stream.reset();// 一定要重置,不然老报错 while (stream.incrementToken()) { System.out.print("【" + cta + "】"); } System.out.println(); } catch (IOException e) { e.printStackTrace(); } finally { try { if (stream != null) { stream.end(); stream.close(); } } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } } public static void main(String[] args) { Analyzer aly1 = new StandardAnalyzer(Version.LUCENE_40); Analyzer aly2 = new StopAnalyzer(Version.LUCENE_40); Analyzer aly3 = new SimpleAnalyzer(Version.LUCENE_40); Analyzer aly4 = new WhitespaceAnalyzer(Version.LUCENE_40); String str = "-LT1TT132#########LJRT1326#########LJRT1226#########)"; // LT1TT132#########LJRT1326#########LJRT1226######### // LA939VRG###AJA###LA939VRG###WSJ### // LA99HRD3###SYC### // LZ1B22EE######### AnalyzerTool.displayToken(str, aly1); AnalyzerTool.displayToken(str, aly2); AnalyzerTool.displayToken(str, aly3); AnalyzerTool.displayToken(str, aly4); } }
感觉空虚寂寞,只是因为你无所关注,无处付出。