“庖丁解牛” 分词器实现
import java.io.IOException;
import java.io.StringReader;

import net.paoding.analysis.analyzer.PaodingAnalyzer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;

/**
 * Small demo of the Paoding analyzer: tokenizes a sample Chinese sentence and
 * prints the resulting terms joined with {@code " AND "}.
 *
 * @author 曾修建
 * @version created 2014-7-17 05:56:54 PM
 */
public class Cld_Analyz {

    /** Alternative sample sentence; not used by the demo as written. */
    private static final String testString1 = "中华人湖发民共和国在1949年建立,从此開始了新中国的伟大篇章";

    /** Sample sentence that {@link #Analyz()} tokenizes. */
    private static final String testString2 = "乒乓球拍卖完了";

    /**
     * Runs the analyzer over {@code testString2} and prints every term it
     * produces to standard output, separated by {@code " AND "}.
     *
     * <p>An {@link IOException} raised while reading tokens is reported via its
     * stack trace, and whatever terms were collected up to that point are still
     * printed. The token stream and the analyzer are released on every exit
     * path, including unexpected runtime failures.
     */
    public static void Analyz() {
        Analyzer analyzer = new PaodingAnalyzer();
        try {
            String indexStr = testString2;
            StringBuilder joined = new StringBuilder();
            TokenStream ts = null;
            try {
                // NOTE(review): the first argument of tokenStream is the field name,
                // yet the text itself is passed here (kept from the original code).
                // Presumably the analyzer ignores it — confirm before changing.
                ts = analyzer.tokenStream(indexStr, new StringReader(indexStr));
                boolean first = true;
                for (Token t = ts.next(); t != null; t = ts.next()) {
                    if (!first) {
                        joined.append(" AND ");
                    }
                    joined.append(t.termText());
                    first = false;
                }
            } catch (IOException e) {
                // Best effort: report the failure and still print the partial result.
                e.printStackTrace();
            } finally {
                closeStream(ts);
            }
            System.out.println("结果是: " + joined);
        } finally {
            // Release the analyzer even if tokenizing failed unexpectedly.
            analyzer.close();
        }
    }

    /** Closes the token stream if one was opened, reporting (not propagating) I/O failures. */
    private static void closeStream(TokenStream ts) {
        if (ts == null) {
            return;
        }
        try {
            ts.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Entry point: runs the tokenizer demo.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        Analyz();
    }
}
执行结果: 乒乓 AND 乒乓球 AND 球拍 AND 拍卖 AND 卖完 AND 完了