使用 Lucene 和 IKAnalyzer 实现简单的中文分词
1 import java.io.IOException; 2 import java.io.StringReader; 3 import java.util.ArrayList; 4 5 import org.wltea.analyzer.IKSegmentation; 6 import org.wltea.analyzer.Lexeme; 7 8 9 10 public class Test { 11 public static ArrayList<String> getStringSet(String s)throws IOException{ 12 ArrayList<String> ali = new ArrayList<String>(); 13 14 StringReader reader = new StringReader(s); 15 IKSegmentation ik = new IKSegmentation(reader , true); 16 Lexeme lexeme = null; 17 while ((lexeme = ik.next()) != null) { 18 ali.add(lexeme.getLexemeText()); 19 } 20 return ali; 21 } 22 public static void main(String[] args) throws IOException { 23 String s = "头痛是一种病"; 24 25 ArrayList<String> aList = Test.getStringSet(s); 26 while(!aList.isEmpty()){ 27 System.out.println(aList.get(0)); 28 aList.remove(0); 29 } 30 } 31 }
// 输出:
头痛
是
一种
病
——————————————————————————————————
实现输入的语句的分词。