IKAnalyzer兼容Lucene 5.4.0版本抛出异常?
java.lang.AbstractMethodError: org.apache.lucene.analysis.Analyzer.createComponents(Ljava/lang/String;)Lorg/apache/lucene/analysis/Analyzer$TokenStreamComponents;
at org.apache.lucene.analysis.Analyzer.tokenStream(Analyzer.java:176)
at org.apache.lucene.document.Field.tokenStream(Field.java:562)
at org.apache.lucene.index.DefaultIndexingChain$PerField.invert(DefaultIndexingChain.java:607)
at org.apache.lucene.index.DefaultIndexingChain.processField(DefaultIndexingChain.java:344)
at org.apache.lucene.index.DefaultIndexingChain.processDocument(DefaultIndexingChain.java:300)
at org.apache.lucene.index.DocumentsWriterPerThread.updateDocument(DocumentsWriterPerThread.java:234)
at org.apache.lucene.index.DocumentsWriter.updateDocument(DocumentsWriter.java:450)
at org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1477)
at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:1256)
at com.study.lucene.demo.IndexFileTester.addDocument4(IndexFileTester.java:120)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47)
at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44)
at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:271)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:70)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:50)
at org.junit.runners.ParentRunner$3.run(ParentRunner.java:238)
at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:63)
at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:236)
at org.junit.runners.ParentRunner.access$000(ParentRunner.java:53)
根据报错信息,原来是下载的IK Analyzer 2012FF_hf1.zip不兼容最新的Lucene 5.4.0版本(我下载的Lucene版本是5.4.0)。看了下IK Analyzer 2012FF_hf1.zip的源码,原来是其中的IKAnalyzer.java和IKTokenizer.java两个类不兼容Lucene 5.4.0版本的API(这两个文件是IKAnalyzer接入Lucene的核心类),需要对其进行相应的改进。本来想通过继承的方式实现,但IKAnalyzer.java和IKTokenizer.java两个类都是final的,没办法,只能重写了。
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;

/**
 * Lucene 5.4.0 {@link Analyzer} implementation backed by the IK segmenter.
 *
 * <p>The analysis chain consists of a single {@code ReIKTokenizer}; no token
 * filters are added.
 */
public class ReIKAnalyzer extends Analyzer {

    /** Segmentation mode flag handed to every tokenizer this analyzer creates. */
    private boolean useSmart;

    /**
     * Creates an analyzer with smart segmentation switched off
     * (fine-grained segmentation).
     */
    public ReIKAnalyzer() {
        this(false);
    }

    /**
     * Creates an analyzer with the given segmentation mode.
     *
     * @param useSmart
     *            {@code true} to make the tokenizer perform smart segmentation
     */
    public ReIKAnalyzer(boolean useSmart) {
        this.useSmart = useSmart;
    }

    /**
     * Reports the currently configured segmentation mode.
     *
     * @return {@code true} if smart segmentation is enabled
     */
    public boolean useSmart() {
        return useSmart;
    }

    /**
     * Changes the segmentation mode used for tokenizers created from now on.
     *
     * @param useSmart
     *            {@code true} to enable smart segmentation
     */
    public void setUseSmart(boolean useSmart) {
        this.useSmart = useSmart;
    }

    /**
     * Builds the analysis components: one {@code ReIKTokenizer} configured
     * with the value returned by {@link #useSmart()}.
     *
     * @param fieldName
     *            the name of the field being analyzed; not used by this
     *            implementation
     * @return the components wrapping the newly created tokenizer
     */
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new ReIKTokenizer(useSmart());
        return new TokenStreamComponents(tokenizer);
    }
}
import java.io.IOException; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.wltea.analyzer.core.IKSegmenter; import org.wltea.analyzer.core.Lexeme; public class ReIKTokenizer extends Tokenizer { // IK分词器实现 private IKSegmenter _IKImplement; // 词元文本属性 private final CharTermAttribute termAtt; // 词元位移属性 private final OffsetAttribute offsetAtt; // 词元分类属性(该属性分类参考org.wltea.analyzer.core.Lexeme中的分类常量) private final TypeAttribute typeAtt; // 记录最后一个词元的结束位置 private int endPosition; /** * Lucene 5.4.0 Tokenizer适配器类构造函数 * * @param in * @param useSmart */ public ReIKTokenizer(boolean useSmart) { super(); offsetAtt = addAttribute(OffsetAttribute.class); termAtt = addAttribute(CharTermAttribute.class); typeAtt = addAttribute(TypeAttribute.class); _IKImplement = new IKSegmenter(input, useSmart); } @Override public boolean incrementToken() throws IOException { // 清除所有的词元属性 clearAttributes(); Lexeme nextLexeme = _IKImplement.next(); if (nextLexeme != null) { // 将Lexeme转成Attributes // 设置词元文本 termAtt.append(nextLexeme.getLexemeText()); // 设置词元长度 termAtt.setLength(nextLexeme.getLength()); // 设置词元位移 offsetAtt.setOffset(nextLexeme.getBeginPosition(), nextLexeme.getEndPosition()); // 记录分词的最后位置 endPosition = nextLexeme.getEndPosition(); // 记录词元分类 typeAtt.setType(nextLexeme.getLexemeTypeString()); // 返会true告知还有下个词元 return true; } // 返会false告知词元输出完毕 return false; } @Override public void reset() throws IOException { super.reset(); _IKImplement.reset(input); } @Override public final void end() { // set final offset int finalOffset = correctOffset(this.endPosition); offsetAtt.setOffset(finalOffset, finalOffset); } }