代码改变世界

Lucene 分析过程

2012-01-04 16:06  _9527  阅读(198)  评论(0)  编辑  收藏  举报

 

package analysis;


import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;


/**
 * Runs a fixed set of Lucene analyzers over sample text and prints the tokens
 * each analyzer produces, so their tokenization behavior can be compared.
 *
 * <p>Text to analyze may be supplied as command-line arguments (one text per
 * argument); when no arguments are given the built-in examples are used.
 */
public class AnalyzerDemo {

  /** Sample text analyzed when no command-line arguments are supplied. */
  private static final String[] examples = {
    "The quick brown fox jumped over the lazy dog",
    "XY&Z Corporation - xyz@example.com",
    "中华人名共和国1949年成立,从此中国人民酒陷入了水深火热之中,Fuck!"
  };

  /** Analyzers under test; each text is run through them in this order. */
  private static final Analyzer[] analyzers = new Analyzer[] {
      new WhitespaceAnalyzer(),
      new SimpleAnalyzer(),
      new StopAnalyzer(Version.LUCENE_30),
      new StandardAnalyzer(Version.LUCENE_30)
  };

  /**
   * Analyzes every input text with every analyzer and prints the results.
   *
   * @param args texts to analyze; if empty, the built-in examples are used
   * @throws IOException if reading tokens from an analyzer fails
   */
  public static void main(String[] args) throws IOException {
    // Command-line text takes precedence over the built-in examples.
    String[] strings = (args != null && args.length > 0) ? args : examples;

    for (String text : strings) {
      analyze(text);
    }
  }

  /**
   * Prints the given text followed by, for each analyzer, the analyzer's simple
   * class name and the bracketed tokens it emits for that text.
   *
   * @param text the text to analyze
   * @throws IOException if reading tokens from an analyzer fails
   */
  private static void analyze(String text) throws IOException {
    System.out.println("Analyzing \"" + text + "\"");
    for (Analyzer analyzer : analyzers) {
      String name = analyzer.getClass().getSimpleName();
      System.out.println("  " + name + ":");
      System.out.print("    ");
      AnalyzerUtils.displayTokens(analyzer, text);
      // Ends the token line and leaves a blank line before the next analyzer.
      System.out.println("\n");
    }
  }
}

  

 

 

package analysis;

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

/** Helpers for printing the tokens produced by a Lucene analysis chain. */
public class AnalyzerUtils {
	/**
	 * Analyzes {@code text} with {@code analyzer} and prints each resulting
	 * term to standard output as {@code [term] }.
	 *
	 * <p>The token stream is created here, so it is also finished and closed
	 * here, even if token iteration throws.
	 *
	 * @param analyzer the analyzer used to tokenize the text
	 * @param text the text to analyze
	 * @throws IOException if the token stream fails while reading tokens
	 */
	public static void displayTokens(Analyzer analyzer, String text)
			throws IOException {
		TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
		try {
			displayTokens(stream);
			// Signal end-of-stream only after a complete, successful pass.
			stream.end();
		} finally {
			stream.close();
		}
	}

	/**
	 * Prints every remaining term of {@code stream} to standard output as
	 * {@code [term] }, without a trailing newline.
	 *
	 * <p>The stream is owned by the caller and is not closed by this method.
	 *
	 * @param stream the token stream to consume
	 * @throws IOException if the token stream fails while reading tokens
	 */
	public static void displayTokens(TokenStream stream) throws IOException {

		TermAttribute term = stream.addAttribute(TermAttribute.class);
		while (stream.incrementToken()) {
			System.out.print("[" + term.term() + "] ");
		}
	}
}

  

  

 

下载: Lucene_in_Action_2nd_Edition.rar