Lucene 下的一个自定义分词器(Analyzer)
/// <summary>
/// Lucene analyzer that tokenizes input with <c>ICTCLASTokenizer</c> and then runs the
/// tokens through <c>StandardFilter</c>, <c>LowerCaseFilter</c> and <c>StopFilter</c>.
/// </summary>
public class ICTCLASAnalyzer : Analyzer
{
    /// <summary>
    /// Stop words to filter out. The array has a fixed capacity; it is (re)filled from the
    /// file at <see cref="NoisePath"/> every time an analyzer is constructed, and slots
    /// beyond the number of words read remain null.
    /// </summary>
    public static readonly System.String[] CHINESE_ENGLISH_STOP_WORDS = new string[428];

    /// <summary>Path of the stop-word file, read one word per line.</summary>
    public string NoisePath = Environment.CurrentDirectory + "\\data\\stopwords.txt ";

    /// <summary>
    /// Loads the stop-word file into <see cref="CHINESE_ENGLISH_STOP_WORDS"/>.
    /// Blank lines are skipped; reading stops at end of file or when the array is full.
    /// </summary>
    public ICTCLASAnalyzer()
    {
        // The path literal ends with a space (presumably a typo); strip trailing
        // whitespace so the lookup does not depend on the platform normalizing it away.
        string path = NoisePath.TrimEnd();

        // 'using' guarantees the file handle is released even if reading throws.
        using (StreamReader reader = new StreamReader(path, System.Text.Encoding.Default))
        {
            int count = 0;
            string noise;
            while (count < CHINESE_ENGLISH_STOP_WORDS.Length
                   && (noise = reader.ReadLine()) != null)
            {
                // A blank line must not end the list early; just ignore it.
                if (noise.Length == 0)
                {
                    continue;
                }

                CHINESE_ENGLISH_STOP_WORDS[count] = noise;
                count++;
            }
        }
    }

    /// <summary>
    /// Builds the token stream for a field: ICTCLAS tokenizer, then standard,
    /// lower-case and stop-word filters, in that order.
    /// </summary>
    /// <param name="fieldName">Name of the field being analyzed (not used here).</param>
    /// <param name="reader">Source of the text to tokenize.</param>
    /// <returns>The filtered token stream.</returns>
    public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader)
    {
        TokenStream result = new ICTCLASTokenizer(reader);
        result = new StandardFilter(result);
        result = new LowerCaseFilter(result);
        result = new StopFilter(result, GetLoadedStopWords());
        return result;
    }

    /// <summary>
    /// Returns the non-null entries of <see cref="CHINESE_ENGLISH_STOP_WORDS"/>.
    /// </summary>
    /// <remarks>
    /// The shared array is padded with nulls when the file holds fewer words than its
    /// capacity. NOTE(review): StopFilter presumably cannot accept null entries —
    /// confirm against the Lucene.Net version in use.
    /// </remarks>
    private static string[] GetLoadedStopWords()
    {
        string[] buffer = new string[CHINESE_ENGLISH_STOP_WORDS.Length];
        int count = 0;
        foreach (string word in CHINESE_ENGLISH_STOP_WORDS)
        {
            if (word != null)
            {
                buffer[count] = word;
                count++;
            }
        }

        string[] words = new string[count];
        Array.Copy(buffer, words, count);
        return words;
    }
}
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步