import java.io.*;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.util.Span;
/**
 * Small demo of Apache OpenNLP sentence detection and tokenization.
 *
 * <p>Both helpers load their model from a file in the current working directory on every call
 * and return {@code null} when the model cannot be loaded.
 */
public class Testing_openNLP {
    /*
     * Official tutorial (Apache OpenNLP Developer Documentation):
     *   http://opennlp.apache.org/documentation/1.5.3/manual/opennlp.html
     * The OpenNLP models can be downloaded from:
     *   http://opennlp.sourceforge.net/models-1.5/
     * A good tutorial about the OpenNLP tools:
     *   http://www.programcreek.com/2012/05/opennlp-tutorial/
     */

    /** Path of the sentence-detector model, resolved against the working directory. */
    private static final String SENTENCE_MODEL_PATH = "en-sent.bin";

    /** Path of the tokenizer model, resolved against the working directory. */
    private static final String TOKENIZER_MODEL_PATH = "en-token.bin";

    /**
     * Runs both helpers on a fixed sample string. The results are not printed; they are only
     * held in locals (e.g. for inspection in a debugger).
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // String testString = "This isn't the greatest example sentence in the world because I've seen better. Neither is this one. This one's not bad, though.";
        String testString = "Hi. How are you? This is &3 $444 Mike." ;
        String[] tokens = Token(testString);
        String[] sentences = sentenceSegmentation(testString);
    }

    /**
     * Sentence segmentation: splits {@code str} into sentences with the model stored at
     * {@code en-sent.bin}.
     *
     * @param str the text to split
     * @return the detected sentences, or {@code null} if the model file is missing or cannot
     *         be read (the cause is printed to standard error)
     */
    public static String[] sentenceSegmentation(String str) {
        SentenceModel model = null;
        // try-with-resources closes the stream on every path. FileNotFoundException is an
        // IOException, so a missing file and a failed read are reported the same way.
        try (InputStream modelIn = new FileInputStream(SENTENCE_MODEL_PATH)) {
            model = new SentenceModel(modelIn);
        } catch (IOException e) {
            e.printStackTrace();
        }
        if (model == null) {
            // Loading failed: never hand a null model to the detector.
            return null;
        }
        SentenceDetectorME sentenceDetector = new SentenceDetectorME(model);
        return sentenceDetector.sentDetect(str);
    }

    /**
     * Tokenization: splits {@code str} into tokens with the model stored at
     * {@code en-token.bin}.
     *
     * @param str the text to tokenize
     * @return the tokens, or {@code null} if the model file is missing or cannot be read
     *         (the cause is printed to standard error)
     */
    public static String[] Token(String str) {
        TokenizerModel model = null;
        // Same loading scheme as sentenceSegmentation, including reporting a missing file
        // instead of swallowing it silently.
        try (InputStream modelIn = new FileInputStream(TOKENIZER_MODEL_PATH)) {
            model = new TokenizerModel(modelIn);
        } catch (IOException e) {
            e.printStackTrace();
        }
        if (model == null) {
            // Loading failed: never hand a null model to the tokenizer.
            return null;
        }
        TokenizerME tokenizer = new TokenizerME(model);
        // double tokenProbs[] = tokenizer.getTokenProbabilities();//must be called directly after one of the tokenize methods was called.
        return tokenizer.tokenize(str);
    }
}