Atitit word ppt excel convert txt bp 等文档转换纯文本问题最佳实践.docx Atitit word ppt excel等文档转换txt问题最佳实践 目录 1.
Atitit word ppt excel convert txt bp 等文档转换纯文本问题最佳实践.docx
Atitit word ppt excel等文档转换txt问题最佳实践
目录
1.2. //分章节Section、段落Paragraph、字符串CharacterRun抽取 1
public class Word {
1.1. // Word 直接抽取全部内容
/**
 * Extracts the complete text of a Word document in a single call.
 *
 * @param is stream positioned at the start of the Word document
 * @return whatever {@code WordExtractor.getText()} yields for the document
 * @throws IOException if constructing the extractor from the stream fails
 */
public static String readDoc1(InputStream is) throws IOException {
    return new WordExtractor(is).getText();
}
1.2. //分章节Section、段落Paragraph、字符串CharacterRun抽取
/**
 * Walks a Word document section by section, paragraph by paragraph and
 * character run by character run, printing the text of every run to
 * standard output (no separator is added between runs).
 *
 * @param is stream positioned at the start of the Word document
 * @throws IOException if the document cannot be loaded from the stream
 */
public static void readDoc2(InputStream is) throws IOException {
    HWPFDocument document = new HWPFDocument(is);
    Range body = document.getRange();
    for (int sectionIdx = 0; sectionIdx < body.numSections(); sectionIdx++) {
        Section section = body.getSection(sectionIdx);
        for (int paragraphIdx = 0; paragraphIdx < section.numParagraphs(); paragraphIdx++) {
            Paragraph paragraph = section.getParagraph(paragraphIdx);
            for (int runIdx = 0; runIdx < paragraph.numCharacterRuns(); runIdx++) {
                // Emit each run's text as-is.
                System.out.print(paragraph.getCharacterRun(runIdx).text());
            }
        }
    }
}
1.3. // PowerPoint 直接抽取幻灯片的全部内容
/**
 * Extracts the complete text of a PowerPoint presentation in a single call.
 *
 * @param is stream positioned at the start of the presentation
 * @return whatever {@code PowerPointExtractor.getText()} yields for the deck
 * @throws IOException if constructing the extractor from the stream fails
 */
public static String readDoc1(InputStream is) throws IOException {
    return new PowerPointExtractor(is).getText();
}
1.4. //一张幻灯片一张幻灯片地读取
/**
 * Reads a PowerPoint presentation slide by slide, printing each slide's
 * title followed by the text of each of its text runs to standard output.
 *
 * @param is stream positioned at the start of the presentation
 * @throws IOException if the presentation cannot be loaded from the stream
 */
public static void readDoc2(InputStream is) throws IOException {
    SlideShow show = new SlideShow(new HSLFSlideShow(is));
    for (Slide slide : show.getSlides()) {
        // Title of the current slide.
        System.out.println("标题:" + slide.getTitle());
        // Text runs of the current slide (per the original note, these include the title).
        for (TextRun textRun : slide.getTextRuns()) {
            System.out.println(textRun.getText());
        }
    }
}
public class Excel {
1.5. //直接读取Excel的全部内容
/**
 * Extracts the complete text of an Excel workbook in a single call.
 *
 * <p>The extractor is configured with {@code setFormulasNotResults(false)}
 * and {@code setIncludeSheetNames(true)} before the text is read
 * (presumably: emit computed values instead of formula text, and prefix each
 * sheet with its name; confirm against the POI documentation).
 *
 * @param is stream positioned at the start of the workbook
 * @return whatever {@code ExcelExtractor.getText()} yields for the workbook
 * @throws IOException if the workbook cannot be loaded from the stream
 */
public static String readDoc1(InputStream is) throws IOException {
    final POIFSFileSystem fileSystem = new POIFSFileSystem(is);
    final ExcelExtractor textExtractor = new ExcelExtractor(new HSSFWorkbook(fileSystem));
    textExtractor.setFormulasNotResults(false);
    textExtractor.setIncludeSheetNames(true);
    return textExtractor.getText();
}
1.6. //读取时细化到Sheet、行甚至单元格
/**
 * Computes a weighted average over every row of the workbook's first sheet:
 * the sum of (weight * value) divided by the sum of weights.
 *
 * <p>If a required cell is missing or is not numeric, an error message is
 * written to standard error and the JVM is terminated with status -2.
 * If the sheet has no rows the result is {@code NaN} (0.0 / 0.0).
 *
 * @param is stream positioned at the start of the workbook
 * @return the weighted average, or {@code NaN} when the sheet has no rows
 * @throws IOException if the workbook cannot be loaded from the stream
 */
public static double getAvg(InputStream is) throws IOException {
    HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(is));
    // Only the first sheet is considered.
    HSSFSheet sheet = wb.getSheetAt(0);
    double molecule = 0.0;     // running sum of weight * value (numerator)
    double denominator = 0.0;  // running sum of weights
    // Visit the sheet row by row.
    Iterator<Row> riter = sheet.rowIterator();
    while (riter.hasNext()) {
        HSSFRow row = (HSSFRow) riter.next();
        // NOTE(review): both operands are read from column index 4, so the
        // result is sum(x*x) / sum(x). This looks like a typo for two
        // different columns; the intended index cannot be determined from
        // this snippet, so it is left unchanged. Confirm against the sheet layout.
        HSSFCell cell1 = row.getCell(4);
        HSSFCell cell2 = row.getCell(4);
        // getCell returns null for an undefined cell; treat that as the same
        // "not a number" failure instead of letting a NullPointerException escape.
        if (cell1 == null || cell1.getCellType() != HSSFCell.CELL_TYPE_NUMERIC) {
            System.err.println("数字类型错误!");
            System.exit(-2);
        }
        if (cell2 == null || cell2.getCellType() != HSSFCell.CELL_TYPE_NUMERIC) {
            System.err.println("数字类型错误!");
            System.exit(-2);
        }
        // Read the numeric values directly: round-tripping through
        // toString()/Float.parseFloat truncated one operand to float
        // precision and failed outright on date-formatted numeric cells.
        double value = cell1.getNumericCellValue();
        double weight = cell2.getNumericCellValue();
        denominator += weight;
        molecule += weight * value;
    }
    return molecule / denominator;
}
java使用poi读取ppt文件和poi读取excel、word示例 - CSDN博客.html