Java 读取 Word 文件内容
/**
 * Reads a .docx file, concatenates the text of every run in its body
 * paragraphs, parses that text as markup with Jsoup and prints the
 * evaluation title, questions, options and option scores to standard output.
 *
 * <p>The markup is expected to use {@code <evaluation>}, {@code <title>},
 * {@code <tm>}, {@code <tm1>} and {@code <option score="...">} elements.
 * For each {@code <tm>} the option texts and the score attributes are each
 * joined with {@code '#'}; a {@code <tm>} without options yields empty strings.
 *
 * @param uri path of the .docx file, passed to {@code POIXMLDocument.openPackage}
 * @return the concatenated run text; if an {@link IOException} occurs the
 *         stack trace is printed and whatever text was collected so far
 *         (possibly empty) is returned
 */
public static String getDocx(String uri) {
    // Accumulates the raw text of all runs; this is also the return value.
    StringBuilder tempText = new StringBuilder();
    XWPFDocument document = null;
    try {
        document = new XWPFDocument(POIXMLDocument.openPackage(uri));
        for (XWPFParagraph paragraph : document.getParagraphs()) {
            for (XWPFRun run : paragraph.getRuns()) {
                tempText.append(run.toString());
            }
        }

        // Jsoup.parse always returns a Document, so no null check is needed.
        Document doc = Jsoup.parse(tempText.toString());

        // Evaluation title (the last <evaluation> wins if there are several).
        String title = "";
        // Question texts, one entry per <tm>.
        List<String> question_arry = new ArrayList<String>();
        // '#'-joined option texts, parallel to question_arry.
        List<String> optionContent_arry = new ArrayList<String>();
        // '#'-joined option scores, parallel to question_arry.
        List<String> option_scoreArry = new ArrayList<String>();

        for (Element evaluation : doc.select("evaluation")) {
            title = evaluation.select("title").text();
            // Select within this evaluation only; selecting from the whole
            // document would add every question once per evaluation.
            for (Element question : evaluation.select("tm")) {
                question_arry.add(question.select("tm1").text());

                StringBuilder options = new StringBuilder();
                StringBuilder scores = new StringBuilder();
                boolean first = true;
                for (Element option : question.select("option")) {
                    // Separator goes between items, so an empty option list
                    // simply produces an empty string instead of failing.
                    if (!first) {
                        options.append('#');
                        scores.append('#');
                    }
                    options.append(option.text());
                    scores.append(option.attr("score"));
                    first = false;
                }
                optionContent_arry.add(options.toString());
                option_scoreArry.add(scores.toString());
            }
        }

        System.out.println("测评标题:" + title);
        System.out.println("测评题目:" + question_arry);
        System.out.println("测评选项:" + optionContent_arry);
        System.out.println("选项权值:" + option_scoreArry);
    } catch (IOException e) {
        // Best effort: report the failure and fall through to return the
        // text gathered so far.
        e.printStackTrace();
    } finally {
        if (document != null) {
            // Release the file handle without saving: the package was opened
            // from a path, and a plain close() may write it back to disk.
            document.getPackage().revert();
        }
    }
    return tempText.toString();
}
下面附上我本地的 Word 文件内容：
<evaluation>
<title class="one">关于个人喜好的测试</title>
<tm>
<tm1>你多少岁啦</tm1>
<option score="15">10</option>
<option score="25">12</option>
<option score="35">14</option>
<option score="45">16</option>
</tm>
<tm>
<tm1>你喜欢什么颜色</tm1>
<option score="15">红色</option>
<option score="25">黑色</option>
<option score="35">蓝色</option>
<option score="45">白色</option>
</tm>
</evaluation>