Word或Excel程序是以COM组件的形式存在的。如果能在Java中调用Word的COM组件,就可以使用它提供的方法获取Word文档中的文本信息,目前网上也有很多提供这种操作的工具,jacob(Java-COM Bridge)就是其中之一。使用jacob前应确保本机已安装Word应用程序,否则无法建立Java-COM桥,也就无法解析文档。
jacob的下载地址为http://sourceforge.net/project/showfiles.php?group_id=109543&package_id=118368
下载到本机后解压缩。将jacob.jar复制到项目目录,并添加到Java Build Path,然后将jacob.dll文件放入系统盘的system32文件夹下(或其他位于java.library.path中的目录)。如果没有放置该文件,运行时会抛出java.lang.UnsatisfiedLinkError,提示"no jacob-1.14.3-x86 in java.library.path"。
1package test;
2
3import com.jacob.activeX.ActiveXComponent;
4import com.jacob.com.Dispatch;
5import com.jacob.com.Variant;
6
7public class WordReader {
8
9 public static void extractDoc(String inputFile, String outputFile){
10 boolean flag = false;
11 //打开word应用程序,生成一个ActivexComponent对象
12 ActiveXComponent app = new ActiveXComponent("Word.Application");
13 try{
14 //设置Word不可见
15 app.setProperty("Visible", new Variant(false));
16 //打开word文件
17 Dispatch doc1 = app.getProperty("Documents").toDispatch();
18 Dispatch doc2 = Dispatch.invoke(doc1, "Open", Dispatch.Method, new Object[]{ inputFile, new Variant(false),new Variant(true)},new int[1]).toDispatch();
19 //作文txt格式保存到临时文件
20 Dispatch.invoke(doc2, "SaveAs", Dispatch.Method, new Object[]{outputFile, new Variant(7)}, new int[1]);
21 //关闭word
22 Variant f = new Variant(false);
23 Dispatch.call(doc2, "Close", f);
24 flag = true;
25 }catch(Exception e){
26 e.printStackTrace();
27 }finally{
28 app.invoke("Quit", new Variant[]{});
29 }
30 if(flag == true){
31 System.out.println("Transformed Successfully!");
32 }else{
33 System.out.println("Transform Failed");
34 }
35
36 }
37
38 /**
39 * @param args
40 */
41 public static void main(String[] args) {
42 //对测试文本进行处理
43 WordReader.extractDoc("d:/index/网点地址.doc", "d:/index/网点地址.txt");
44 }
45}
46
2
3import com.jacob.activeX.ActiveXComponent;
4import com.jacob.com.Dispatch;
5import com.jacob.com.Variant;
6
7public class WordReader {
8
9 public static void extractDoc(String inputFile, String outputFile){
10 boolean flag = false;
11 //打开word应用程序,生成一个ActivexComponent对象
12 ActiveXComponent app = new ActiveXComponent("Word.Application");
13 try{
14 //设置Word不可见
15 app.setProperty("Visible", new Variant(false));
16 //打开word文件
17 Dispatch doc1 = app.getProperty("Documents").toDispatch();
18 Dispatch doc2 = Dispatch.invoke(doc1, "Open", Dispatch.Method, new Object[]{ inputFile, new Variant(false),new Variant(true)},new int[1]).toDispatch();
19 //作文txt格式保存到临时文件
20 Dispatch.invoke(doc2, "SaveAs", Dispatch.Method, new Object[]{outputFile, new Variant(7)}, new int[1]);
21 //关闭word
22 Variant f = new Variant(false);
23 Dispatch.call(doc2, "Close", f);
24 flag = true;
25 }catch(Exception e){
26 e.printStackTrace();
27 }finally{
28 app.invoke("Quit", new Variant[]{});
29 }
30 if(flag == true){
31 System.out.println("Transformed Successfully!");
32 }else{
33 System.out.println("Transform Failed");
34 }
35
36 }
37
38 /**
39 * @param args
40 */
41 public static void main(String[] args) {
42 //对测试文本进行处理
43 WordReader.extractDoc("d:/index/网点地址.doc", "d:/index/网点地址.txt");
44 }
45}
46