C#读取word文档文本
读取word,首先得添加引用,不同的word版本对应着不同的引用
部分版本对应引用如下:
Microsoft Word 11.0 object library对应Office2003
Microsoft Word 12.0 object library对应Office2007
Microsoft Word 14.0 object library对应Office2010
Microsoft Word 15.0 object library对应Office2013
由于我电脑上安装的是 Word 2007,故需添加 Microsoft Word 12.0 Object Library。添加方法:在解决方案资源管理器中右击项目,选择 Add Reference,弹出对话框如下图:
再使用下面两个命名空间(即 using Office; 和 using Word;),如图:
完整代码如下:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Office;
using Word;

namespace ReadWordText
{
    /// <summary>
    /// Console sample that opens a Word document through COM automation and
    /// prints the text of every paragraph to standard output.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Opens the hard-coded document, writes each paragraph to the console,
        /// then closes the document and quits Word before waiting for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            Application app = null;
            Document doc = null;
            object unknow = Type.Missing;       // placeholder for optional COM arguments
            object ReadOnly = false;            // whether to open the document read-only
            // NOTE(review): Word documents this argument as an MsoEncoding constant;
            // a System.Text.Encoding instance is passed here as in the original sample — confirm.
            object encoding = Encoding.UTF8;    // UTF-8 encoding
            string str = @"C:\Users\zxy\Desktop\读取word文档.doc"; // path of the document
            object file = str;

            try
            {
                // Created inside the try so a failure to start Word is reported
                // through the same error path instead of crashing the program.
                app = new Application();
                app.Visible = false;

                doc = app.Documents.Open(ref file,
                    ref unknow, ref ReadOnly, ref unknow, ref unknow,
                    ref unknow, ref unknow, ref unknow, ref unknow,
                    ref unknow, ref encoding, ref unknow, ref unknow,
                    ref unknow, ref unknow, ref unknow);

                // Read the N-th paragraph (blank lines, headings of every level, etc. each count as a paragraph)
                //string strParaghaph = doc.Paragraphs[4].Range.Text.Trim();

                // Read the N-th sentence (blank lines, headings of every level, etc. each count as a sentence)
                // string strSentence = doc.Sentences[5].Text;

                // Read the whole document, paragraph by paragraph.
                int paragraphCount = doc.Paragraphs.Count; // total number of paragraphs
                for (int index = 1; index <= paragraphCount; index++) // Word collections are 1-based
                {
                    string paragraphText = doc.Paragraphs[index].Range.Text; // text of this paragraph
                    Console.WriteLine(paragraphText);
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine("无法读取到文本");
                // Surface the underlying cause instead of silently discarding it.
                Console.Error.WriteLine(ex.Message);
            }
            finally
            {
                // Always release the document and shut Word down; otherwise a hidden
                // WINWORD.EXE process stays alive and keeps the file locked.
                object doNotSave = WdSaveOptions.wdDoNotSaveChanges;
                try
                {
                    if (doc != null)
                    {
                        // Cast avoids the method/event name ambiguity on Document.Close.
                        ((_Document)doc).Close(ref doNotSave, ref unknow, ref unknow);
                    }
                }
                catch (Exception ex)
                {
                    Console.Error.WriteLine(ex.Message);
                }

                try
                {
                    if (app != null)
                    {
                        // Cast avoids the method/event name ambiguity on Application.Quit.
                        ((_Application)app).Quit(ref doNotSave, ref unknow, ref unknow);
                    }
                }
                catch (Exception ex)
                {
                    Console.Error.WriteLine(ex.Message);
                }
            }

            Console.ReadKey();
        }
    }
}