C#读取word文档文本

读取word,首先得添加引用,不同的word版本对应着不同的引用

部分版本对应引用如下:

Microsoft Word 11.0 object library对应Office2003
Microsoft Word 12.0 object library对应Office2007
Microsoft Word 14.0 object library对应Office2010
Microsoft Word 15.0 object library对应Office2013

 

由于我电脑上安装的是 Word 2007,故添加 Microsoft Word 12.0 Object Library。添加方法:在解决方案资源管理器中右击项目,选择 Add Reference,弹出的对话框如下图:

 

 

 
再引入下面两个命名空间(即下方完整代码中的 using Office; 和 using Word;),如图:

 

 

 

 完整代码如下:

  1 using System;
  2 
  3 using System.Collections.Generic;
  4 
  5 using System.Linq;
  6 
  7 using System.Text;
  8 
  9 using Office;
 10 
 11 using Word;
 12 
 namespace ReadWordText
 {
     /// <summary>
     /// Console sample that opens a Word document through the Word interop
     /// library and prints every paragraph of it to standard output.
     /// </summary>
     class Program
     {
         /// <summary>
         /// Opens the hard-coded document, writes each paragraph to the console,
         /// then closes the document and quits Word before waiting for a key press.
         /// </summary>
         /// <param name="args">Command-line arguments (not used).</param>
         static void Main(string[] args)
         {
             Application app = null;
             Document doc = null;

             // Placeholder passed for every optional interop argument we do not set.
             object unknow = Type.Missing;

             // Whether the document is opened read-only.
             object ReadOnly = false;

             // NOTE(review): the Word API documents the Encoding argument of
             // Documents.Open as an MsoEncoding constant, not a System.Text.Encoding
             // instance - confirm and switch to the UTF-8 MsoEncoding value if needed.
             object encoding = Encoding.UTF8;

             // Passed as SaveChanges on Close/Quit: never write the document back.
             object saveChanges = false;

             // Path of the document to read.
             string str = @"C:\Users\zxy\Desktop\读取word文档.doc";
             object file = str;

             try
             {
                 // Created inside the try block so a failure to start Word is
                 // reported like any other read failure instead of crashing.
                 app = new Application();
                 app.Visible = false;

                 doc = app.Documents.Open(ref file,
                     ref unknow, ref ReadOnly, ref unknow, ref unknow,
                     ref unknow, ref unknow, ref unknow, ref unknow,
                     ref unknow, ref encoding, ref unknow, ref unknow,
                     ref unknow, ref unknow, ref unknow);

                 // To read a single paragraph (blank lines and headings of every
                 // level each count as one paragraph):
                 // string strParaghaph = doc.Paragraphs[4].Range.Text.Trim();

                 // To read a single sentence (blank lines and headings of every
                 // level each count as one sentence):
                 // string strSentence = doc.Sentences[5].Text;

                 // Read the whole document paragraph by paragraph, walking
                 // indices 1 through Paragraphs.Count inclusive.
                 int paragraphCount = doc.Paragraphs.Count;
                 for (int index = 1; index <= paragraphCount; index++)
                 {
                     string paragraphText = doc.Paragraphs[index].Range.Text;
                     Console.WriteLine(paragraphText);
                 }
             }
             catch (Exception ex)
             {
                 Console.WriteLine("无法读取到文本");

                 // Surface the underlying reason instead of discarding it.
                 Console.WriteLine(ex.Message);
             }
             finally
             {
                 // Always release the document and the hidden Word instance;
                 // otherwise a background WINWORD.EXE process stays alive and
                 // keeps the file locked. Each step is guarded separately so a
                 // failing Close does not prevent Quit from running.
                 if (doc != null)
                 {
                     try
                     {
                         doc.Close(ref saveChanges, ref unknow, ref unknow);
                     }
                     catch (Exception ex)
                     {
                         Console.WriteLine(ex.Message);
                     }
                 }

                 if (app != null)
                 {
                     try
                     {
                         app.Quit(ref saveChanges, ref unknow, ref unknow);
                     }
                     catch (Exception ex)
                     {
                         Console.WriteLine(ex.Message);
                     }
                 }
             }

             // Keep the console window open until the user presses a key.
             Console.ReadKey();
         }
     }
 }
120 
121  

 



 

posted @ 2022-03-04 14:15  电子_精灵  阅读(925)  评论(0)  编辑  收藏  举报