将 Word 文件转换为富文本编辑器内容(支持图片)

思路是这样:先通过 Aspose.Words.dll 这个 DLL 将 Word 文件转换为 HTML 文件。

        /// <summary>
        /// Converts a Word document to an HTML file using Aspose.Words.
        /// </summary>
        /// <param name="wordFileName">Path of the Word document to convert; must include a file extension.</param>
        /// <param name="folderName">Base name (without extension) of the HTML file to create.</param>
        /// <returns>
        /// Path of the generated file. The Word path with its extension removed is used as the
        /// output directory, and the file inside it is named <paramref name="folderName"/> + ".html".
        /// </returns>
        /// <exception cref="System.ArgumentException">
        /// <paramref name="wordFileName"/> is null, empty, or has no file extension.
        /// </exception>
        private string WordToHtml(string wordFileName, string folderName)
        {
            if (string.IsNullOrEmpty(wordFileName) || !System.IO.Path.HasExtension(wordFileName))
            {
                throw new System.ArgumentException("The Word file path must include a file extension.", "wordFileName");
            }

            // Strip only the trailing extension. A textual Replace of the extension would also
            // remove the same character sequence anywhere else in the path.
            string outputDirectory = System.IO.Path.ChangeExtension(wordFileName, null);
            string strSaveFileName = outputDirectory + @"\" + folderName + ".html";

            // Ensure the target directory exists so the save does not depend on the caller
            // having created it beforehand; this is a no-op when it is already there.
            System.IO.Directory.CreateDirectory(outputDirectory);

            Aspose.Words.Document doc = new Aspose.Words.Document(wordFileName);
            doc.Save(strSaveFileName, Aspose.Words.SaveFormat.Html);
            return strSaveFileName;
        }

接下来读取 HTML 文件,将其内容转换为字符串,然后提取富文本框需要的部分,并替换 IMG 路径。

        /// <summary>
        /// Reads an HTML file and returns its entire content as a string.
        /// </summary>
        /// <param name="strHtmlFileName">Path of the HTML file to read.</param>
        /// <returns>The full text of the file, decoded as UTF-8.</returns>
        private string getHtml(string strHtmlFileName)
        {
            System.Text.Encoding encoding = System.Text.Encoding.GetEncoding("utf-8");

            // The using block disposes the reader (and its file handle) even when ReadToEnd throws.
            using (StreamReader sr = new StreamReader(strHtmlFileName, encoding))
            {
                return sr.ReadToEnd();
            }
        }
        /// <summary>
        /// Extracts the style block and the body content from an HTML document and cleans the
        /// body so it can be placed inside the rich-text editor.
        /// </summary>
        /// <param name="strHtml">Full HTML text of the converted document.</param>
        /// <param name="strFileName">
        /// Path or name of the source Word file. Its file name, with the extension replaced by
        /// "files", is the image folder reference that gets re-rooted under the upload directory.
        /// </param>
        /// <returns>
        /// The style block (empty when none is found) followed by the cleaned body markup
        /// (empty when the document has no body element). Returns an empty string for null or
        /// empty input.
        /// </returns>
        private string findUsedFromHtml(string strHtml, string strFileName)
        {
            if (string.IsNullOrEmpty(strHtml))
            {
                return string.Empty;
            }

            string strStyle = extractStyleBlock(strHtml);

            // Body section: take everything between the end of the opening <body ...> tag and </body>.
            string strBody = string.Empty;
            int bodyStart = strHtml.IndexOf("<body");
            int bodyEnd = bodyStart == -1 ? -1 : strHtml.IndexOf("</body>", bodyStart);
            if (bodyEnd != -1)
            {
                strBody = strHtml.Substring(bodyStart, bodyEnd - bodyStart + 7);

                // The lookbehind anchors the match right after the first '>' (the end of the
                // opening body tag); the lookahead stops it before the closing tag.
                Regex reg = new Regex(@"(?is)(?<=>).*(?=</body>)", RegexOptions.IgnoreCase);
                strBody = reg.Match(strBody).Value;
            }

            // Rewrite image folder references so they point at the upload directory.
            if (!string.IsNullOrEmpty(strFileName))
            {
                string fullName = strFileName.Substring(strFileName.LastIndexOf("\\") + 1);

                // Swap only the extension for "files"; replacing the text "doc" everywhere
                // would also mangle names such as "document.doc".
                int dotIndex = fullName.LastIndexOf('.');
                string baseName = dotIndex >= 0 ? fullName.Substring(0, dotIndex) : fullName;
                string strOld = (baseName + ".files").Replace(" ", "%20");

                // ApplicationPath has no trailing slash for non-root applications, so add one
                // before appending the relative upload path.
                string appPath = Request.ApplicationPath ?? string.Empty;
                if (appPath.Length == 0 || appPath[appPath.Length - 1] != '/')
                {
                    appPath += "/";
                }
                string strNew = appPath + "Upload/Product/PP_Doc/" + strOld;

                strBody = strBody.Replace(strOld, strNew);
            }

            // Remove the closing tag first: once "v:imagedata" has been renamed to "img",
            // the closing tag would read "</img>" and could no longer be matched.
            strBody = strBody.Replace("</v:imagedata>", "");
            strBody = strBody.Replace("v:imagedata", "img");

            // Strip the conditional-comment markers found in the markup.
            string[] conditionalMarkers = new string[]
            {
                "<![endif]-->",
                "<!--[endif]-->",
                "<!--[if !supportLists]-->",
                "<!--[if !vml]-->",
                "<![if !vml]>",
                "<![if !supportLists]>",
                "<![endif]>",
                "<!--[if gte vml 1]>"
            };
            foreach (string marker in conditionalMarkers)
            {
                strBody = strBody.Replace(marker, "");
            }

            // Drop every <v:shape>...</v:shape> element together with its content.
            Regex shapeRegex = new Regex(@"<v:shape[\s\t\r\n\S]*?</v:shape>", RegexOptions.Multiline | RegexOptions.CultureInvariant | RegexOptions.IgnoreCase);
            strBody = shapeRegex.Replace(strBody, "");

            // Strings are immutable, so the result of Replace has to be assigned back.
            strBody = strBody.Replace("<body>", "");
            strBody = strBody.Replace("</body>", "");

            return strStyle + strBody;
        }

        /// <summary>
        /// Finds the style block whose content is wrapped in an HTML comment and returns it,
        /// including the surrounding style tags.
        /// </summary>
        /// <param name="html">HTML text to search; must not be null.</param>
        /// <returns>The matching style block, or an empty string when no such block exists.</returns>
        private static string extractStyleBlock(string html)
        {
            const string styleOpen = "<style>";
            const string styleClose = "</style>";
            const string commentOpen = "<!--";
            const string commentClose = "-->";

            // Locate a "<style>" whose "<!--" starts exactly 9 characters later, i.e. the
            // 7-character tag followed by two characters (presumably a CRLF line break).
            int styleStart = -1;
            int index = 0;
            while (index < html.Length)
            {
                int candidate = html.IndexOf(styleOpen, index);
                if (candidate == -1)
                {
                    break;
                }
                int commentStart = html.IndexOf(commentOpen, candidate);
                if (commentStart - candidate == 9)
                {
                    styleStart = candidate;
                    break;
                }
                index = candidate + styleOpen.Length;
            }

            if (styleStart == -1)
            {
                return string.Empty;
            }

            // Locate a "-->" that is followed by "</style>" exactly 5 characters later, i.e. the
            // 3-character marker plus two characters. Searching from the style start keeps
            // earlier comments in the document from being picked up.
            index = styleStart;
            while (index < html.Length)
            {
                int commentEnd = html.IndexOf(commentClose, index);
                if (commentEnd == -1)
                {
                    break;
                }
                int styleEnd = html.IndexOf(styleClose, commentEnd);
                if (styleEnd - commentEnd == 5)
                {
                    return html.Substring(styleStart, styleEnd - styleStart + styleClose.Length);
                }
                index = commentEnd + commentClose.Length;
            }

            return string.Empty;
        }

        /// <summary>
        /// Strips the html/body wrapper tags from converted HTML and re-roots every image
        /// source under the upload directory of the document.
        /// </summary>
        /// <param name="strHtml">HTML text produced by the Word conversion.</param>
        /// <param name="folderName">
        /// Name whose extension-less form is the image folder below "/Upload/Product/PP_Doc/".
        /// </param>
        /// <returns>The HTML fragment with rewritten img src attributes.</returns>
        private string getUseHtml(string strHtml, string folderName)
        {
            string strBody = strHtml;
            strBody = strBody.Replace("<body>", "");
            strBody = strBody.Replace("</body>", "");
            strBody = strBody.Replace("<html>", "");
            strBody = strBody.Replace("</html>", "");

            // Remove only the trailing extension; a name without a dot is used unchanged.
            int dotIndex = folderName.LastIndexOf('.');
            string folder = dotIndex >= 0 ? folderName.Substring(0, dotIndex) : folderName;
            string prefix = "/Upload/Product/PP_Doc/" + folder + "/";

            Regex imgSrcRegex = new Regex(@"<img[\s]+src[\s]*=[\s]*((['""](?<src>[^'""]*)[\'""])|(?<src>[^\s]*))", RegexOptions.Multiline);

            // Rewrite each src value at its own position. A global string Replace would prefix
            // an image twice when it occurs more than once, and would also touch the same text
            // outside of img tags.
            strBody = imgSrcRegex.Replace(strBody, delegate(Match match)
            {
                Group src = match.Groups["src"];
                if (src.Length == 0)
                {
                    // Nothing to re-root; leave the tag untouched.
                    return match.Value;
                }

                int offset = src.Index - match.Index;
                return match.Value.Substring(0, offset)
                    + prefix
                    + src.Value
                    + match.Value.Substring(offset + src.Length);
            });

            return strBody;
        }

前台的富文本编辑器直接读取这个字符串就可以了。

posted @ 2017-08-21 15:14  sprince  阅读(4648)  评论(0编辑  收藏  举报