HTML页面导出为Word

    /// <summary>
    /// Click handler for the export button: converts the HTML posted in the hidden field
    /// into an MHT document and sends it to the browser as a timestamped .doc download.
    /// </summary>
    /// <param name="sender">The control that raised the event.</param>
    /// <param name="e">Event data (unused).</param>
    protected void btnExport_Click(object sender, EventArgs e)
    {
        // Download name is the current local time, e.g. 20140210160800.doc.
        string downloadName = DateTime.Now.ToString("yyyyMMddHHmmss") + ".doc";

        // hfdHtml carries the HTML passed from the front end; remember to set
        // ValidateRequest="false" on the page so the markup is accepted.
        DnLoadFileFromMemoryStream(downloadName, HtmlToMht(hfdHtml.Value));
    }

    /// <summary>
    /// Wraps an HTML fragment in a multipart/related MHT (MHTML) document, the single-file
    /// format Word can open, and attaches every image that <see cref="ConvertBase64"/> can
    /// load as a base64 part.
    /// </summary>
    /// <param name="strHtml">
    /// HTML fragment to export. <c>null</c> is treated as an empty fragment. Literal
    /// <c>&amp;lt;</c> / <c>&amp;gt;</c> sequences are turned back into angle brackets first.
    /// </param>
    /// <returns>
    /// The MHT document text with CRLF line endings. The HTML part is quoted-printable
    /// encoded from UTF-8, so it contains ASCII characters only.
    /// </returns>
    public static string HtmlToMht(string strHtml)
    {
        const string CrLf = "\r\n";
        const string Boundary = "----=_AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
        const int Base64LineLength = 76;

        if (strHtml == null)
        {
            strHtml = string.Empty;
        }

        // The original replacement pair had degraded into a no-op ("<" -> "<"); restore the
        // entity decoding it was evidently meant to perform.
        // NOTE(review): assumes the client escapes angle brackets before posting - confirm.
        strHtml = strHtml.Replace("&lt;", "<").Replace("&gt;", ">");

        // Build the plain HTML document first; it is quoted-printable encoded as a whole below,
        // so no hand-written "=3D" escapes or per-attribute workarounds are needed here.
        StringBuilder html = new StringBuilder();
        html.AppendLine("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">");
        html.AppendLine("<html xmlns=\"http://www.w3.org/1999/xhtml\">");
        html.AppendLine("<head>");
        html.AppendLine("<style>");
        html.AppendLine("@page WordSection1");
        html.AppendLine("{size:532.5pt 757.5pt;");
        html.AppendLine("mso-page-orientation:poPortrait;");
        html.AppendLine("margin:26.25pt 26.25pt 26.25pt 26.25pt;");
        html.AppendLine("mso-header-margin:42.55pt;");
        html.AppendLine("mso-footer-margin:49.6pt;");
        html.AppendLine("mso-paper-source:0;}");
        html.AppendLine("div.WordSection1");
        html.AppendLine("{page:WordSection1;}");
        html.AppendLine("</style>");
        html.AppendLine("</head>");
        html.AppendLine("<body>");
        html.AppendLine("<div class=WordSection1>");
        html.AppendLine(strHtml);
        html.AppendLine("</div>");
        html.AppendLine("</body>");
        html.Append("</html>");

        StringBuilder sb = new StringBuilder();

        // Top-level MIME headers. Continuation lines must begin with whitespace.
        sb.Append("From:").Append(CrLf);
        sb.Append("Subject:").Append(CrLf);
        sb.Append("Date:").Append(CrLf);
        sb.Append("MIME-Version: 1.0").Append(CrLf);
        sb.Append("Content-Type: multipart/related;").Append(CrLf);
        sb.Append("\ttype=\"text/html\";").Append(CrLf);
        sb.Append("\tboundary=\"").Append(Boundary).Append("\"").Append(CrLf);
        sb.Append(CrLf);

        // Root part: the HTML document. The declared charset matches the bytes that
        // EncodeQuotedPrintable produces (UTF-8).
        sb.Append("--").Append(Boundary).Append(CrLf);
        sb.Append("Content-Type: text/html;").Append(CrLf);
        sb.Append("\tcharset=\"utf-8\"").Append(CrLf);
        sb.Append("Content-Transfer-Encoding: quoted-printable").Append(CrLf);
        sb.Append(CrLf);
        sb.Append(EncodeQuotedPrintable(html.ToString()));
        sb.Append(CrLf);

        // One part per image whose data could be loaded; Content-Location repeats the src
        // value exactly as it appears in the HTML so the reader can pair them up.
        string[] imgSrcs = GetHtmlImageUrlList(strHtml);
        foreach (string imgSrc in imgSrcs)
        {
            if (string.IsNullOrEmpty(imgSrc))
            {
                continue;
            }

            string base64 = ConvertBase64(imgSrc);
            if (string.IsNullOrEmpty(base64))
            {
                continue;
            }

            sb.Append("--").Append(Boundary).Append(CrLf);
            sb.Append("Content-Type: ").Append(GetImageContentType(imgSrc)).Append(CrLf);
            sb.Append("Content-Transfer-Encoding: base64").Append(CrLf);
            sb.Append("Content-Location: ").Append(imgSrc).Append(CrLf);
            sb.Append(CrLf);

            // MIME limits base64 body lines to 76 characters.
            for (int offset = 0; offset < base64.Length; offset += Base64LineLength)
            {
                int count = Math.Min(Base64LineLength, base64.Length - offset);
                sb.Append(base64, offset, count).Append(CrLf);
            }

            sb.Append(CrLf);
        }

        sb.Append("--").Append(Boundary).Append("--").Append(CrLf);
        return sb.ToString();
    }

    /// <summary>
    /// Encodes text as quoted-printable over its UTF-8 bytes, with CRLF line endings and
    /// soft line breaks so that no output line exceeds 76 characters.
    /// </summary>
    private static string EncodeQuotedPrintable(string text)
    {
        const string HexDigits = "0123456789ABCDEF";
        const int MaxContentLength = 75; // leaves room for the '=' of a soft line break

        string[] lines = text.Replace("\r\n", "\n").Replace('\r', '\n').Split('\n');
        StringBuilder encoded = new StringBuilder(text.Length + text.Length / 4);

        foreach (string line in lines)
        {
            byte[] bytes = Encoding.UTF8.GetBytes(line);
            int lineLength = 0;

            for (int i = 0; i < bytes.Length; i++)
            {
                byte b = bytes[i];
                bool isLastByte = i == bytes.Length - 1;
                bool isWhitespace = b == (byte)' ' || b == (byte)'\t';

                // Printable ASCII except '=' is copied through; whitespace is copied through
                // unless it ends the line, where it has to be escaped to survive transport.
                bool isLiteral = (b >= 33 && b <= 126 && b != (byte)'=')
                    || (isWhitespace && !isLastByte);
                int tokenLength = isLiteral ? 1 : 3;

                if (lineLength + tokenLength > MaxContentLength)
                {
                    encoded.Append("=\r\n");
                    lineLength = 0;
                }

                if (isLiteral)
                {
                    encoded.Append((char)b);
                }
                else
                {
                    encoded.Append('=').Append(HexDigits[b >> 4]).Append(HexDigits[b & 0x0F]);
                }

                lineLength += tokenLength;
            }

            encoded.Append("\r\n");
        }

        return encoded.ToString();
    }

    /// <summary>
    /// Picks an image MIME type from the URL's file extension; anything unrecognized is
    /// reported as image/jpeg, which is what every image was labelled as before.
    /// </summary>
    private static string GetImageContentType(string imageUrl)
    {
        string url = imageUrl.Trim();

        if (url.EndsWith(".png", StringComparison.OrdinalIgnoreCase))
        {
            return "image/png";
        }

        if (url.EndsWith(".gif", StringComparison.OrdinalIgnoreCase))
        {
            return "image/gif";
        }

        if (url.EndsWith(".bmp", StringComparison.OrdinalIgnoreCase))
        {
            return "image/bmp";
        }

        return "image/jpeg";
    }

    /// <summary>
    /// Collects the <c>src</c> value of every <c>&lt;img&gt;</c> tag found in an HTML string.
    /// The values are returned exactly as written in the markup; no path conversion is done.
    /// </summary>
    /// <param name="sHtmlText">HTML markup to scan.</param>
    /// <returns>
    /// The captured URLs in the order the tags appear; an empty array when nothing matches.
    /// </returns>
    public static string[] GetHtmlImageUrlList(string sHtmlText)
    {
        // Matches an img tag (any letter case) and captures its src value, quoted or not,
        // into the named group "imgUrl".
        const string imgTagPattern = @"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>";

        MatchCollection found = Regex.Matches(sHtmlText, imgTagPattern, RegexOptions.IgnoreCase);

        string[] urls = new string[found.Count];
        for (int index = 0; index < urls.Length; index++)
        {
            urls[index] = found[index].Groups["imgUrl"].Value;
        }

        return urls;
    }

    /// <summary>
    /// Reads a site-local image and returns its content as a base64 string.
    /// </summary>
    /// <param name="filepath">
    /// Image src value as found in the page: a virtual or relative path that
    /// <c>Server.MapPath</c> can resolve for the current request.
    /// </param>
    /// <returns>
    /// The base64-encoded file content, or an empty string when <paramref name="filepath"/>
    /// is null or blank, starts with "http" (remote image), or is a <c>data:</c> URI.
    /// </returns>
    /// <remarks>
    /// Errors raised while mapping or reading the file (for example a missing file) are not
    /// caught and propagate to the caller.
    /// </remarks>
    public static string ConvertBase64(string filepath)
    {
        if (filepath == null)
        {
            return string.Empty;
        }

        string source = filepath.Trim();

        // StartsWith copes with sources shorter than the prefix, where Substring(0, 4) threw.
        bool isLocalPath = source.Length > 0
            && !source.StartsWith("http", StringComparison.OrdinalIgnoreCase)
            && !source.StartsWith("data:", StringComparison.OrdinalIgnoreCase);
        if (!isLocalPath)
        {
            return string.Empty;
        }

        string path = HttpContext.Current.Server.MapPath(source);

        // ReadAllBytes opens the file read-only and reads it completely; the previous
        // FileStream asked for read/write access and relied on one Read call filling the buffer.
        return Convert.ToBase64String(File.ReadAllBytes(path));
    }

    /// <summary>
    /// Sends text to the browser as a file download on the current HTTP response.
    /// </summary>
    /// <param name="sFileName">File name offered to the browser in Content-Disposition.</param>
    /// <param name="sContent">Text to send, written as UTF-8 bytes; <c>null</c> sends an empty file.</param>
    /// <remarks>
    /// Output already buffered for the response is discarded, and anything written after
    /// this call is suppressed, so the downloaded file contains only <paramref name="sContent"/>.
    /// </remarks>
    public static void DnLoadFileFromMemoryStream(string sFileName, string sContent)
    {
        byte[] payload = Encoding.UTF8.GetBytes(sContent ?? string.Empty);
        HttpResponse response = HttpContext.Current.Response;

        // Drop any page markup buffered so far; it would otherwise precede the file bytes.
        response.Clear();
        response.ContentEncoding = Encoding.UTF8;
        response.ContentType = "application/octet-stream";
        // Quoted so that names containing spaces or separators survive header parsing.
        response.AddHeader("Content-Disposition", "attachment; filename=\"" + sFileName + "\"");
        response.BinaryWrite(payload);

        // Send the file now and keep the rest of the page lifecycle from appending its HTML.
        // This avoids Response.End(), which aborts the thread with an exception.
        response.Flush();
        response.SuppressContent = true;
        HttpContext.Current.ApplicationInstance.CompleteRequest();
    }

 

posted on 2014-02-10 16:08  kingtiger  阅读(940)  评论(0)  编辑  收藏  举报

导航