将EXCEL表格文字图片信息转成RTF格式写入数据库BLOB字段

最近在项目开发过程中,有一个需求是将Excel的文字图片信息导入数据库。

数据库中已存在的数据格式是RTF富文本的内容转成byte[]写入Oracle数据库的Blob字段的,

因此在不影响软件和保证已存在数据格式的前提下,需要将Excel的文字图片信息做转换处理。

 

流程如下:

 

需要导入的Excel文件内容

软件界面显示效果

读取EXCEL文件文字、图片信息,并将EXCEL文字、图片转换成RTF格式

这里我用的NPOI 2.3.0.0

  1 //RTF格式开始的固定字符串
  2 private const string RtfStartTxt = @"{\rtf1\deff0{\fonttbl{\f0 Calibri;}{\f1\fcharset134 Tahoma;}}{\colortbl\red0\green0\blue0 ;\red0\green0\blue255 ;}{\*\defchp \fs22}{\*\listoverridetable}{\stylesheet {\ql\fs22 Normal;}{\*\cs1\f1\fs18 Default Paragraph Font;}{\*\cs2\sbasedon1\f1\fs18 Line Number;}{\*\cs3\ul\fs22\cf1 Hyperlink;}{\*\ts4\tsrowd\fs22\ql\trautofit1\tscellpaddfl3\tscellpaddl108\tscellpaddfr3\tscellpaddr108\tsvertalt\cltxlrtb Normal Table;}{\*\ts5\tsrowd\sbasedon4\fs22\ql\trbrdrt\brdrs\brdrw10\trbrdrl\brdrs\brdrw10\trbrdrb\brdrs\brdrw10\trbrdrr\brdrs\brdrw10\trbrdrh\brdrs\brdrw10\trbrdrv\brdrs\brdrw10\trautofit1\tscellpaddfl3\tscellpaddl108\tscellpaddfr3\tscellpaddr108\tsvertalt\cltxlrtb Table Simple 1;}}\nouicompat\splytwnine\htmautsp\sectd\pard\plain\ql";
  3 //RTF代表文本的首字符串
  4 private const string RtfTxtStartTxt = @"{\f1\fs18\cf0 ";
  5 //RTF格式中代表一个回车
  6 private const string RtfEnter = @"\f2\fs18\cf0\par\pard\plain\ql";
  7 //RTF格式结束的固定字符串
  8 private const string RtfEndTxt = @"\f1\fs18\cf0\par}";
  9 //将字符串转换成RTF格式的内容
 10 private static string GetRichText(string str)
 11 {
 12     return str.Select(b => ((int) b).ToString(CultureInfo.InvariantCulture)).Select(tmp1 => @"\u" + tmp1 + "?").Aggregate("", (current, tmp2) => current + tmp2);
 13 }
 14 
 15 //将转化成字符串的图片信息按每200个字符加一个回车形成RTF图片信息
 16 private static string GetSplitString(string str)
 17 {
 18     string temp = string.Empty;
 19     for (int i = 0; i < str.Length; i += 200)
 20         temp += (i + 200 < str.Length) ? (str.Substring(i, 200) + "\r\n") : str.Substring(i);
 21     return temp;
 22 }
 23 /// <summary>
 24 /// 根据Excel列类型获取列的值
 25 /// </summary>
 26 /// <param name="cell">Excel列</param>
 27 /// <returns></returns>
 28 private static string GetCellValue(ICell cell)
 29 {
 30     if (cell == null)
 31         return string.Empty;
 32     switch (cell.CellType)
 33     {
 34         case CellType.Blank:
 35             return string.Empty;
 36         case CellType.Boolean:
 37             return cell.BooleanCellValue.ToString();
 38         case CellType.Error:
 39             return cell.ErrorCellValue.ToString(CultureInfo.InvariantCulture);
 40         case CellType.Numeric:
 41             short format = cell.CellStyle.DataFormat;
 42             //对时间格式(2015.12.5、2015/12/5、2015-12-5等)的处理
 43             if (format == 14 || format == 31 || format == 57 || format == 58)
 44                 return cell.DateCellValue.ToString(CultureInfo.InvariantCulture);
 45             return cell.NumericCellValue.ToString(CultureInfo.InvariantCulture);
 46         default:
 47             return cell.ToString();
 48         case CellType.String:
 49             return cell.StringCellValue;
 50         case CellType.Formula:
 51             try
 52             {
 53                 var e = new HSSFFormulaEvaluator(cell.Sheet.Workbook);
 54                 e.EvaluateInCell(cell);
 55                 return cell.ToString();
 56             }
 57             catch
 58             {
 59                 return cell.NumericCellValue.ToString(CultureInfo.InvariantCulture);
 60             }
 61     }
 62 }
 63 //替换字符串前面内容
 64 public static string TrimStarString(string s, string searchStr, string replaceStr)
 65 {
 66     var result = s;
 67     try
 68     {
 69         if (string.IsNullOrEmpty(result))
 70             return result;
 71         if (s.Length < searchStr.Length)
 72             return result;
 73         if (s.IndexOf(searchStr, 0, searchStr.Length, StringComparison.Ordinal) > -1)
 74             result = s.Substring(searchStr.Length);
 75         return result;
 76     }
 77     catch (Exception)
 78     {
 79         return result;
 80     }
 81 }
 82 //替换字符串后面内容
 83 public static string TrimEndString(string s, string searchStr, string replaceStr)
 84 {
 85     var result = s;
 86     try
 87     {
 88         if (string.IsNullOrEmpty(result))
 89             return result;
 90         if (s.Length < searchStr.Length)
 91             return result;
 92         if (s.IndexOf(searchStr, s.Length - searchStr.Length, searchStr.Length, StringComparison.Ordinal) > -1)
 93             result = s.Substring(0, s.Length - searchStr.Length);
 94         return result;
 95     }
 96     catch (Exception)
 97     {
 98         return result;
 99     }
100 }
101 /// <summary>
102 /// 将excel导入到datatable
103 /// </summary>
104 /// <param name="filePath">excel路径</param>
105 /// <param name="sheetName"></param>
106 /// <param name="isColumnName">第一行是否是列名</param>
107 /// <param name="dataTable"></param>
108 /// <returns>返回datatable</returns>
109 public static bool ExcelToDataTable(string filePath, string sheetName, bool isColumnName, out DataTable dataTable)
110 {
111     dataTable = null;
112     FileStream fs = null;
113     IWorkbook workbook = null;
114     int startRow = 0;
115     var pictures = new List<PicturesInfo>();
116     try
117     {
118         using (fs = File.OpenRead(filePath))
119         {
120             // 2007版本
121             if (filePath.ToUpper().EndsWith(".XLSX"))
122                 workbook = new XSSFWorkbook(fs);
123             // 2003版本
124             else if (filePath.ToUpper().EndsWith(".XLS"))
125                 workbook = new HSSFWorkbook(fs);
126             if (workbook != null)
127             {
128                 ISheet sheet = workbook.GetSheet(sheetName);
129                 dataTable = new DataTable();
130                 if (sheet != null)
131                 {
132                     pictures = NpoiExtend.GetAllPictureInfos(sheet);//获取图片信息
133                     int rowCount = sheet.LastRowNum; //总行数
134                     if (rowCount > 0)
135                     {
136                         IRow firstRow = sheet.GetRow(0); //第一行
137                         int cellCount = firstRow.LastCellNum; //列数
138                         //构建datatable的列
139                         ICell cell;
140                         DataColumn column;
141                         if (isColumnName)
142                         {
143                             startRow = 1; //如果第一行是列名,则从第二行开始读取
144                             for (int i = firstRow.FirstCellNum; i < cellCount; ++i)
145                             {
146                                 cell = firstRow.GetCell(i);
147                                 if (cell != null)
148                                 {
149                                     if (cell.StringCellValue != null)
150                                     {
151                                         column = new DataColumn(cell.StringCellValue);
152                                         dataTable.Columns.Add(column);
153                                     }
154                                 }
155                             }
156                         }
157                         else
158                         {
159                             for (int i = firstRow.FirstCellNum; i < cellCount; ++i)
160                             {
161                                 column = new DataColumn("column" + (i + 1));
162                                 dataTable.Columns.Add(column);
163                             }
164                         }
165                         //填充行
166                         for (int i = startRow; i <= rowCount; ++i)
167                         {
168                             IRow row = sheet.GetRow(i);
169                             if (row == null) continue;
170                             DataRow dataRow = dataTable.NewRow();
171                             for (int j = row.FirstCellNum; j < cellCount; ++j)
172                             {
173                                 cell = row.GetCell(j);
174                                 if (cell == null)
175                                     dataRow[j] = "";
176                                 else
177                                     dataRow[j] = GetCellValue(cell);
178                             }
179                             dataTable.Rows.Add(dataRow);
180                         }
181                     }
182                 }
183             }
184         }
185         if (dataTable != null && dataTable.Columns.Count >= 0 && pictures.Count > 0)
186         {
187             var columnIndex = 0;
188             for (int i = 0; i < dataTable.Columns.Count; i++)
189             {
190                 if (dataTable.Columns[i].ColumnName == "说明")//找到说明列
191                 {
192                     columnIndex = i;
193                     break;
194                 }
195             }
196             for (int i = 0; i < dataTable.Rows.Count; i++)
197             {
198                 DataRow row = dataTable.Rows[i];
199                 var str = row[columnIndex].ToString();
200                 var pics = pictures.Where(x => x.MinCol == columnIndex && x.MinRow == i + 1).ToList();
201                 if (pics.Count > 0)
202                 {
203                     var allPicStr = "";
204                     foreach (PicturesInfo pic in pics)
205                     {
206                         byte[] picdata = pic.PictureData;//Excel 中的图片信息,byte[]格式
207                         var picstr = "";
208                         foreach (byte b in picdata)
209                         {
210                             string tempStr = Convert.ToString(b, 16);
211                             if (tempStr.Length == 1)
212                                 tempStr = "0" + tempStr;//不足一位需要前面补0
213                             picstr += tempStr;
214                         }//将byte[]格式图片=>转换成16进制
215                         picstr = GetSplitString(picstr);//转成RTF格式需要的内容
216                         //RTF图片格式{\*\shppict{\pict\pngblip\picw8494\pich3175\picwgoal4815\pichgoal1800\picscalex100\picscaley100 [图片16进制信息]}}
217                         picstr = @"{\*\shppict{\pict\pngblip" +
218                                     @"\picw" + pic.SizeWidth * 26 +
219                                     @"\pich" + pic.SizeHeight * 26 +
220                                     @"\picwgoal" + pic.SizeWidth * 26 * 72 / 127 +
221                                     @"\pichgoal" + pic.SizeHeight * 26 * 72 / 127 +
222                                     @"\picscalex100\picscaley100 " +
223                                     picstr + @"}}";
224                         allPicStr += picstr + RtfEnter;
225                     }
226                     var allTextStr = RtfTxtStartTxt + GetRichText(str) + "}" + RtfEnter;//Excel单元格文字内容转RTF格式
227                     var textPic = RtfStartTxt + allTextStr + allPicStr + RtfEndTxt;//将Excel文字和图片拼接到一起
228                     row[columnIndex] = textPic;
229                 }
230             }
231             var k = dataTable.Rows.Count - 1;
232             for (int i = k; i > 0; i--)//将多个单元格合并到一起,处理如图Excel格式内容
233             {
234                 DataRow row = dataTable.Rows[i];
235                 if (string.IsNullOrEmpty(row[0].ToString()) && !string.IsNullOrEmpty(row[columnIndex].ToString()))
236                 {
237                     var str1 = dataTable.Rows[i - 1][columnIndex].ToString();
238                     str1 = TrimEndString(str1, RtfEndTxt, "");
239                     var str2 = dataTable.Rows[i][columnIndex].ToString();
240                     str2 = TrimEndString(str2, RtfEndTxt, "");
241                     str2 = TrimStarString(str2, RtfStartTxt, "");
242                     dataTable.Rows[i - 1][columnIndex] = str1 + RtfEnter + str2 + RtfEndTxt;
243                     dataTable.Rows.RemoveAt(i);
244                 }
245             }
246         }
247         return true;
248     }
249     catch (Exception ex)
250     {
251         if (fs != null)
252             fs.Close();
253         return false;
254     }
255 }
View Code

读取Excel图片我参考的是这篇博文:https://www.cnblogs.com/hanzhaoxin/p/4442369.html

PicturesInfo.cs

在原博文的基础上,我加了SizeWidth 和 SizeHeight两个属性,用于后续生成RTF格式图片Size内容。

    public class PicturesInfo
    {
        public int MinRow { get; set; }
        public int MaxRow { get; set; }
        public int MinCol { get; set; }
        public int MaxCol { get; set; }
        public Byte[] PictureData { get; private set; }
        public int SizeWidth { get; set; }
        public int SizeHeight { get; set; }

        public PicturesInfo(int minRow, int maxRow, int minCol, int maxCol, Byte[] pictureData)
        {
            this.MinRow = minRow;
            this.MaxRow = maxRow;
            this.MinCol = minCol;
            this.MaxCol = maxCol;
            this.PictureData = pictureData;
            using (var ms = new MemoryStream(pictureData))
            {
                var image = Image.FromStream(ms);
                this.SizeWidth = image.Size.Width;
                this.SizeHeight = image.Size.Height;
            }
        }
    }
View Code

NpoiExtend.cs

public static class NpoiExtend
{
    public static List<PicturesInfo> GetAllPictureInfos(this ISheet sheet)
    {
        return sheet.GetAllPictureInfos(null,null,null,null);
    }
 
    public static List<PicturesInfo> GetAllPictureInfos(this ISheet sheet,int? minRow,int? maxRow,int? minCol,int? maxCol,bool onlyInternal = true)
    {
        if (sheet is HSSFSheet)
        {
            return GetAllPictureInfos((HSSFSheet)sheet,minRow,maxRow,minCol,maxCol,onlyInternal);
        }
        else if (sheet is XSSFSheet)
        {
            return GetAllPictureInfos((XSSFSheet)sheet, minRow, maxRow, minCol, maxCol, onlyInternal);
        }
        else
        {
            throw new Exception("未处理类型,没有为该类型添加:GetAllPicturesInfos()扩展方法!");
        }
    }
 
    private static List<PicturesInfo> GetAllPictureInfos(HSSFSheet sheet,int? minRow, int? maxRow, int? minCol, int? maxCol, bool onlyInternal)
    {
        List<PicturesInfo> picturesInfoList = new List<PicturesInfo>();
 
        var shapeContainer = sheet.DrawingPatriarch as HSSFShapeContainer;
        if (null != shapeContainer)
        {
            var shapeList = shapeContainer.Children;
            foreach (var shape in shapeList)
            {
                if (shape is HSSFPicture && shape.Anchor is HSSFClientAnchor)
                {
                    var picture = (HSSFPicture)shape;
                    var anchor = (HSSFClientAnchor)shape.Anchor;
                    
                    if (IsInternalOrIntersect(minRow, maxRow, minCol, maxCol, anchor.Row1, anchor.Row2, anchor.Col1, anchor.Col2, onlyInternal))
                    {
                        picturesInfoList.Add(new PicturesInfo(anchor.Row1, anchor.Row2, anchor.Col1, anchor.Col2, picture.PictureData.Data));
                    }
                }
            }
        }
 
        return picturesInfoList;
    }
 
    private static List<PicturesInfo> GetAllPictureInfos(XSSFSheet sheet, int? minRow, int? maxRow, int? minCol, int? maxCol, bool onlyInternal)
    {
        List<PicturesInfo> picturesInfoList = new List<PicturesInfo>();
 
        var documentPartList = sheet.GetRelations();
        foreach (var documentPart in documentPartList)
        {
            if (documentPart is XSSFDrawing)
            {
                var drawing = (XSSFDrawing)documentPart;
                var shapeList = drawing.GetShapes();
                foreach (var shape in shapeList)
                {
                    if (shape is XSSFPicture)
                    {
                        var picture = (XSSFPicture)shape;
                        var anchor = picture.GetPreferredSize();
                        
                        if (IsInternalOrIntersect(minRow, maxRow, minCol, maxCol, anchor.Row1, anchor.Row2, anchor.Col1, anchor.Col2, onlyInternal))
                        {
                            picturesInfoList.Add(new PicturesInfo(anchor.Row1, anchor.Row2, anchor.Col1, anchor.Col2, picture.PictureData.Data));
                        }
                    }
                }
            }
        }
 
        return picturesInfoList;
    }
 
    private static bool IsInternalOrIntersect(int? rangeMinRow, int? rangeMaxRow, int? rangeMinCol, int? rangeMaxCol,
        int pictureMinRow, int pictureMaxRow, int pictureMinCol, int pictureMaxCol, bool onlyInternal)
    {
        int _rangeMinRow = rangeMinRow ?? pictureMinRow;
        int _rangeMaxRow = rangeMaxRow ?? pictureMaxRow;
        int _rangeMinCol = rangeMinCol ?? pictureMinCol;
        int _rangeMaxCol = rangeMaxCol ?? pictureMaxCol;
 
        if (onlyInternal)
        {
            return (_rangeMinRow <= pictureMinRow && _rangeMaxRow >= pictureMaxRow &&
                    _rangeMinCol <= pictureMinCol && _rangeMaxCol >= pictureMaxCol);
        }
        else
        {
            return ((Math.Abs(_rangeMaxRow - _rangeMinRow) + Math.Abs(pictureMaxRow - pictureMinRow) >= Math.Abs(_rangeMaxRow + _rangeMinRow - pictureMaxRow - pictureMinRow)) &&
            (Math.Abs(_rangeMaxCol - _rangeMinCol) + Math.Abs(pictureMaxCol - pictureMinCol) >= Math.Abs(_rangeMaxCol + _rangeMinCol - pictureMaxCol - pictureMinCol)));
        }
    }
}
View Code

将RTF格式转成byte[],转换完成后就可以直接写入数据库了。

 foreach (DataRow row in dataTable.Rows)
 {
     byte[] filedBlob = Encoding.Default.GetBytes(row["说明"].ToString());//转成byte[]
 }
posted @ 2020-12-18 11:05  新*  阅读(781)  评论(0编辑  收藏  举报