自己写的一个读取execl的帮助类

目标:读取execl的第一个sheet,并传入不需要读取的表头的行数,返回该execl里所有数据的list

解析共有2种:1、DOM      2、SAX

  1 import java.io.File;
  2 import java.io.IOException;
  3 import java.io.InputStream;
  4 import java.text.DecimalFormat;
  5 import java.util.ArrayList;
  6 import java.util.Iterator;
  7 import java.util.List;
  8 import java.util.regex.Matcher;
  9 import java.util.regex.Pattern;
 10 
 11 import org.apache.commons.logging.Log;
 12 import org.apache.commons.logging.LogFactory;
 13 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
 14 import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
 15 import org.apache.poi.openxml4j.opc.OPCPackage;
 16 import org.apache.poi.ss.usermodel.Cell;
 17 import org.apache.poi.ss.usermodel.Row;
 18 import org.apache.poi.ss.usermodel.Sheet;
 19 import org.apache.poi.ss.usermodel.Workbook;
 20 import org.apache.poi.ss.usermodel.WorkbookFactory;
 21 import org.apache.poi.xssf.eventusermodel.XSSFReader;
 22 import org.apache.poi.xssf.model.SharedStringsTable;
 23 import org.apache.poi.xssf.usermodel.XSSFRichTextString;
 24 import org.xml.sax.Attributes;
 25 import org.xml.sax.ContentHandler;
 26 import org.xml.sax.InputSource;
 27 import org.xml.sax.SAXException;
 28 import org.xml.sax.XMLReader;
 29 import org.xml.sax.helpers.DefaultHandler;
 30 import org.xml.sax.helpers.XMLReaderFactory;
 31 
 32 /**
 33  * 基于XSSF and SAX (Event API)
 34  * 读取excel的第一个Sheet的内容
 35  * @author yzl
 36  *
 37  */
 38 public class ReadExcelUtils {
 39     private int headCount = 0;
 40     private List<List<String>> list = new ArrayList<List<String>>();
 41     private static final Log log = LogFactory.getLog(ReadExcelUtils.class);
 42     
 43     /**
 44      * 通过文件流构建DOM进行解析
 45      * @param ins
 46      * @param headRowCount   跳过读取的表头的行数
 47      * @return
 48      * @throws InvalidFormatException
 49      * @throws IOException
 50      */
 51     public  List<List<String>> processDOMReadSheet(InputStream ins,int headRowCount) throws InvalidFormatException, IOException {
 52         Workbook workbook = WorkbookFactory.create(ins);
 53         return this.processDOMRead(workbook, headRowCount);
 54     }
 55     
 56     /**
 57      * 采用DOM的形式进行解析
 58      * @param filename
 59      * @param headRowCount   跳过读取的表头的行数
 60      * @return
 61      * @throws IOException 
 62      * @throws InvalidFormatException 
 63      * @throws Exception
 64      */
 65     public  List<List<String>> processDOMReadSheet(String filename,int headRowCount) throws InvalidFormatException, IOException {
 66         Workbook workbook = WorkbookFactory.create(new File(filename));
 67         return this.processDOMRead(workbook, headRowCount);
 68     }
 69 
 70     /**
 71      * 采用SAX进行解析
 72      * @param filename
 73      * @param headRowCount
 74      * @return
 75      * @throws OpenXML4JException 
 76      * @throws IOException 
 77      * @throws SAXException 
 78      * @throws Exception
 79      */
 80     public List<List<String>> processSAXReadSheet(String filename,int headRowCount) throws IOException, OpenXML4JException, SAXException   {
 81         headCount = headRowCount;
 82         
 83         OPCPackage pkg = OPCPackage.open(filename);
 84         XSSFReader r = new XSSFReader( pkg );
 85         SharedStringsTable sst = r.getSharedStringsTable();
 86         XMLReader parser = fetchSheetParser(sst);
 87 
 88         Iterator<InputStream> sheets = r.getSheetsData();
 89         InputStream sheet = sheets.next();
 90         InputSource sheetSource = new InputSource(sheet);
 91         parser.parse(sheetSource);
 92         sheet.close();
 93         
 94         log.debug("时间:"+DateUtils.getNowTime()+",共读取了execl的记录数为 :"+list.size());
 95         
 96         return list;
 97     }
 98 
 99     private XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
100         XMLReader parser =
101             XMLReaderFactory.createXMLReader(
102                     "org.apache.xerces.parsers.SAXParser"
103             );
104         ContentHandler handler = new SheetHandler(sst);
105         parser.setContentHandler(handler);
106         return parser;
107     }
108 
109     /** 
110      * SAX 解析excel
111      */
112     private class SheetHandler extends DefaultHandler {
113         private SharedStringsTable sst;
114         private String lastContents;
115         private boolean nextIsString;
116         private boolean isNullCell;
117         //读取行的索引
118         private int rowIndex = 0;
119         //是否重新开始了一行
120         private boolean curRow = false;
121         private List<String> rowContent;
122         
123         private SheetHandler(SharedStringsTable sst) {
124             this.sst = sst;
125         }
126         
127         public void startElement(String uri, String localName, String name,
128                 Attributes attributes) throws SAXException {
129             //节点的类型
130             //System.out.println("---------begin:" + name);
131             if(name.equals("row")){
132                 rowIndex++;
133             }
134             //表头的行直接跳过
135             if(rowIndex > headCount){
136                 curRow = true;
137                 // c => cell
138                 if(name.equals("c")) {
139                     String cellType = attributes.getValue("t");
140                     if(null == cellType){
141                         isNullCell = true;
142                     }else{
143                         if(cellType.equals("s")) {
144                             nextIsString = true;
145                         } else {
146                             nextIsString = false;
147                         }
148                         isNullCell = false;
149                     }
150                 }
151                 // Clear contents cache
152                 lastContents = "";
153             }
154         }
155         
156         public void endElement(String uri, String localName, String name)
157                 throws SAXException {
158             //System.out.println("-------end:"+name);
159             if(rowIndex > headCount){
160                 if(nextIsString) {
161                     int idx = Integer.parseInt(lastContents);
162                     lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
163                     nextIsString = false;
164                 }
165                 if(name.equals("v")) {
166                     //System.out.println(lastContents);
167                     if(curRow){
168                         //是新行则new一行的对象来保存一行的值
169                         if(null==rowContent){
170                             rowContent = new ArrayList<String>();
171                         }
172                         rowContent.add(lastContents);
173                     }
174                 }else if(name.equals("c") && isNullCell){
175                     if(curRow){
176                         //是新行则new一行的对象来保存一行的值
177                         if(null==rowContent){
178                             rowContent = new ArrayList<String>();
179                         }
180                         rowContent.add(null);
181                     }
182                 }
183                 
184                 isNullCell = false;
185 
186                 if("row".equals(name)){
187                     list.add(rowContent);
188                     curRow = false;
189                     rowContent = null;
190                 }
191             }
192             
193         }
194 
195         public void characters(char[] ch, int start, int length)
196                 throws SAXException {
197             lastContents += new String(ch, start, length);
198         }
199     }
200     
201     /**
202      * DOM的形式解析execl
203      * @param workbook
204      * @param headRowCount
205      * @return
206      * @throws InvalidFormatException
207      * @throws IOException
208      */
209     private List<List<String>> processDOMRead(Workbook workbook,int headRowCount) throws InvalidFormatException, IOException {
210         headCount = headRowCount;
211         
212         Sheet sheet = workbook.getSheetAt(0);
213         //行数
214         int endRowIndex = sheet.getLastRowNum();
215         
216         Row row = null;
217         List<String> rowList = null;
218         
219         for(int i=headCount; i<=endRowIndex; i++){
220             rowList = new ArrayList<String>();
221             row = sheet.getRow(i);
222             for(int j=0; j<row.getLastCellNum();j++){
223                 if(null==row.getCell(j)){
224                     rowList.add(null);
225                     continue;
226                 }
227                 int dataType = row.getCell(j).getCellType();
228                 if(dataType == Cell.CELL_TYPE_NUMERIC){
229                     DecimalFormat df = new DecimalFormat("0.####################");  
230                     rowList.add(df.format(row.getCell(j).getNumericCellValue()));
231                 }else if(dataType == Cell.CELL_TYPE_BLANK){
232                     rowList.add(null);
233                 }else if(dataType == Cell.CELL_TYPE_ERROR){
234                     rowList.add(null);
235                 }else{
236                     //这里的去空格根据自己的情况判断
237                     String valString = row.getCell(j).getStringCellValue();
238                     Pattern p = Pattern.compile("\\s*|\t|\r|\n");
239                     Matcher m = p.matcher(valString);
240                     valString = m.replaceAll("");
241                     //去掉狗日的不知道是啥东西的空格
242                     if(valString.indexOf(" ")!=-1){
243                         valString = valString.substring(0, valString.indexOf(" "));
244                     }
245                     
246                     rowList.add(valString);
247                 }
248             }
249             
250             list.add(rowList);
251         }
252         log.debug("时间:"+DateUtils.getNowTime()+",共读取了execl的记录数为 :"+list.size());
253         
254         return list;
255     }
256     
257     @SuppressWarnings("unused")
258     public static void main(String[] args) throws Exception {
259         ReadExcelUtils howto = new ReadExcelUtils();
260         String fileName = "f:/test.xlsx";
261         List<List<String>> list = howto.processSAXReadSheet(fileName,2);
262         
263         ReadExcelUtils h = new ReadExcelUtils();
264         String fileName1 = "f:/test.xls";
265         List<List<String>> result = h.processDOMReadSheet(fileName1,2);
266     }
267 }

 

posted @ 2014-10-26 15:44  自行车上的程序员  阅读(2149)  评论(0编辑  收藏  举报