我们经常将Excel格式的文件保存为csv格式以方便上传和修改。可是当数据中包含逗号或双引号的时候,Excel会把该字段用双引号括住,并把数据中的"改为"",从而给解析带来了困难。我写了以下函数来解析这样的字符串:
testSplitCSV.java:
import java.util.ArrayList;
import java.util.List;
import java.util.Vector;

/**
 * Splits a single line of Excel-style CSV text into its fields.
 *
 * <p>Quoting rules handled: a field that contains a comma or a double quote is
 * wrapped in double quotes, and every double quote inside such a field is
 * written as two consecutive double quotes.
 */
class testSplitCSV {

    /**
     * Splits one line of a CSV file into its fields.
     *
     * <p>Fields are separated by commas. A field that starts with a double
     * quote runs until the matching closing quote; inside it, commas are
     * literal and {@code ""} stands for one double quote. No whitespace is
     * trimmed. A trailing comma yields a trailing empty field, so
     * {@code "a,b,"} produces three fields.
     *
     * @param src the CSV line; may be {@code null} or empty
     * @return the fields in order; an empty array when {@code src} is
     *         {@code null} or empty
     * @throws Exception declared for source compatibility with existing
     *         callers; the exception actually thrown is an
     *         {@link IllegalArgumentException} when a quoted field is not
     *         closed, when a closing quote is followed by anything other than
     *         a comma or the end of the line, or when a double quote appears
     *         inside a field that does not start with one
     */
    public static String[] splitCSV(String src) throws Exception {
        if (src == null || src.length() == 0) {
            return new String[0];
        }

        final int len = src.length();
        List<String> fields = new ArrayList<String>();
        int pos = 0;

        // Parse field by field. Every comma is followed by exactly one more
        // field (possibly empty), which is what keeps trailing empty fields.
        while (true) {
            StringBuilder field = new StringBuilder();
            int fieldNumber = fields.size() + 1;
            if (pos < len && src.charAt(pos) == '"') {
                pos = readQuotedField(src, pos, field, fieldNumber);
            } else {
                pos = readPlainField(src, pos, field, fieldNumber);
            }
            fields.add(field.toString());

            if (pos >= len) {
                break;
            }
            pos++; // skip the comma that terminated this field
        }

        return fields.toArray(new String[0]);
    }

    /**
     * Reads a quoted field whose opening quote is at {@code start}, appending
     * the unescaped content to {@code out}.
     *
     * @return the index just past the closing quote (either the end of the
     *         line or the position of the separating comma)
     */
    private static int readQuotedField(String src, int start, StringBuilder out, int fieldNumber) {
        final int len = src.length();
        int pos = start + 1; // skip the opening quote

        while (pos < len) {
            char ch = src.charAt(pos++);
            if (ch != '"') {
                out.append(ch);
                continue;
            }
            if (pos < len && src.charAt(pos) == '"') {
                // A doubled quote is an escaped literal quote.
                out.append('"');
                pos++;
                continue;
            }
            // A lone quote closes the field; only a comma or end of line may follow.
            if (pos < len && src.charAt(pos) != ',') {
                throw new IllegalArgumentException(
                        "Single double-quote char mustn't exist in filed " + fieldNumber
                        + " while it is begined with quote\nchar at:" + pos);
            }
            return pos;
        }

        // Ran off the end of the line without seeing the closing quote.
        throw new IllegalArgumentException("last field is begin with but not end with double quote");
    }

    /**
     * Reads an unquoted field starting at {@code start}, appending its
     * characters to {@code out}.
     *
     * @return the index of the terminating comma, or the length of
     *         {@code src} when the field runs to the end of the line
     */
    private static int readPlainField(String src, int start, StringBuilder out, int fieldNumber) {
        final int len = src.length();
        int pos = start;

        while (pos < len) {
            char ch = src.charAt(pos);
            if (ch == ',') {
                break;
            }
            if (ch == '"') {
                throw new IllegalArgumentException(
                        "Quote cannot exist in a filed which doesn't begin with quote!\nfield:" + fieldNumber);
            }
            out.append(ch);
            pos++;
        }
        return pos;
    }

    /**
     * Demo entry point: splits a sample line that mixes quoted commas and
     * escaped quotes, then prints each field with its index.
     */
    public static void main(String[] args) {
        String src1 = "\"fh,zg\",sdf,\"asfs,\",\",dsdf\",\"aadf\"\"\",\"\"\"hdfg\",\"fgh\"\"dgnh\",hgfg'dfh,\"asdfa\"\"\"\"\",\"\"\"\"\"fgjhg\",\"gfhg\"\"\"\"hb\"";
        try {
            String[] Ret = splitCSV(src1);
            for (int i = 0; i < Ret.length; i++) {
                System.out.println(i + ": " + Ret[i]);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
|