SqlAnalyzer 1.00 Source Code
My long-held wish to parse SQL is finally fulfilled today. However, since SQL is far more complex than arithmetic expressions, JSON, XML and the like, only a subset of the language is handled here. Turning this into a generally applicable SQL parser will still take considerable effort.
Code download: https://files.cnblogs.com/files/heyang78/SqlAnalyzer-20200527-2.rar
Test cases: https://www.cnblogs.com/heyang78/p/12974097.html
Core classes:
Token:
package com.heyang;

public class Token {
    public static final int TYPE_SELECT = 1;
    public static final int TYPE_TEXT = 2;
    public static final int TYPE_COMMA = 3;
    public static final int TYPE_FROM = 4;
    public static final int TYPE_WHERE = 5;
    public static final int TYPE_AND = 6;
    public static final int TYPE_EQUAL = 7;
    public static final int TYPE_OR = 8;
    public static final int TYPE_ORDER = 9;
    public static final int TYPE_BY = 10;
    public static final int TYPE_ASC = 11;
    public static final int TYPE_DESC = 12;
    public static final int TYPE_AS = 13;
    public static final int TYPE_GROUP = 14;
    public static final int TYPE_HAVING = 15;
    public static final int TYPE_LESSTHAN = 16;
    public static final int TYPE_GREATERTHAN = 17;
    public static final int TYPE_OPEN_PARENTHESIS = 18;
    public static final int TYPE_CLOSE_PARENTHESIS = 19;
    public static final int TYPE_CONNECT = 20;
    public static final int TYPE_LESSTHAN_OR_EQUAL = 21;
    public static final int TYPE_GREATERTHAN_OR_EQUAL = 22;
    public static final int TYPE_LESSTHAN_OR_GREATERTHAN = 23;
    public static final int TYPE_CASE = 24;
    public static final int TYPE_WHEN = 25;
    public static final int TYPE_THEN = 26;
    public static final int TYPE_ELSE = 27;
    public static final int TYPE_END = 28;
    public static final int TYPE_IS = 29;
    public static final int TYPE_NULL = 30;
    public static final int TYPE_TRUE = 31;
    public static final int TYPE_FALSE = 32;
    public static final int TYPE_PLUS = 33;
    public static final int TYPE_MINUS = 34;
    //public static final int TYPE_MULTI=35;
    public static final int TYPE_DEVIDE = 36;
    public static final int TYPE_DISTINCT = 37;
    public static final int TYPE_OVER = 38;
    public static final int TYPE_STRING_CONCAT = 39;
    public static final int TYPE_ON = 40;
    public static final int TYPE_JOIN = 41;
    public static final int TYPE_INNER = 42;
    public static final int TYPE_LEFT = 43;
    public static final int TYPE_RIGHT = 44;
    public static final int TYPE_OUTER = 45;
    public static final int TYPE_FULL = 46;
    public static final int TYPE_WITHIN = 47;
    public static final int TYPE_PARTITION = 48;
    public static final int TYPE_LIKE = 49;
    public static final int TYPE_LIMIT = 50;

    private int type;
    private String text;
    private int index; // Used to remember location

    public Token(char c, int type) {
        this.text = String.valueOf(c);
        this.type = type;
    }

    public Token(String word, int type) {
        this.text = word;
        this.type = type;
    }

    public String toString() {
        return String.format("token(text=%s,type=%s,index=%d)", text, getTypeStr(), index);
    }

    public String getTypeStr() {
        if (type == TYPE_SELECT) { return "KW:select"; }
        else if (type == TYPE_FROM) { return "KW:from"; }
        else if (type == TYPE_COMMA) { return "Comma"; }
        else if (type == TYPE_TEXT) { return "Text"; }
        else if (type == TYPE_WHERE) { return "KW:where"; }
        else if (type == TYPE_AND) { return "KW:and"; }
        else if (type == TYPE_EQUAL) { return "="; }
        else if (type == TYPE_OR) { return "KW:or"; }
        else if (type == TYPE_ORDER) { return "KW:order"; }
        else if (type == TYPE_BY) { return "KW:by"; }
        else if (type == TYPE_ASC) { return "KW:asc"; }
        else if (type == TYPE_DESC) { return "KW:desc"; }
        else if (type == TYPE_AS) { return "KW:as"; }
        else if (type == TYPE_GROUP) { return "KW:group"; }
        else if (type == TYPE_HAVING) { return "KW:having"; }
        else if (type == TYPE_LESSTHAN) { return "<"; }
        else if (type == TYPE_GREATERTHAN) { return ">"; }
        else if (type == TYPE_OPEN_PARENTHESIS) { return "("; }
        else if (type == TYPE_CLOSE_PARENTHESIS) { return ")"; }
        else if (type == TYPE_CONNECT) { return "KW:connect"; }
        else if (type == TYPE_LESSTHAN_OR_EQUAL) { return "<="; }
        else if (type == TYPE_GREATERTHAN_OR_EQUAL) { return ">="; }
        else if (type == TYPE_LESSTHAN_OR_GREATERTHAN) { return "<>"; }
        else if (type == TYPE_CASE) { return "KW:case"; }
        else if (type == TYPE_WHEN) { return "KW:when"; }
        else if (type == TYPE_THEN) { return "KW:then"; }
        else if (type == TYPE_ELSE) { return "KW:else"; }
        else if (type == TYPE_END) { return "KW:end"; }
        else if (type == TYPE_IS) { return "KW:is"; }
        else if (type == TYPE_NULL) { return "KW:null"; }
        else if (type == TYPE_TRUE) { return "KW:true"; }
        else if (type == TYPE_FALSE) { return "KW:false"; }
        else if (type == TYPE_PLUS) { return "+"; }
        else if (type == TYPE_MINUS) { return "-"; }
        else if (type == TYPE_DEVIDE) { return "/"; }
        else if (type == TYPE_DISTINCT) { return "KW:distinct"; }
        else if (type == TYPE_OVER) { return "KW:over"; }
        else if (type == TYPE_STRING_CONCAT) { return "||"; }
        else if (type == TYPE_ON) { return "KW:on"; }
        else if (type == TYPE_JOIN) { return "KW:join"; }
        else if (type == TYPE_INNER) { return "KW:inner"; }
        else if (type == TYPE_LEFT) { return "KW:left"; }
        else if (type == TYPE_RIGHT) { return "KW:right"; }
        else if (type == TYPE_OUTER) { return "KW:outer"; }
        else if (type == TYPE_FULL) { return "KW:full"; }
        else if (type == TYPE_WITHIN) { return "KW:within"; }
        else if (type == TYPE_PARTITION) { return "KW:partition"; }
        else if (type == TYPE_LIKE) { return "KW:like"; }
        else if (type == TYPE_LIMIT) { return "KW:limit"; }
        return null;
    }

    public int getType() { return type; }
    public String getText() { return text; }
    public void setText(String text) { this.text = text; }
    public int getIndex() { return index; }
    public void setIndex(int index) { this.index = index; }
    public void setType(int type) { this.type = type; }
}
Lexer:
package com.heyang;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

// v1.00
public class Lexer {
    private List<Token> tokens;

    public Lexer(String inputSql) {
        String sql = pretreat(inputSql);
        String swallowed = "";
        tokens = new ArrayList<Token>();

        for (int i = 0; i < sql.length(); i++) {
            char c = sql.charAt(i);

            if (Character.isWhitespace(c)) {
                addTextToList(swallowed);
                swallowed = "";
            } else if (c == ',') {
                addTextToList(swallowed);
                swallowed = "";
                tokens.add(new Token(c, Token.TYPE_COMMA));
            } else if (c == '=') {
                addTextToList(swallowed);
                swallowed = "";
                tokens.add(new Token(c, Token.TYPE_EQUAL));
            } else if (c == '<') {
                int next = i + 1;
                if (next < sql.length() && sql.charAt(next) == '=') {
                    addTextToList(swallowed);
                    swallowed = "";
                    tokens.add(new Token("<=", Token.TYPE_LESSTHAN_OR_EQUAL));
                    i++;
                } else if (next < sql.length() && sql.charAt(next) == '>') {
                    addTextToList(swallowed);
                    swallowed = "";
                    tokens.add(new Token("<>", Token.TYPE_LESSTHAN_OR_GREATERTHAN));
                    i++;
                } else {
                    addTextToList(swallowed);
                    swallowed = "";
                    tokens.add(new Token(c, Token.TYPE_LESSTHAN));
                }
            } else if (c == '>') {
                int next = i + 1;
                if (next < sql.length() && sql.charAt(next) == '=') {
                    addTextToList(swallowed);
                    swallowed = "";
                    tokens.add(new Token(">=", Token.TYPE_GREATERTHAN_OR_EQUAL));
                    i++;
                } else {
                    addTextToList(swallowed);
                    swallowed = "";
                    tokens.add(new Token(c, Token.TYPE_GREATERTHAN));
                }
            } else if (c == '|') {
                int next = i + 1;
                if (next < sql.length() && sql.charAt(next) == '|') {
                    addTextToList(swallowed);
                    swallowed = "";
                    tokens.add(new Token("||", Token.TYPE_STRING_CONCAT));
                    i++;
                }
            } else if (c == '(') {
                addTextToList(swallowed);
                swallowed = "";
                tokens.add(new Token(c, Token.TYPE_OPEN_PARENTHESIS));
            } else if (c == ')') {
                addTextToList(swallowed);
                swallowed = "";
                tokens.add(new Token(c, Token.TYPE_CLOSE_PARENTHESIS));
            } else if (c == '+') {
                addTextToList(swallowed);
                swallowed = "";
                tokens.add(new Token(c, Token.TYPE_PLUS));
            } else if (c == '-') {
                addTextToList(swallowed);
                swallowed = "";
                tokens.add(new Token(c, Token.TYPE_MINUS));
            } else if (c == '/') {
                addTextToList(swallowed);
                swallowed = "";
                tokens.add(new Token(c, Token.TYPE_DEVIDE));
            } else {
                swallowed += c;
            }
        }

        setTokenIndexes();
    }

    public void setTokenIndexes() {
        int idx = 0;
        for (Token t : tokens) {
            idx++;
            t.setIndex(idx);
        }
    }

    private int findTypeByText(String text) {
        Object[][] arr = {
            {"select", Token.TYPE_SELECT},
            {"from", Token.TYPE_FROM},
            {"where", Token.TYPE_WHERE},
            {"and", Token.TYPE_AND},
            {"or", Token.TYPE_OR},
            {"order", Token.TYPE_ORDER},
            {"by", Token.TYPE_BY},
            {"asc", Token.TYPE_ASC},
            {"desc", Token.TYPE_DESC},
            {"as", Token.TYPE_AS},
            {"group", Token.TYPE_GROUP},
            {"having", Token.TYPE_HAVING},
            {"connect", Token.TYPE_CONNECT},
            {"case", Token.TYPE_CASE},
            {"when", Token.TYPE_WHEN},
            {"then", Token.TYPE_THEN},
            {"else", Token.TYPE_ELSE},
            {"end", Token.TYPE_END},
            {"is", Token.TYPE_IS},
            {"null", Token.TYPE_NULL},
            {"true", Token.TYPE_TRUE},
            {"false", Token.TYPE_FALSE},
            {"distinct", Token.TYPE_DISTINCT},
            {"over", Token.TYPE_OVER},
            {"on", Token.TYPE_ON},
            {"join", Token.TYPE_JOIN},
            {"inner", Token.TYPE_INNER},
            {"left", Token.TYPE_LEFT},
            {"right", Token.TYPE_RIGHT},
            {"outer", Token.TYPE_OUTER},
            {"full", Token.TYPE_FULL},
            {"within", Token.TYPE_WITHIN},
            {"partition", Token.TYPE_PARTITION},
            {"like", Token.TYPE_LIKE},
            {"limit", Token.TYPE_LIMIT},
        };

        for (Object[] arrInner : arr) {
            String keyword = String.valueOf(arrInner[0]);
            if (keyword.equalsIgnoreCase(text)) {
                return Integer.parseInt(arrInner[1].toString());
            }
        }
        return Token.TYPE_TEXT;
    }

    private void addTextToList(String text) {
        int type = findTypeByText(text);
        addToken2List(text, type);
    }

    private void addToken2List(String text, int type) {
        if (text.trim().length() > 0) {
            tokens.add(new Token(text, type));
        }
    }

    public void printTokens() {
        final String continuousStar = createRepeatedStr("-", 84);
        final String layout = "%-20s %-20s %-20s %-20s %s";
        StringBuilder sb = new StringBuilder();
        sb.append(String.format(layout, "Index", "Type No", "Text", "Type Desc", "\n"));
        sb.append(continuousStar + "\n");

        int index = 0;
        for (Token token : tokens) {
            sb.append(String.format(layout, String.valueOf(index), String.valueOf(token.getType()),
                    token.getText(), token.getTypeStr(), "\n"));
            index++;
        }
        System.out.println(sb.toString());
    }

    public List<Token> getTokens() {
        return tokens;
    }

    private static String createRepeatedStr(String seed, int n) {
        return String.join("", Collections.nCopies(n, seed));
    }

    private String pretreat(String raw) {
        return raw.trim() + " ";
    }

    public static void main(String[] args) throws Exception {
        String sql = removeExtraSpace(readSqlFromFile("c:\\temp\\13.sql"));
        System.out.println(sql);
        new Lexer(sql).printTokens();
    }

    private static String readSqlFromFile(String filePath) throws Exception {
        StringBuilder sb = new StringBuilder();
        BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(filePath), "UTF-8"));
        String line = null;
        while ((line = br.readLine()) != null) {
            sb.append(line);
        }
        br.close();
        return sb.toString();
    }

    private static String removeExtraSpace(String raw) {
        return raw.replaceAll("\\s{2,}", " ");
    }
}
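As a quick illustration of the lexer in isolation, the hypothetical LexerDemo below feeds an inline statement to Lexer instead of reading c:\temp\13.sql, then dumps the token table. The class name and the sample SQL are mine, not part of the download.
package com.heyang;

// Hypothetical demo: tokenizes an inline SQL string and prints the token table.
public class LexerDemo {
    public static void main(String[] args) {
        String sql = "select name, age from tb where age >= 18 order by age desc";
        Lexer lexer = new Lexer(sql);
        lexer.printTokens(); // columns: Index, Type No, Text, Type Desc
        System.out.println(lexer.getTokens().size() + " tokens");
    }
}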
Node:
package com.heyang;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class Node {
    private String kwSelect;
    private List<Field> fields;
    private String kwFrom;
    private List<Table> tables;
    private String kwWhere;
    private List<Condition> conditions;
    private String kwOrderby;
    private List<Order> orders;
    private int depth = 0;

    public void addOrder(Order o) {
        if (orders == null) {
            orders = new ArrayList<Order>();
        }
        orders.add(o);
    }

    public void addCondition(Condition c) {
        if (conditions == null) {
            conditions = new ArrayList<Condition>();
        }
        conditions.add(c);
    }

    public String toString() {
        String tabs = getIndentSpace();
        StringBuilder sb = new StringBuilder();

        sb.append(tabs + kwSelect + "\n");
        if (fields != null) {
            List<String> ls = new ArrayList<String>();
            for (Field f : fields) {
                ls.add(tabs + " " + f.toString());
            }
            sb.append(String.join(",\n", ls));
        }
        sb.append("\n");

        sb.append(tabs + kwFrom + "\n");
        if (tables != null) {
            List<String> ls = new ArrayList<String>();
            for (Table tb : tables) {
                ls.add(tabs + " " + tb.toString() + "");
            }
            sb.append(String.join(",\n", ls));
        }

        if (kwWhere != null) {
            sb.append("\n");
            sb.append(tabs + kwWhere + "\n");
        }
        if (conditions != null) {
            List<String> ls = new ArrayList<String>();
            for (Condition cd : conditions) {
                ls.add(tabs + " " + cd.toString() + "");
            }
            sb.append(String.join("\n", ls));
        }

        if (kwOrderby != null) {
            sb.append("\n");
            sb.append(tabs + kwOrderby + "\n");
        }
        if (orders != null) {
            List<String> ls = new ArrayList<String>();
            for (Order od : orders) {
                ls.add(tabs + " " + od.toString() + "");
            }
            sb.append(String.join(",\n", ls));
        }

        return sb.toString();
    }

    private String getIndentSpace() {
        return String.join("", Collections.nCopies(this.depth, " "));
    }

    private void adjustDepth() {
        if (tables == null) {
            return;
        }
        for (Table tb : tables) {
            Node nd = tb.getTableNode();
            if (nd != null) {
                nd.depth = this.depth + 1;
                nd.adjustDepth();
            }
        }

        if (fields == null) {
            return;
        }
        for (Field fd : fields) {
            Node nd = fd.getColumnNode();
            if (nd != null) {
                nd.depth = this.depth + 1;
                nd.adjustDepth();
            }
        }
    }

    public void addTable(Table t) {
        if (tables == null) {
            tables = new ArrayList<Table>();
        }
        tables.add(t);
        adjustDepth();
    }

    public void addField(Field f) {
        if (fields == null) {
            fields = new ArrayList<Field>();
        }
        fields.add(f);
        adjustDepth();
    }

    public void setKwSelect(String s) {
        this.kwSelect = s;
    }

    public String getKwFrom() {
        return kwFrom;
    }

    public void setKwFrom(String kwFrom) {
        this.kwFrom = kwFrom;
    }

    public String getKwSelect() {
        return kwSelect;
    }

    public String getKwWhere() {
        return kwWhere;
    }

    public void setKwWhere(String kwWhere) {
        this.kwWhere = kwWhere;
    }

    public String getKwOrderby() {
        return kwOrderby;
    }

    public void setKwOrderby(String kwOrderby) {
        this.kwOrderby = kwOrderby;
    }
}
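To show how the tree prints itself, the hypothetical NodeDemo below assembles a one-field, one-table Node by hand, the same way TreeBuilder does. It relies only on setters that the code in this post already calls (Field.setColumn/setAlias, Table.setTableName); the demo class itself is not in the download.
package com.heyang;

// Hypothetical demo: builds a tiny tree manually and prints it via Node.toString().
public class NodeDemo {
    public static void main(String[] args) {
        Node root = new Node();
        root.setKwSelect("select");

        Field f = new Field();
        f.setColumn("name");
        f.setAlias("n");
        root.addField(f);

        root.setKwFrom("from");
        Table t = new Table();
        t.setTableName("tb");
        root.addTable(t);

        // Prints the select/from layout produced by Node.toString()
        System.out.println(root);
    }
}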
TreeBuilder:
package com.heyang;

import java.util.List;

public class TreeBuilder {
    private Node root;
    private List<Token> tokens;
    private int tokenIdx;

    public TreeBuilder(List<Token> tokens) throws Exception {
        this.tokens = tokens;
        this.tokenIdx = 0;
        root = new Node();
        parseNode(root);
    }

    private void parseNode(Node parent) throws Exception {
        Token token;

        // select
        token = fetchToken();
        if (token.getType() != Token.TYPE_SELECT) {
            throw new Exception("Expected:'Select' actual:" + token.getText() + " " + token);
        }
        parent.setKwSelect(token.getText());

        // find fields
        for (;;) {
            token = fetchToken();
            Field field = new Field();

            if (token.getType() == Token.TYPE_TEXT) {
                String column = token.getText();
                field.setColumn(column);
            } else if (token.getType() == Token.TYPE_OPEN_PARENTHESIS) {
                Node columnNode = new Node();
                field.setColumnNode(columnNode);
                parseNode(columnNode);

                token = fetchToken();
                if (token.getType() != Token.TYPE_CLOSE_PARENTHESIS) {
                    throw new Exception("Expected:')' actual:" + token.getText() + " " + token);
                }
            } else {
                throw new Exception("Expected:text/child query actual:" + token.getText() + " " + token);
            }

            String alias = null;
            token = fetchToken();
            if (token.getType() == Token.TYPE_TEXT) {
                alias = token.getText();
            } else if (token.getType() == Token.TYPE_COMMA) {
                returnToken();
            } else if (token.getType() == Token.TYPE_AS) {
                token = fetchToken();
                if (token.getType() != Token.TYPE_TEXT) {
                    throw new Exception("Expected:text actual:" + token.getText() + " " + token);
                }
                alias = token.getText();
            } else {
                returnToken();
            }
            field.setAlias(alias);
            parent.addField(field);

            token = fetchToken();
            if (token.getType() != Token.TYPE_COMMA) {
                returnToken();
                break;
            }
        }

        // from
        token = fetchToken();
        if (token.getType() != Token.TYPE_FROM) {
            throw new Exception("Expected:'from' actual:" + token.getText() + " " + token);
        }
        parent.setKwFrom(token.getText());

        // find tables
        for (;;) {
            Node tableNode = new Node();
            Table table = new Table();
            table.setTableNode(tableNode);
            parent.addTable(table);

            token = fetchToken();
            if (token.getType() == Token.TYPE_TEXT) {
                table.setTableName(token.getText());
            } else if (token.getType() == Token.TYPE_OPEN_PARENTHESIS) {
                parseNode(tableNode);

                token = fetchToken();
                if (token.getType() != Token.TYPE_CLOSE_PARENTHESIS) {
                    throw new Exception("Expected:')' actual:" + token.getText() + " " + token);
                }
            }

            token = fetchToken();
            if (token == null) {
                return;
            } else if (token.getType() == Token.TYPE_TEXT) {
                table.setAlias(token.getText());
            } else {
                returnToken();
            }

            token = fetchToken();
            if (token == null) {
                return;
            } else if (token.getType() != Token.TYPE_COMMA) {
                returnToken();
                break;
            }
        }

        // where
        token = fetchToken();
        if (token == null) {
            return;
        } else if (token.getType() == Token.TYPE_WHERE) {
            parent.setKwWhere(token.getText());
        } else {
            returnToken();
            return;
        }

        // find conditions
        for (;;) {
            Condition cond = new Condition();
            parent.addCondition(cond);

            token = fetchToken();
            if (token.getType() == Token.TYPE_TEXT) {
                cond.setLeft(token.getText());
            } else {
                returnToken();
                break;
            }

            token = fetchToken();
            if (token.getType() == Token.TYPE_EQUAL || token.getType() == Token.TYPE_LESSTHAN
                    || token.getType() == Token.TYPE_GREATERTHAN) {
                cond.setOprand(token.getText());
            } else {
                throw new Exception("Expected:oprand actual:" + token.getText() + " " + token);
            }

            token = fetchToken();
            if (token.getType() == Token.TYPE_TEXT) {
                cond.setRight(token.getText());
            } else {
                throw new Exception("Expected:text actual:" + token.getText() + " " + token);
            }

            token = fetchToken();
            if (token == null) {
                break;
            } else if (token.getType() == Token.TYPE_AND || token.getType() == Token.TYPE_OR) {
                cond.setJoint(token.getText());
            } else {
                returnToken();
                break;
            }
        }

        // order by
        token = fetchToken();
        if (token == null) {
            return;
        } else if (token.getType() == Token.TYPE_ORDER) {
            String order = token.getText();

            token = fetchToken();
            if (token.getType() == Token.TYPE_BY) {
                String by = token.getText();
                parent.setKwOrderby(order + " " + by);
            } else {
                throw new Exception("Expected:'by' actual:" + token.getText() + " " + token);
            }
        } else {
            returnToken();
            return;
        }

        // find orders
        for (;;) {
            Order order = new Order();
            parent.addOrder(order);

            token = fetchToken();
            if (token.getType() == Token.TYPE_TEXT) {
                order.setColumn(token.getText());
            } else {
                throw new Exception("Expected:text actual:" + token.getText() + " " + token);
            }

            token = fetchToken();
            if (token == null) {
                return;
            } else if (token.getType() == Token.TYPE_ASC || token.getType() == Token.TYPE_DESC) {
                order.setAscDesc(token.getText());
            } else if (token.getType() == Token.TYPE_COMMA) {
                returnToken();
            } else {
                throw new Exception("Expected:'asc/desc/,' actual:" + token.getText() + " " + token);
            }

            token = fetchToken();
            if (token == null) {
                return;
            } else if (token.getType() == Token.TYPE_COMMA) {
                continue;
            }
        }
    }

    private Token fetchToken() {
        if (tokenIdx >= tokens.size()) {
            return null;
        } else {
            Token t = tokens.get(tokenIdx);
            tokenIdx++;
            return t;
        }
    }

    private void returnToken() {
        if (tokenIdx > 0) {
            tokenIdx--;
        }
    }

    public Node getRoot() {
        return root;
    }
}
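Putting the two stages together, the hypothetical TreeBuilderDemo below runs Lexer and TreeBuilder on an inline statement and prints the reformatted text produced by Node.toString(). The class name and sample SQL are mine; only Lexer, TreeBuilder and Node from the listings above are used.
package com.heyang;

// Hypothetical demo: full pipeline on an inline statement, no input file needed.
public class TreeBuilderDemo {
    public static void main(String[] args) throws Exception {
        String sql = "select name as n, birthday b from tc c, (select a1 from tb) t "
                   + "where name = n1 and birthday = b1 order by name asc, birthday";
        Lexer lexer = new Lexer(sql);
        TreeBuilder builder = new TreeBuilder(lexer.getTokens());
        System.out.println(builder.getRoot());
    }
}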
Integration class:
package com.heyang;

import com.heyang.util.BracketChecker;
import com.heyang.util.CommonUtil;
import com.heyang.util.Renderer;

public class EntryPoint {
    public static void main(String[] args) {
        try {
            // Read context from file
            String text = CommonUtil.readTextFromFile("C:\\hy\\files\\sql\\10.sql");
            System.out.println("原文=" + text);

            // Is brackets balanced
            BracketChecker checker = new BracketChecker();
            boolean isBalanced = checker.isBalanced(text);
            if (isBalanced == false) {
                System.out.println(Renderer.paintBrown(checker.getErrMsg()));
                return;
            }

            // lex text to tokens
            Lexer lex = new Lexer(text);
            //lex.printTokens();

            // Build tree
            TreeBuilder tb = new TreeBuilder(lex.getTokens());
            Node root = tb.getRoot();
            System.out.println("整理后文本=\n" + root);
        } catch (Exception ex) {
            System.out.println(Renderer.paintBrown(ex.getMessage()));
            ex.printStackTrace();
        }
    }
}
Execution result:
原文=select name as name,birthday as b,column c from tc c,tb b,(select a1,a2,a3 from (select * from tbc) tbc) ta

整理后文本=
select name name, birthday b, column c from tc c, tb b, ( select a1, a2, a3 from ( select * from tbc) tbc) ta
--2020.05.27--