使用python脚本自动删除sql文件中的注释

python脚本

from enum import Enum


# 定义状态的枚举
class State(Enum):

    CODE = 0                        # 代码
    SLASH = 1                       # 斜杠
    NOTE_MULTILINE = 2              # 多行注释
    NOTE_MULTILINE_STAR = 3         # 多行注释遇到*
    NOTE_SINGLELINE = 4             # 单行注释
    BACKSLASH = 5                   # 拆行注释
    CODE_CHAR = 6                   # 字符
    CHAR_ESCAPE_SEQUENCE = 7        # 字符中的转义字符
    CODE_STRING = 8                 # 字符串
    STRING_ESCAPE_SEQUENCE = 9      # 字符串中的转义字符
    STRIGULA = 10                   # 短横线


# 删除sql语句中的注释
def delete_sql_note(str):
    # TODO 两种情况:"--"与"/*...*/"
    s = ""
    # 初始状态定义为代码
    state = State.CODE
    for c in str:
        if state == State.CODE:
            # 遇到单斜杠
            if c == '/':
                # 将状态改为单斜杠
                state = State.SLASH
            # 遇上短横线
            elif c == "-":
                state = State.STRIGULA
            else:
                s += c
                # 在代码中遇到字符
                if c == '\'':
                    state = State.CODE_CHAR
                # 在代码中遇到字符串
                elif c == '\"':
                    state = State.CODE_STRING

        # 如果遇到单横线
        elif state == State.STRIGULA:
            # 后面接着跟一个单横线,则说明是单行注释
            if c == '-':
                state = State.NOTE_SINGLELINE
            # 如果后面跟的是其他字符,则将状态转为代码
            else:
                s += "-" + c
                state = State.CODE

        # 遇到单斜杠
        elif state == State.SLASH:
            # 单斜杠后面再遇到*号,则说明是多行注释
            if c == '*':
                state = State.NOTE_MULTILINE
            # 单斜杠后面再遇到单斜杠,说明是单行注释
            elif c == '/':
                state = State.NOTE_SINGLELINE
            # 如果是其他,则将其添加到字符串中
            else:
                s += "/"
                s += c
                state = State.CODE

        # 遇到多行注释
        elif state == State.NOTE_MULTILINE:
            # 多行注释后面遇到*号
            if c == '*':
                state = State.NOTE_MULTILINE_STAR
            # 多行注释后面换行了
            else:
                if c == '\n':
                    s += '\r\n'
                # 则当前状态还是多行注释
                state = State.NOTE_MULTILINE
        # 多行注释后面又遇到*号
        elif state == State.NOTE_MULTILINE_STAR:
            # 如果*号后面又是单斜杠在,则说明注释结束了
            if c == '/':
                # 将状态改为代码
                state = State.CODE
            # 如果*号后面又是*号,则说明还是多行注释遇到*
            elif c == '*':
                state = State.NOTE_MULTILINE_STAR
            # 其他情况还是多行注释
            else:
                state = State.NOTE_MULTILINE
        # 单行注释
        elif state == State.NOTE_SINGLELINE:
            if c == '\\':
                state = State.BACKSLASH
            # 如果遇到换行符,则说明单行注释结束了
            elif c == '\n':
                s += '\r\n'
                state = State.CODE
            # 如果是其他情况,则说明单行注释还没有结束
            else:
                state = State.NOTE_SINGLELINE
        # 拆行注释
        elif state == State.BACKSLASH:
            if c == '\\' or c == '\r' or c == '\n':
                if c == '\n':
                    s += '\r\n'
                state = State.BACKSLASH
            else:
                state = State.NOTE_SINGLELINE
        # 代码中遇到单引号
        elif state == State.CODE_CHAR:
            s += c
            # 字符串中的转义字符
            if c == '\\':
                state = State.CHAR_ESCAPE_SEQUENCE
            # 再次遇到单引号,则说明字符串内容结束了。
            elif c == '\'':
                state = State.CODE
            # 如果是其他情况,则还是在字符串中
            else:
                state = State.CODE_CHAR
        # 字符串中遇到转义字符
        elif state == State.CHAR_ESCAPE_SEQUENCE:
            s += c
            state = State.CODE_CHAR
        # 字符串
        elif state == State.CODE_STRING:
            s += c
            if c == '\\':
                state = State.STRING_ESCAPE_SEQUENCE
            # 字符串内容结束
            elif c == '\"':
                state = State.CODE
            else:
                state = State.CODE_STRING
        elif state == State.STRING_ESCAPE_SEQUENCE:
            s += c
            state = State.CODE_STRING
    return s


if __name__ == '__main__':
    with open("../data/test.sql", 'r', encoding='utf-8') as f:
        f = f.read()
        res = delete_sql_note(f)
        print(res)

测试数据

/*
 * Procedure: currently_disabled()
 *
 * Show all disabled events / consumers
 *
 * Parameters
 *   show_instruments: Whether to show instrument configuration as well
 *
 * Versions: 5.5+
 *
 * mysql> call currently_disabled(true);
 * +----------------------------+
 * | performance_schema_enabled |
 * +----------------------------+
 * |                          1 |
 * +----------------------------+
 * 1 row in set (0.00 sec)
 * 
 * +--------------------------------+
 * | disabled_consumers             |
 * +--------------------------------+
 * | events_stages_current          |
 * | events_stages_history          |
 * | events_stages_history_long     |
 * | events_statements_history      |
 * | events_statements_history_long |
 * | events_waits_current           |
 * | events_waits_history           |
 * | events_waits_history_long      |
 * +--------------------------------+
 * 8 rows in set (0.00 sec)
 * 
 * +---------------------------------------------------------------------------------------+-------+
 * | disabled_instruments                                                                  | timed |
 * +---------------------------------------------------------------------------------------+-------+
 * | wait/synch/mutex/sql/PAGE::lock                                                       | NO    |
 * | wait/synch/mutex/sql/TC_LOG_MMAP::LOCK_sync                                           | NO    |
 * | wait/synch/mutex/sql/TC_LOG_MMAP::LOCK_active                                         | NO    |
 * ...
 * | stage/sql/Waiting for event metadata lock                                             | NO    |
 * | stage/sql/Waiting for commit lock                                                     | NO    |
 * | wait/io/socket/sql/server_tcpip_socket                                                | NO    |
 * | wait/io/socket/sql/server_unix_socket                                                 | NO    |
 * | wait/io/socket/sql/client_connection                                                  | NO    |
 * +---------------------------------------------------------------------------------------+-------+
 * 302 rows in set (0.03 sec)
 * 
 * Query OK, 0 rows affected (1.19 sec)
 */

DROP PROCEDURE IF EXISTS currently_disabled;

DELIMITER $$
-- 这是单行注释
CREATE PROCEDURE currently_disabled(show_instruments BOOLEAN)
    COMMENT 'Parameters: show_instruments (boolean)'
    SQL SECURITY INVOKER 
BEGIN
    SELECT @@performance_schema AS performance_schema_enabled;

    SELECT name AS disabled_consumers
      FROM performance_schema.setup_consumers
     WHERE enabled = 'NO';

    IF (show_instruments) THEN
        SELECT name AS disabled_instruments,
               timed
          FROM performance_schema.setup_instruments
         WHERE enabled = 'NO';
    END IF;
END$$

DELIMITER ;

删除注释之后的数据

DROP PROCEDURE IF EXISTS currently_disabled;
select user-name, id from data;
DELIMITER $$

CREATE PROCEDURE currently_disabled(show_instruments BOOLEAN)
    COMMENT 'Parameters: show_instruments (boolean)'
    SQL SECURITY INVOKER 
BEGIN
    SELECT @@performance_schema AS performance_schema_enabled;

    SELECT name AS disabled_consumers
      FROM performance_schema.setup_consumers
     WHERE enabled = 'NO';

    IF (show_instruments) THEN
        SELECT name AS disabled_instruments,
               timed
          FROM performance_schema.setup_instruments
         WHERE enabled = 'NO';
    END IF;
END$$

DELIMITER ;
posted @ 2021-09-15 10:00  凯尔哥  阅读(601)  评论(0编辑  收藏  举报