预处理算法_7_数据去重

 

#!/usr/bin/env python
# -*- coding:utf-8 -*-

# <editable>

"""
columns
特征
进行操作的所有列
"""


def execute():
    # <editable>
    """Read one column from a MySQL table, drop duplicate rows, and print it.

    The connection URL, the column list (``params['left_columns']``) and the
    table name (``inputs['table']``) are hard-coded below. The frame is
    printed once as read from the database and once after de-duplication.

    Returns:
        None. The results are only printed to stdout.
    """
    # Load modules.
    import pandas as pd
    # import db_utils
    from sqlalchemy import create_engine

    # Connect to the database.
    # NOTE(review): credentials are embedded in the URL; consider moving them
    # to configuration or environment variables.
    engine = create_engine('mysql+pymysql://root:123123qwe@127.0.0.1:3306/analysis')
    params = {
        "left_columns": "score",
    }
    inputs = {"table": 'test'}

    try:
        # Select the target data.
        left_sql = 'select {} from {}'.format(
            params['left_columns'], inputs['table']
        )
        data_in = pd.read_sql_query(left_sql, engine)
    finally:
        # Release the pooled connections even if the query fails.
        engine.dispose()
    print(data_in)

    # Remove duplicate rows. The non-inplace form returns a new frame, so
    # data_in keeps the rows exactly as they were read.
    data_out = data_in.drop_duplicates()

    # Write out the result.
    print(data_out)

    # Sample data (first table: frame as read; second table: after
    # de-duplication, where the repeated 20.0 at index 5 is gone):
    #
    #    score
    # 0   80.0
    # 1   20.0
    # 2    NaN
    # 3    5.0
    # 4    4.0
    # 5   20.0
    #    score
    # 0   80.0
    # 1   20.0
    # 2    NaN
    # 3    5.0
    # 4    4.0

# </editable>

# Run the de-duplication step only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    execute()

 

posted @ 2021-03-03 19:53  我当道士那儿些年  阅读(76)  评论(0)  编辑  收藏  举报