网站更新内容:请访问: https://bigdata.ministep.cn/

pandas针对某一列循环批量下载

根据values实现批量下载

from py_function_tools import odps_read_sql,write_excle,write_to_database,hive_read_sql
from py_function_tools import odps_write_dataframe,odps_read_table,write_database_from_odps
import time
#df_v1 = odps_read_sql(sql) # 读取odps数据集
#df_v2 = hive_read_sql(sql) # 读取hive数据集,需要手动指定表所在的数据库
#write_excle(df,file_name) #默认保存在downloads目录下
#write_to_database(df,table_name) #存到mysql中ypp数据库
#from pandas_dataframe_agg import dataframe_agg
#table =dataframe_agg(df,dimensions,func=func) # groupby 处理

def write_csv(to_path, data):
    """Save ``data`` as a CSV file at ``to_path`` and report success.

    The file is written without the index column and encoded as
    ``utf_8_sig`` (UTF-8 with a leading byte-order mark).

    Args:
        to_path: Destination handed straight to ``data.to_csv``.
        data: Object exposing a pandas-style ``to_csv`` method.

    Returns:
        The literal string ``'successful write'``.
    """
    csv_options = {'index': False, 'encoding': 'utf_8_sig'}
    data.to_csv(to_path, **csv_options)
    return 'successful write'


# Month keys to download, as 'YYYYMM' strings: "202001" through "202012".
i_list = ["2020{:02d}".format(month_number) for month_number in range(1, 13)]

if __name__ == "__main__":
    # Batch download: for every month key in i_list, query that month's rows
    # and save them as kelly_month<key>.csv inside `path`.
    path = '/Users/xxx/Downloads'
    for i in i_list:
        # perf_counter() is monotonic, so the reported duration cannot be
        # skewed (or go negative) if the system clock is adjusted mid-run,
        # which can happen with the wall-clock time.time().
        start_time = time.perf_counter()  # start of this month's download
        print(i)
        # The month key is substituted into the WHERE clause; the values come
        # from the hard-coded i_list above, not from external input.
        sql = """
        select * 
        from temp_0205_kelly 
        where month = '{m}'
        """.format(m=i)
        df_v1 = odps_read_sql(sql)  # read the ODPS dataset
        print('*'*10)
        print(sql)
        table_name = 'kelly_month'+i
        print(table_name)
        file_name = '{table_name}.csv'.format(table_name=table_name)
        print(file_name)
        to_path = path+"/"+file_name
        print(to_path)
        write_csv(to_path, df_v1)
        end_time = time.perf_counter()  # end of this month's download
        print("程序耗时%f秒." % (end_time - start_time))

补充:本例通过循环替换 SQL 中 where 条件的取值(month),逐月查询并下载数据。

posted @ 2021-02-19 16:35  ministep88  阅读(49)  评论(0)  编辑  收藏  举报
网站更新内容:请访问:https://bigdata.ministep.cn/