mongo2csv
#!/usr/bin/env python3
"""Export one day's documents from MongoDB collections to CSV files."""
import csv
import datetime
import os

import pymongo

# Default export date as an ISO ``YYYY-MM-DD`` string. It is computed once, at
# import time, and is also the default value of ``mongo2csv``'s ``ts_string``.
yesterday = str(datetime.date.today() - datetime.timedelta(days=1))


def _apply_column_handle(item, column_handle):
    """Flatten the columns named in ``column_handle`` of ``item`` in place."""
    for key, rule in column_handle.items():
        if key not in item:
            # Documents are schemaless: this one simply lacks the column.
            continue
        value = item[key]
        if rule != 'join':
            # An int picks a list element, a str picks a sub-dict value.
            item[key] = value[rule]
        elif isinstance(value, list):
            item[key] = '|'.join(str(x) for x in value)
        # 'join' on a non-list value leaves the value untouched.


def mongo2csv(db_name, ts_string=yesterday, column_handle=None, column_delete=None):
    """Dump the documents of one collection for one date into a CSV file.

    The collection is read from the ``rental`` database. Only documents whose
    ``ts_string`` field equals ``ts_string`` are exported; ``_id`` and ``ts``
    are always excluded. The CSV columns are the sorted keys of the first
    matching document, so fields that appear only in later documents are not
    exported. The output file is ``<db_name>_<ts_string>.csv`` in the current
    working directory and is overwritten if it already exists.

    :param db_name: name of the collection to export.
    :param ts_string: date string matched against the ``ts_string`` field.
    :param column_handle: columns to flatten, as ``{column: rule}`` where
        ``rule`` is one of:
        1. an int - list index, keep that element of the list;
        2. a str - dict key, keep that value of the sub-dict;
        3. ``'join'`` - join the list elements into one ``|``-separated string.
    :param column_delete: columns to leave out of the export.
    :return: None
    """
    print('starting...')
    host = '127.0.0.1'
    port = 5600
    user = 'root'
    password = ''
    url = f'mongodb://{user}:{password}@{host}:{port}/'

    column_handle = column_handle or {}
    column_delete = column_delete or []

    # Projection: a value of 0 excludes the field from the returned documents.
    select_condition = {'_id': 0, 'ts': 0}
    for column in column_delete:
        select_condition[column] = 0
    query = {'ts_string': ts_string}

    client = pymongo.MongoClient(url)
    try:
        collection = client.rental[db_name]

        # find_one returns None when nothing matches; skip instead of crashing
        # so the remaining collections can still be exported.
        title_dic = collection.find_one(query, select_condition)
        if title_dic is None:
            print(f'no documents in {db_name} for {ts_string}, skipping')
            return
        title = sorted(title_dic.keys())

        # Name the file after the date actually exported, not today's default.
        file_name = db_name + '_' + ts_string + '.csv'
        # 'w' rather than 'a': the header is always written, so appending on a
        # re-run would leave a second header and duplicate rows in the file.
        with open(file_name, 'w', encoding='utf8', newline='') as f:
            # csv.writer quotes values containing commas, quotes or newlines,
            # which a plain ','.join() would turn into broken rows.
            writer = csv.writer(f, lineterminator='\n')
            writer.writerow(title)
            for item in collection.find(query, select_condition):
                _apply_column_handle(item, column_handle)
                # A field missing from this document becomes an empty cell.
                writer.writerow([str(item.get(x, '')) for x in title])
    finally:
        client.close()


if __name__ == '__main__':
    mongo2csv('lianjia_detail')
    mongo2csv('mogu_detail', column_handle={'metroInfo': 0, 'rentType': 'value'})
    mongo2csv('qingke_detail', column_handle={})
    mongo2csv('xiangyu_detail',
              column_delete=['endDate', 'vacantStartDate', 'tabList', 'vacantEndDate'])
    mongo2csv('ziru_detail',
              column_handle={'subway_line_code': 'join', 'subway_station_code': 'join'})
    # wc prints the per-file line counts itself; print() then shows the exit
    # status returned by os.system.
    print(os.system('wc -l *.csv'))