生成大量小文件 异常
"""Dump each distinct question and its MongoDB answers into small files.

The script reads one question per line from ``q_f_distinct``, looks up every
document in the ``superpub.ask`` collection whose ``question`` field equals
that line, and writes the results under ``dir_``:

* ``<id>q``     -- the question text
* ``<id>a<n>``  -- the n-th distinct answer for that question (n from 0)

Ids start at ``MYSQL_max_q_id + 1`` and advance by one for every question
that has at least one answer.
"""
from urllib.parse import quote_plus

from pymongo import MongoClient

from ProjectUtil.usingModuleTOMODIFY import getNow

# mongo key
# NOTE(review): credentials are hard-coded in source; consider loading them
# from configuration or the environment instead.
host, username, password = '10.14.14.12', 'ain', 'ad'
# Percent-escape the credentials so characters such as '@', ':' or '/' in a
# username/password cannot corrupt the connection URI.
uri = "mongodb://%s:%s@%s" % (quote_plus(username), quote_plus(password), host)

q_f_export, q_export = '/data/bigdata/mongoexport/superpub-ask-question.csv', []
q_f_mysql, q_mysql = '/data/bigdata/mongoexport/question.txt', []
q_f_distinct, q_distinct = '{}-distinct'.format(q_f_export), []
# presumably the highest question id already present in MySQL -- TODO confirm
MYSQL_max_q_id = 3979647

# The ``with`` block closes the file; no explicit close() is needed.
with open(q_f_distinct, 'r', encoding='utf-8') as fr:
    q_distinct = [i.rstrip('\n') for i in fr]

start_ = getNow()
mongo_client = MongoClient(uri)
db = mongo_client.superpub
mongo_collection = db.ask


def get_momgo_res(question):
    """Return the distinct ``answer`` values stored for *question*.

    Queries the module-level ``mongo_collection`` for documents whose
    ``question`` field equals *question*, projecting only ``answer``.

    Args:
        question: The exact question text to match.

    Returns:
        A list of answers in first-seen order with duplicates removed;
        empty when no document matches.

    Raises:
        KeyError: If a matching document has no ``answer`` field.
    """
    answers = []
    # Iterating the cursor already drains it; an outer ``while cursor.alive``
    # loop adds nothing for a regular (non-tailable) query.
    for doc in mongo_collection.find({"question": question}, {'answer': 1}):
        this_a = doc['answer']
        # List membership (not a set) keeps first-seen order and also works
        # if an answer value happens to be unhashable.
        if this_a not in answers:
            answers.append(this_a)
    return answers


def w(f, s):
    """Write the string *s* to the path *f* as UTF-8, replacing any existing file."""
    with open(f, 'w', encoding='utf-8') as fw:
        fw.write(s)


le_ = len(q_distinct)
c = MYSQL_max_q_id + 1
dir_ = '/data/bigdata/mongoexport/QA/'
for q in q_distinct:
    try:
        a = get_momgo_res(q)
        if not a:
            continue
        # Capture the id before advancing the counter so that the question
        # file and all of its answer files share the same id.
        q_id = c
        w('{}{}q'.format(dir_, q_id), q)
        c += 1
        for index_, i in enumerate(a):
            w('{}{}a{}'.format(dir_, q_id, index_), i)
    except Exception as e:
        # Best-effort export: report the failure and carry on with the
        # next question.
        print(e)
    if c % 10000 == 0:
        print(c - MYSQL_max_q_id, '/', le_, start_, ':', getNow())