生成大量小文件 异常

 

from ProjectUtil.usingModuleTOMODIFY import getNow
from pymongo import MongoClient

# MongoDB connection settings.
# NOTE(review): credentials are hard-coded in source — consider loading
# them from configuration or the environment instead.
host = '10.14.14.12'
username = 'ain'
password = 'ad'
uri = "mongodb://%s:%s@%s" % (username, password, host)

# Data file paths, each declared next to an initially empty list.
q_f_export = '/data/bigdata/mongoexport/superpub-ask-question.csv'
q_export = []
q_f_mysql = '/data/bigdata/mongoexport/question.txt'
q_mysql = []
# Same path as the export file with "-distinct" appended.
q_f_distinct = '{}-distinct'.format(q_f_export)
q_distinct = []

# New question ids are numbered from the value after this one
# (presumably the highest id already present in MySQL — confirm).
MYSQL_max_q_id = 3979647

# Load the question list from the "-distinct" file: one question per line,
# with the trailing newline removed.  The ``with`` block closes the file
# on exit, so no explicit close() call is needed.
with open(q_f_distinct, 'r', encoding='utf-8') as fr:
    q_distinct = [line.rstrip('\n') for line in fr]

# Start timestamp, printed next to the current time in progress output.
start_ = getNow()

# Handle on the ``ask`` collection of the ``superpub`` database.
mongo_client = MongoClient(uri)
db = mongo_client.superpub
mongo_collection = db.ask


def get_momgo_res(question, collection=None):
    """Return the distinct answers stored for *question*.

    Queries for documents whose ``question`` field equals *question*,
    fetching only the ``answer`` field, and collects each different answer
    once, in the order the documents are returned.

    Args:
        question: Question text to match exactly.
        collection: Object exposing a pymongo-style
            ``find(filter, projection)`` method.  Defaults to the
            module-level ``mongo_collection``.

    Returns:
        A list of unique ``answer`` values; empty when nothing matches.
        Documents that carry no ``answer`` field are skipped.
    """
    if collection is None:
        collection = mongo_collection
    answers = []
    # One pass over the cursor consumes every result; wrapping it in a
    # ``while cursor.alive`` polling loop is unnecessary.
    for doc in collection.find({"question": question}, {'answer': 1}):
        if 'answer' not in doc:
            continue
        answer = doc['answer']
        # List membership rather than a set: answer values are not
        # guaranteed to be hashable.
        if answer not in answers:
            answers.append(answer)
    return answers


def w(f, s):
    """Write the text *s* to the file at path *f*, encoded as UTF-8.

    Any existing file at *f* is truncated and replaced.
    """
    # The ``with`` statement closes the file on exit; an explicit close()
    # call inside the block is redundant.
    with open(f, 'w', encoding='utf-8') as fw:
        fw.write(s)


# Export every question that has at least one answer into small files
# under ``dir_``: "<id>q" holds the question text and "<id>a<k>" holds its
# k-th distinct answer.
le_ = len(q_distinct)
c = MYSQL_max_q_id + 1  # next question id to assign
dir_ = '/data/bigdata/mongoexport/QA/'
last_reported = None  # value of ``c`` at the most recent progress line
for q in q_distinct:
    try:
        a = get_momgo_res(q)
        if not a:
            continue
        # Pin the id before advancing the counter so the answer files carry
        # the same id as their question file (previously they were written
        # with the already-incremented counter, i.e. the next question's id).
        q_id = c
        w('{}{}q'.format(dir_, q_id), q)
        c += 1
        for index_, answer in enumerate(a):
            w('{}{}a{}'.format(dir_, q_id, index_), answer)
    except Exception as e:
        # Best effort: report the failure and carry on with the next question.
        print(e)
    # Report once per 10000 ids; the guard stops the same line from being
    # repeated while ``c`` is stuck on a multiple of 10000 (failed iterations).
    if c % 10000 == 0 and c != last_reported:
        print(c - MYSQL_max_q_id, '/', le_, start_, ':',
              getNow())
        last_reported = c

  

 

posted @ 2018-12-18 09:59  papering  阅读(184)  评论(0)  编辑  收藏  举报