def get_qq_0528():
    """Clean the raw text files in FILE_LIST and dump the lines to one JSON file.

    For every file name in the module-level ``FILE_LIST`` (resolved against
    the module-level ``PATH``):

    * the first 8 lines are skipped (presumably an export header -- confirm
      against the input format);
    * any line containing a ``YYYY-MM-DD`` / ``YYYY/MM/DD`` style date is
      replaced by the separator entry ``'#######\\n'``;
    * blank / whitespace-only lines are dropped;
    * every other line is stripped, kept, and counted.

    The collected list is written as a JSON array to
    ``./data/qq_data/qq_data_0528.json`` and the number of kept (non-separator)
    lines is printed.

    The output used to be reopened in ``'w'`` mode for every input file, so
    each file overwrote the previous one and only the last file's lines
    survived, while the printed total still covered all files.  The lines of
    all input files are now accumulated and written once.

    Raises:
        OSError: if an input file cannot be read or the output file cannot be
            opened (``open`` does not create the ``./data/qq_data/``
            directory).
    """
    import json
    import re

    # Matches dates such as 2019-05-28 or 2019/05/28.
    date_pattern = re.compile(r'(\d{4})(-|/)(\d{2})(-|/)(\d{2})')
    output_path = os.path.join('./data/qq_data/', 'qq_data_0528.json')

    accept_content = []
    cnt = 0
    saw_input = False

    for file in FILE_LIST:
        saw_input = True
        with open(os.path.join(PATH, file), 'r', encoding='utf8') as fr:
            # Skip the first 8 lines of every input file.
            for each in fr.readlines()[8:]:
                clean_each = date_pattern.sub('######', each).strip()
                if '######' in clean_each:
                    # A line carrying a date becomes a separator entry; the
                    # rest of that line is discarded.
                    accept_content.append('#######\n')
                elif clean_each:
                    cnt += 1
                    accept_content.append(clean_each)

    # With no input files the original never touched the output file; keep
    # that behaviour instead of truncating an existing result to "[]".
    if saw_input:
        with open(output_path, 'w', encoding='utf8') as json_file:
            json.dump(accept_content, json_file, ensure_ascii=False)

    print('总共有:{}'.format(cnt))