读写文件

2019-11-27 19:26 xplorerthik 阅读(182) 评论(0) 收藏举报

# -*- coding: utf-8 -*-
"""
Created on Tue Oct 22 10:57:39 2019

@author: baoxinping
"""
import codecs
import io
import sys

# Python 2 only: force the implicit str<->unicode conversion codec to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')

# Generate search queries: pair every car series name with every tag and
# write one "<series> <tag>" line per pair to the output file as UTF-8.
filename = 'D:/car_series_words.txt'
# Alternative input: '../data/newWords/history_' + '2019-09-14'

# io.open decodes to unicode text on read, so no str()/default-encoding
# tricks are needed, and the with-block guarantees the handle is closed.
# splitlines(True) keeps the line endings, as readlines() did.
with io.open(filename, 'r', encoding='utf-8') as fin:
    carSeries = fin.read().splitlines(True)

# Active tags: news, review, buying guide, racing, competition, modification.
# Disabled candidates: u'博物馆' (museum), u'技术' (technology),
# u'拆解' (teardown), u'交通违章查询' (traffic violation lookup),
# u'违章查询' (violation lookup), u'保养经验' (maintenance experience),
# u'维修经验' (repair experience), u'用车成本' (cost of ownership),
# u'保养成本' (maintenance cost).
tagList = [u'新闻', u'评测', u'导购', u'赛车', u'比赛', u'改装']

# The with-block flushes and closes the output; the original `fout.close`
# (no parentheses) never actually called close().
with io.open('d:/newquery_.txt', 'w', encoding='utf-8') as fout:
    for carserie in carSeries:
        series = carserie.strip()
        if not series:
            # A blank input line would otherwise yield queries like " <tag>".
            continue
        for tag in tagList:
            line = series + u" " + tag + u"\n"
            fout.write(line)

# Merge the ambiguous car-series words into one de-duplicated list and write
# it out as UTF-8, one word per line.
filename = 'D:/splitAmbiSeries_bxp.txt'
# Alternative input: '../data/newWords/history_' + '2019-09-14'

# io.open decodes to unicode text and the with-block closes the handle.
# splitlines(True) keeps the line endings, as readlines() did.
with io.open(filename, 'r', encoding='utf-8') as fin:
    AmbiSeries = fin.read().splitlines(True)

# NOTE(review): oldAmbiSeries is never assigned anywhere in this script, so
# the merge loop below used to fail with NameError. If it is already defined
# (e.g. left over in an interactive session) it is used as-is; otherwise we
# warn and merge only the file read above. Confirm where the old list should
# really come from.
try:
    oldAmbiSeries
except NameError:
    print("warning: oldAmbiSeries is not defined; merging only " + filename)
    oldAmbiSeries = []

wordsCand = set()
for word_ in AmbiSeries:
    wordsCand.add(word_.strip())
for word in oldAmbiSeries:
    # Presumably externally supplied entries may be UTF-8 byte strings;
    # decode them so the text-mode writer below accepts them.
    if isinstance(word, bytes):
        word = word.decode('utf-8')
    wordsCand.add(word.strip())
# Blank input lines strip to an empty word; do not emit an empty entry.
wordsCand.discard(u'')

# Report the size after the set is populated (it was printed while still
# empty before, so it always showed 0).
print(len(wordsCand))

# Sorted so the output file is deterministic between runs; the with-block
# flushes and closes it.
with io.open('d:/newAmbiSeries.txt', 'w', encoding='utf-8') as fout:
    for word in sorted(wordsCand):
        fout.write(word + u"\n")

刷新页面返回顶部

xplorerthik

读写文件

About