读写文件
2019-11-27 19:26 xplorerthik 阅读(172) 评论(0) 编辑 收藏 举报
# -*- coding: utf-8 -*-
"""
Created on Tue Oct 22 10:57:39 2019
@author: baoxinping
"""
import codecs
import io
import sys

reload(sys)
sys.setdefaultencoding('utf8')
# Build search queries by pairing every car-series name with every tag.
# Each line of the input file is treated as one series name; each output
# line has the form "<series> <tag>". Both files are UTF-8.
filename = 'D:/car_series_words.txt'
# Alternative input path kept from the original script:
#filename = '../data/newWords/history_'+'2019-09-14'

# io.open decodes to unicode while reading, and the with-block closes the
# handle even on error (the original never closed the input file).
with io.open(filename, 'r', encoding='utf-8') as fin:
    carSeries = fin.readlines()

tagList = [u'新闻', u'评测', u'导购', u'赛车', u'比赛', u'改装']
# Tags that were disabled in the original script:
# '博物馆','技术','拆解','交通违章查询','违章查询','保养经验','维修经验','用车成本','保养成本'

# The original ended with `fout.close` (no parentheses), which is a no-op, so
# the output was never explicitly closed/flushed. The with-block fixes that.
with io.open('d:/newquery_.txt', 'w', encoding='utf-8') as fout:
    for carserie in carSeries:
        series = carserie.strip()
        if not series:
            # A blank input line would only produce " <tag>" queries with no
            # series name, so skip it.
            continue
        for tag in tagList:
            # Stay in unicode end to end instead of round-tripping through
            # str(), which only worked because of sys.setdefaultencoding.
            fout.write(series + u" " + tag + u"\n")
# Merge ambiguous series names into one de-duplicated word list.
# Each line of the input file is treated as one word; the merged, stripped
# words are written one per line to d:/newAmbiSeries.txt. Both files are UTF-8.
filename = 'D:/splitAmbiSeries_bxp.txt'
# Alternative input path kept from the original script:
#filename = '../data/newWords/history_'+'2019-09-14'

# The with-block closes the handle even on error (the original never closed it).
with io.open(filename, 'r', encoding='utf-8') as fin:
    AmbiSeries = fin.readlines()

# NOTE(review): the original looped over `oldAmbiSeries`, but nothing in this
# script defines it, so the script died with NameError before writing any
# output. It is bound to an empty list here so the merge runs.
# TODO: load the previous word list here if one is supposed to be merged in.
oldAmbiSeries = []

wordsCand = set()
for source in (AmbiSeries, oldAmbiSeries):
    wordsCand.update(entry.strip() for entry in source)
# Blank lines strip down to an empty string; do not emit it as a "word".
wordsCand.discard(u'')
# Report the size after merging (the original printed it right after creating
# the set, which always showed 0).
print(len(wordsCand))

# Sorted so the output is reproducible from run to run; set iteration order
# is arbitrary. The with-block guarantees the file is flushed and closed.
with io.open('d:/newAmbiSeries.txt', 'w', encoding='utf-8') as fout:
    for word in sorted(wordsCand):
        fout.write(word + u"\n")