def eachFile(filepath, *, top_k=10, encoding=None):
    """Extract the top keywords of every non-empty ``.txt`` file in a folder.

    Each regular file directly inside ``filepath`` whose extension is exactly
    ``.txt`` is read in full and passed to ``jieba.analyse.extract_tags`` with
    ``allowPOS=('n', 'v')``. Empty files, sub-directories and files with any
    other extension are skipped. The folder is not searched recursively.

    Args:
        filepath: Path of the folder that contains the text files.
        top_k: Number of keywords to request per file, and therefore the
            number of ``keyN`` columns in the result. Defaults to 10.
        encoding: Text encoding passed to ``open``. ``None`` (the default)
            keeps the platform default encoding.

    Returns:
        A ``pandas.DataFrame`` with one row per processed file and the
        columns ``title, key1, ..., key<top_k>``. ``title`` is the file name
        without its ``.txt`` extension. When fewer than ``top_k`` keywords
        are returned for a file, the remaining cells hold ``''``.

    Raises:
        OSError: If the folder cannot be listed or a file cannot be opened.
        UnicodeDecodeError: If a file cannot be decoded with ``encoding``.
    """
    columns = ['title'] + ['key%d' % i for i in range(1, top_k + 1)]
    rows = []

    # os.listdir returns names in arbitrary order; sort so that the row order
    # of the result is reproducible between runs and machines.
    for name in sorted(os.listdir(filepath)):
        full_path = os.path.join(filepath, name)
        # splitext removes only the final extension, so a name such as
        # "a.txt.v2.txt" keeps its inner ".txt" in the title.
        stem, ext = os.path.splitext(name)
        if ext != '.txt' or not os.path.isfile(full_path):
            continue

        # Keep the file open only for the read, not during keyword extraction.
        with open(full_path, 'r', encoding=encoding) as f:
            content = f.read()
        if not content:
            continue

        tags = list(jieba.analyse.extract_tags(
            content, topK=top_k, allowPOS=('n', 'v')))[:top_k]
        # Pad short results explicitly so every row has the same width.
        tags.extend([''] * (top_k - len(tags)))
        rows.append([stem] + tags)

    return pd.DataFrame(rows, columns=columns)
eachFile Code

 

posted on 2018-09-27 11:26  Ming_noob  阅读(151)  评论(0)  编辑  收藏  举报