ʕ·͡ˑ·ཻʔ Daisy 🐾 ◟̆◞̆♥︎
Zou-Wang
点击头像关注

三っ•̀.̫•́)っ 我去宇宙了 ⁽⁽ଘ( ˊᵕˋ )ଓ⁾⁾

Python 中文本的读写操作

文本的操作

函数的排序操作:

def func(i):
    """Return the third field of record *i*; used as the sort key."""
    return i[2]


# Named `heroes` rather than `list`: rebinding `list` would shadow the builtin
# type and break any later `list(...)` call in the same session.
heroes = [('曹操', 101, 'c'), ('吕布', 100, 'd'), ('刘备', 200, 'l'), ('大乔', 50, 'x')]
# A hand-written sort would still have to pull this field out of every record
# once, so the extraction is simply handed to `sort` through `key`.
heroes.sort(key=func)
print(heroes)

文本的读写操作:

## Write data to the file; mode 'w' overwrites whatever the file contained.
## `with` closes the file on exit (the original `f.close` without parentheses
## never actually called close).
with open(r"E:\实习\编程\01\day06\1.txt", 'w') as f:
    data = f.write('xxxxxxxxxxxx')  # write() returns the number of characters written
## Read the text content back from the file.
with open(r"E:\实习\编程\01\day06\1.txt", 'r') as f:
    data = f.read()
print(data)
## Read an image: 'rb' opens in binary mode, so read() returns raw bytes.
with open(r"E:\实习\编程\01\day06\2.jpg", 'rb') as f:
    data = f.read()
print(data)

词频统计:

英文:

# Read the whole file and lower-case it so that counting is case-insensitive.
# The same file is opened as UTF-8 in the word-cloud snippet, so the encoding
# is stated explicitly here too; `with` guarantees the handle is closed.
with open(r'E:\实习\编程\01\day06\22.txt', 'r', encoding='utf8') as f:
    data = f.read().lower()

# split() without an argument splits on any run of whitespace, so words on
# different lines are separated and no empty-string "words" are counted
# (split(' ') did neither).
data_split = data.split()

# Tally occurrences: a word starts at 0 and gains 1 each time it is seen.
count_dict = {}
for word in data_split:
    count_dict[word] = count_dict.get(word, 0) + 1


def func(i):
    """Return the count from a (word, count) pair; used as the sort key."""
    return i[1]


# Turn the dict into a list of (word, count) pairs, sort ascending by count,
# then reverse to get the most frequent words first.
lt = list(count_dict.items())
lt.sort(key=func)
lt.reverse()

# Print the ten most frequent words, word and count each centred in a column.
# `:^5` is the centre-align format spec; the original `i[1]^5` XOR-ed the
# count with 5 and printed a wrong number.
for i in lt[0:10]:
    print(f'{i[0]:^7}{i[1]:^5}')

中文:

import jieba
# Read the novel as UTF-8; `with` guarantees the file handle is closed.
with open(r'E:\实习\编程\01\day06\threekingdoms.txt', 'r', encoding='utf8') as f:
    data = f.read()

# Segment the text into a list of words with jieba.
data_jieba = jieba.lcut(data)

# Words to leave out of the ranking. Built once here instead of on every loop
# iteration (the original literal also listed "荆州" twice).
stop_words = {
    "将军", "却是", "荆州", "二人", "不可", "不幸", "却说", "不能", "如此", "商议",
    "如何", "追赶", "二十余", "听令", "不计其数", "欣然", "大汉", "丞相", "主公", "军士",
    "左右", "军马", "不如", "赶来", "引兵", "次曰", "大喜", "朝廷", "当先", "传令",
    "次日", "天下", "东吴", "于是", "今日", "不敢", "魏兵", "陛下",
}

count_dict = {}
for word in data_jieba:
    # Skip single-character tokens and stop words.
    if len(word) == 1 or word in stop_words:
        continue
    # Strip "曰" so that e.g. "孔明曰" is counted together with "孔明";
    # replace() is a no-op when the substring is absent.
    word = word.replace('曰', '')
    # Count "云长" under "关公" so both forms share one tally.
    word = word.replace('云长', '关公')
    count_dict[word] = count_dict.get(word, 0) + 1


def func(i):
    """Return the count from a (word, count) pair; used as the sort key."""
    return i[1]


# Sort ascending by count, then reverse to get the most frequent words first.
data_list = list(count_dict.items())
data_list.sort(key=func)
data_list.reverse()

# Print the ten most frequent words, word and count each centred in a column.
# `:^5` is the centre-align format spec; the original `i[1]^5` XOR-ed the
# count with 5 and printed a wrong number.
for i in data_list[0:10]:
    print(f'{i[0]:^7}{i[1]:^5}')

词云:

import wordcloud
from imageio import imread
# Load the image whose shape constrains where words may be drawn.
mask = imread(r'E:\实习\编程\01\day06\1.png')

# Read the source text; `with` guarantees the file handle is closed.
with open(r'E:\实习\编程\01\day06\22.txt', 'r', encoding='utf8') as f:
    data = f.read()

# NOTE(review): per the wordcloud documentation, width/height are ignored when
# a mask is supplied and the mask's shape is used instead — confirm against
# the installed version before relying on the 500x600 size.
w = wordcloud.WordCloud(
    font_path=r'C:\Windows\Fonts\simfang',
    mask=mask,
    width=500,
    height=600,
    background_color="white",
)
w.generate(data)          # build the cloud from the raw text
w.to_file('outfile.png')  # write the rendered image to the working directory

posted @ 2019-07-21 18:21  没忘  阅读(484)  评论(0)  编辑  收藏  举报