字符串操作,文件操作,英文词频统计预处理
本次作业的来源:https://edu.cnblogs.com/campus/gzcc/GZCC-16SE1/homework/2684
1.字符串操作:
- 解析身份证号:生日、性别、出生地等。
def parse_id_card(id_card):
    """Split an 18-character ID number into the fields this script reports.

    Args:
        id_card: The ID number as a string.

    Returns:
        A dict with the string values ``place`` (characters 1-6), ``year``,
        ``month``, ``day`` (characters 7-14), ``province``, ``city``,
        ``county`` (characters 1-2, 3-4, 5-6) and ``sex`` ("女" when the
        17th digit is even, otherwise "男").

    Raises:
        ValueError: If the input is not 18 characters long or its first 17
            characters are not all decimal digits.
    """
    # Only the first 17 characters must be digits; the last one is never
    # converted to a number here.
    if len(id_card) != 18 or not id_card[:17].isdecimal():
        raise ValueError("输入有误")

    birth = id_card[6:14]
    return {
        "place": id_card[0:6],
        "year": birth[0:4],
        "month": birth[4:6],
        "day": birth[6:8],
        "province": id_card[0:2],
        "city": id_card[2:4],
        "county": id_card[4:6],
        # Parity of the 17th digit equals the parity of characters 15-17
        # taken as one number, which is what the script originally tested.
        "sex": "女" if int(id_card[16]) % 2 == 0 else "男",
    }


if __name__ == "__main__":
    IdCard = input("请输入你的身份证号:")
    try:
        info = parse_id_card(IdCard)
    except ValueError:
        # Stop here instead of slicing and converting an invalid number.
        print("输入有误")
    else:
        print("你的身份号是:" + IdCard)
        print("出生地为:" + info["place"])
        print("生日:{}年{}月{}日".format(info["year"], info["month"], info["day"]))
        print("你的出生地为:{}省{}市{}县".format(
            info["province"], info["city"], info["county"]))
        print("性别:" + info["sex"])
- 凯撒密码编码与解码
import os


def _shift_letters(text, offset):
    """Lower-case *text* and rotate each letter a-z by *offset* positions.

    Every other character (spaces, digits, punctuation, non-ASCII text) is
    returned unchanged, so encoding followed by decoding restores the
    lower-cased input.
    """
    offset %= 26  # any integer shift, including negatives and values >= 26
    return "".join(
        chr((ord(ch) - 97 + offset) % 26 + 97) if "a" <= ch <= "z" else ch
        for ch in text.lower()
    )


def encryption(text=None, shift=None):
    """Caesar-encode a message, print it and return it.

    Args:
        text: Plain text. When None the user is prompted for it.
        shift: Integer shift. When None the user is prompted for it.

    Returns:
        The lower-cased, shifted text.

    Raises:
        ValueError: If the shift typed at the prompt is not an integer.
    """
    if text is None:
        text = input("请输入明文:")
    if shift is None:
        shift = int(input("请输入位移值:"))
    result = _shift_letters(text, shift)
    print("加密结果为:" + result)
    return result


def decryption(text=None, shift=None):
    """Caesar-decode a message, print it and return it.

    Args:
        text: Cipher text. When None the user is prompted for it.
        shift: Integer shift used when encoding. When None the user is
            prompted for it.

    Returns:
        The lower-cased, un-shifted text.

    Raises:
        ValueError: If the shift typed at the prompt is not an integer.
    """
    if text is None:
        text = input("请输入密文:")
    if shift is None:
        shift = int(input("请输入位移值:"))
    result = _shift_letters(text, -shift)
    print("解密结果为:" + result)
    return result


def main():
    """Show the encode/decode menu until input ends or the user interrupts."""
    while True:
        print(u"1. 加密")
        print(u"2. 解密")
        try:
            choice = input("请选择:")
            if choice == "1":
                encryption()
            elif choice == "2":
                decryption()
            else:
                print(u"您的输入有误!")
        except ValueError:
            # A non-integer shift must not kill the menu loop.
            print(u"您的输入有误!")
        except (EOFError, KeyboardInterrupt):
            # The menu has no quit option; end of input or Ctrl-C leaves it.
            break


if __name__ == "__main__":
    main()
- 网址观察与批量生成
# Print the list-page URL of the campus-news section for pages 2 through 9.
URL_PATTERN = 'http://news.gzcc.cn/html/xiaoyuanxinwen/{}.html'

page = 2
while page < 10:
    print(URL_PATTERN.format(page))
    page += 1
2.英文词频统计预处理
- 下载一首英文的歌词或文章或小说。
- 将所有大写转换为小写
- 将其他所有用作分隔符的标点(,.?!)替换为空格
- 分隔出一个一个的单词
- 并统计单词出现的次数。
from collections import Counter

article = '''
Big data analytics and business analytics
by Duan, Lian; Xiong, Ye
Over the past few decades, with the development of automatic identification, data capture and storage technologies, people generate data much faster and collect data much bigger than ever before in business, science, engineering, education and other areas. Big data has emerged as an important area of study for both practitioners and researchers. It has huge impacts on data-related problems. In this paper, we identify the key issues related to big data analytics and then investigate its applications specifically related to business problems.
'''

# Punctuation marks that separate words; each one is replaced by a space.
SEPARATORS = ",.:;?!"

# Common words left out of the final ranking.
STOP_WORDS = {'and', 'the', 'with', 'in', 'by', 'its', 'for', 'of', 'an', 'to'}


def normalize_text(text):
    """Return *text* lower-cased with every separator replaced by a space.

    A space (not an empty string) is substituted so that words joined only
    by punctuation, such as "a,b", stay two separate words.
    """
    table = str.maketrans(SEPARATORS, " " * len(SEPARATORS))
    return text.translate(table).lower()


def word_frequencies(text, stop_words=()):
    """Count the words of *text*, skipping *stop_words*.

    Args:
        text: Raw text; it is normalized with ``normalize_text`` first.
        stop_words: Words to drop from the result. Words that do not occur
            in the text are ignored.

    Returns:
        A dict mapping each remaining word to its count, in order of first
        appearance.
    """
    counts = dict(Counter(normalize_text(text).split()))
    for stop in stop_words:
        counts.pop(stop, None)
    return counts


def top_words(frequencies, n=10):
    """Return up to *n* ``(word, count)`` pairs, highest count first.

    Ties keep the order of *frequencies*; fewer than *n* words is fine.
    """
    ranked = sorted(frequencies.items(), key=lambda item: item[1], reverse=True)
    return ranked[:n]


if __name__ == "__main__":
    # Raw whitespace-separated tokens, punctuation still attached.
    print(article.split())

    # Replace punctuation with spaces and convert to lower case.
    exchange = normalize_text(article)
    print(exchange)

    # Build the word list.
    words = exchange.split()
    print(words)

    # Word frequencies, before and after removing the stop words.
    print(word_frequencies(article))
    dic = word_frequencies(article, STOP_WORDS)
    print(dic)

    # Sort by frequency, highest first.
    dic1 = top_words(dic, len(dic))
    print(dic1)

    # Print the ten most frequent words (or fewer if the text is short).
    for entry in dic1[:10]:
        print(entry)
3.文件操作
- 同一目录、绝对路径、相对路径
def read_text(path):
    """Return the full content of the UTF-8 text file at *path*.

    The file is closed even if reading fails.
    """
    with open(path, 'r', encoding='utf8') as fo:
        return fo.read()


if __name__ == "__main__":
    # Same directory: a bare file name.
    content = read_text('cipher.txt')
    print(content, end='')

    # Absolute path.
    content = read_text(r'C:/Users/Czc/PycharmProjects/untitled1/aa.py')
    print(content, end='')

    # Relative path, written explicitly with "./".
    content = read_text(r'./cipher.txt')
    print(content, end='')
- 凯撒密码:从文件读入密函,进行加密或解密,保存到文件。
def caesar_shift(text, k=3):
    """Rotate every ASCII letter of *text* by *k* positions in the alphabet.

    Letters wrap around within their own case ('z' + 3 -> 'c'); all other
    characters, including spaces and line breaks, are left unchanged.
    Passing ``-k`` reverses a shift by ``k``.

    Args:
        text: The text to transform.
        k: Integer shift; may be negative or larger than 26.

    Returns:
        The shifted text.
    """
    shifted = []
    for ch in text:
        if "a" <= ch <= "z":
            base = ord("a")
        elif "A" <= ch <= "Z":
            base = ord("A")
        else:
            shifted.append(ch)
            continue
        shifted.append(chr((ord(ch) - base + k) % 26 + base))
    return "".join(shifted)


if __name__ == "__main__":
    # Read the message, closing the file before it is reopened for writing.
    # utf8 matches how the other snippets in this post read cipher.txt.
    with open("cipher.txt", encoding="utf8") as file:
        a = file.read()
    print(a)

    cipher = caesar_shift(a, 3)
    print("加密后的密码:", cipher)

    # Overwrite the file with the encoded text.
    with open("cipher.txt", 'w', encoding="utf8") as file:
        file.write(cipher)
- 词频统计:下载一首英文的歌词或文章或小说,保存为utf8文件。从文件读入文本进行处理。
# coding=utf-8
from collections import Counter


def split_words(text):
    """Return the lower-cased words of *text*.

    The characters ``,.?!`` are treated as separators and replaced by
    spaces before the text is split on whitespace.
    """
    for mark in ",.?!":
        text = text.replace(mark, " ")
    return text.lower().split()


def count_words(words):
    """Return a dict mapping each word to how often it occurs in *words*.

    Keys appear in order of first occurrence.
    """
    return dict(Counter(words))


if __name__ == "__main__":
    # The task stores the text as UTF-8, so decode it explicitly instead of
    # relying on the platform default encoding.
    with open("bin.txt", encoding="utf-8") as file:
        text = file.read()

    text = split_words(text)
    print(text)

    count = count_words(text)
    print(count)
4.函数定义
- 加密函数
def encryption(text=None, shift=None):
    """Caesar-encode a message, print it and return it.

    The text is lower-cased and each letter a-z is rotated forward by
    *shift* positions with wrap-around. Spaces, digits, punctuation and
    any other characters are kept as they are.

    Args:
        text: Plain text. When None the user is prompted for it.
        shift: Integer shift (any size or sign). When None the user is
            prompted for it.

    Returns:
        The encoded text.

    Raises:
        ValueError: If the shift typed at the prompt is not an integer.
    """
    if text is None:
        text = input("请输入明文:")
    if shift is None:
        shift = int(input("请输入位移值:"))

    offset = shift % 26  # keeps shifts >= 26 and negative shifts in range
    encoded = "".join(
        chr((ord(ch) - 97 + offset) % 26 + 97) if "a" <= ch <= "z" else ch
        for ch in text.lower()
    )
    print("加密结果为:" + encoded)
    return encoded
- 解密函数
def decryption(text=None, shift=None):
    """Caesar-decode a message, print it and return it.

    The text is lower-cased and each letter a-z is rotated backward by
    *shift* positions with wrap-around. Spaces, digits, punctuation and
    any other characters are kept as they are.

    Args:
        text: Cipher text. When None the user is prompted for it.
        shift: Integer shift that was used for encoding (any size or sign).
            When None the user is prompted for it.

    Returns:
        The decoded text.

    Raises:
        ValueError: If the shift typed at the prompt is not an integer.
    """
    if text is None:
        text = input("请输入密文:")
    if shift is None:
        shift = int(input("请输入位移值:"))

    offset = shift % 26  # keeps shifts >= 26 and negative shifts in range
    decoded = "".join(
        chr((ord(ch) - 97 - offset) % 26 + 97) if "a" <= ch <= "z" else ch
        for ch in text.lower()
    )
    print("解密结果为:" + decoded)
    return decoded
- 读文本函数
def readFile(filePath):
    """Return the full content of the UTF-8 text file at *filePath*.

    The file is opened in a ``with`` block so the handle is closed as soon
    as the content has been read, even if reading raises.

    Args:
        filePath: Path of the file to read.

    Returns:
        The decoded file content as a single string.
    """
    with open(filePath, 'r', encoding='utf-8') as file:
        return file.read()