字符串操作、文件操作,英文词频统计预处理
作业要求:https://edu.cnblogs.com/campus/gzcc/GZCC-16SE1/homework/2684
1.字符串操作:
- 解析身份证号:生日、性别、出生地等。
代码如下:
import datetime


class GetInformation(object):
    """Extract region, birth date, sex and age from an ID-number string.

    The fields are read by fixed position from the string:
    characters 0-1 are the region code, 6-9 the birth year, 10-11 the
    birth month, 12-13 the birth day, and character 16 encodes the sex.
    """

    # Two-digit region code -> region name.
    _AREA_NAMES = {
        "11": "北京", "12": "天津", "13": "河北", "14": "山西",
        "15": "内蒙古", "21": "辽宁", "22": "吉林", "23": "黑龙江",
        "31": "上海", "32": "江苏", "33": "浙江", "34": "安徽",
        "35": "福建", "36": "江西", "37": "山东", "41": "河南",
        "42": "湖北", "43": "湖南", "44": "广东", "45": "广西",
        "46": "海南", "50": "重庆", "51": "四川", "52": "贵州",
        "53": "云南", "54": "西藏", "61": "陕西", "62": "甘肃",
        "63": "青海", "64": "宁夏", "65": "新疆", "71": "台湾",
        "81": "香港", "82": "澳门", "91": "国外",
    }

    def __init__(self, id):
        """Slice the birth fields out of *id*.

        Raises:
            ValueError: if the year/month/day slices are not numeric.
        """
        # NOTE: `id` shadows the builtin; the name is kept because it is
        # part of the constructor's public signature.
        self.id = id
        self.birth_area = self.id[0:2]
        self.birth_year = int(self.id[6:10])
        self.birth_month = int(self.id[10:12])
        self.birth_day = int(self.id[12:14])

    def get_birth_area(self):
        """Return the region name for the leading two-digit code.

        Raises:
            KeyError: if the code is not in the region table.
        """
        return self._AREA_NAMES[self.birth_area]

    def get_birthday(self):
        """Return the birth date as "year-month-day" (no zero padding)."""
        birthday = "{}-{}-{}".format(
            self.birth_year, self.birth_month, self.birth_day)
        return birthday

    def get_sex(self):
        """Return "女" when character 16 is an even digit, otherwise "男"."""
        num = int(self.id[16:17])
        if num % 2 == 0:
            return "女"
        return "男"

    def get_age(self, today=None):
        """Return the age in completed years as of *today*.

        Args:
            today: any object with ``year``, ``month`` and ``day``
                attributes (e.g. ``datetime.date``). Defaults to the
                current local date.

        Returns:
            0 when *today* falls in the birth year; otherwise the year
            difference, minus one if this year's birthday has not yet
            been reached.
        """
        # The reference date is today itself. Using "tomorrow" here would
        # count the birthday one day early.
        if today is None:
            today = datetime.date.today()
        if today.year == self.birth_year:
            return 0
        birthday_pending = (
            (self.birth_month, self.birth_day) > (today.month, today.day))
        years = today.year - self.birth_year
        return years - 1 if birthday_pending else years


if __name__ == "__main__":
    # Guarded so that importing this module does not block on input().
    ID = input("请输入一个合法的真实的身份证号码:")
    info = GetInformation(ID)  # parse once and reuse for every field
    birtharea = info.get_birth_area()
    birthday = info.get_birthday()
    age = str(info.get_age())
    sex = info.get_sex()
    print("出生地:"+birtharea+"\n出生日期为:"+birthday+",年龄"+age+"岁,性别:"+sex)
- 凯撒密码编码与解码
- 网址观察与批量生成
代码如下:
import webbrowser

# Build the URLs for list pages 2 through 7, then open each one in the
# default browser and echo it to the console.
URL_TEMPLATE = 'http://news.gzcc.cn/html/xiaoyuanxinwen/{}.html'
page_urls = [URL_TEMPLATE.format(page_no) for page_no in range(2, 8)]
for url in page_urls:
    webbrowser.open(url)
    print(url)
2.英文词频统计预处理
- 下载一首英文的歌词或文章或小说。
- 将所有大写转换为小写
- 将所有其他做分隔符(,.?!)替换为空格
- 分隔出一个一个的单词
- 并统计单词出现的次数。
代码如下:
import os
from collections import Counter

# Default location of the lyrics file to analyse.
SONG_PATH = 'C:\\Users\\leo\\Desktop\\you raise me up.txt'

# Punctuation treated as a word separator. '!' is included so that tokens
# such as "up!" and "up" are counted as the same word.
SEPARATORS = ',.?!;-'


def read_file(path=SONG_PATH):
    """Return the full text of the UTF-8 file at *path*.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # `with` closes the handle even when read() raises.
    with open(path, 'r', encoding='utf-8') as f:
        return f.read()


def count_words(text):
    """Return a Counter of the lower-cased words in *text*.

    Every character in SEPARATORS is replaced by a space, the text is
    lower-cased, and the result is split on whitespace.
    """
    for sep in SEPARATORS:
        text = text.replace(sep, " ")
    return Counter(text.lower().split())


if __name__ == "__main__":
    # Guarded so that importing this module does not read the file.
    text = read_file()
    wd = count_words(text)
    print("词频统计--出现次数为5次以上的")
    # most_common(5) yields the five most frequent words with their counts.
    print(wd.most_common(5))
3.文件操作
- 同一目录、绝对路径、相对路径
- 凯撒密码:从文件读入密函,进行加密或解密,保存到文件。
- 词频统计:下载一首英文的歌词或文章或小说,保存为utf8文件。从文件读入文本进行处理。
代码如下:
import os

# File that holds the message; it is overwritten in place by each operation.
CIPHER_FILE = 'kaisamiwen.txt'
# Number of alphabet positions each letter is moved.
SHIFT = 3


# Read the file
def read_file(path=CIPHER_FILE):
    """Return the full text of the UTF-8 file at *path*."""
    with open(path, 'r', encoding='utf-8') as f:
        return f.read()


# Write the file
def write_file(word, path=CIPHER_FILE):
    """Overwrite the UTF-8 file at *path* with the string *word*."""
    with open(path, 'w', encoding='utf-8') as f:
        f.write(word)


def caesar_shift(text, offset):
    """Return *text* with each ASCII letter rotated by *offset* positions.

    Lowercase and uppercase letters wrap around within their own
    alphabet. Every other character (spaces, digits, punctuation, line
    breaks, non-ASCII text) is returned unchanged, so shifting by
    ``offset`` and then by ``-offset`` restores the input exactly.
    """
    shifted = []
    for ch in text:
        if 'a' <= ch <= 'z':
            base = ord('a')
        elif 'A' <= ch <= 'Z':
            base = ord('A')
        else:
            shifted.append(ch)
            continue
        shifted.append(chr(base + (ord(ch) - base + offset) % 26))
    return "".join(shifted)


def encryption():
    """Encrypt the file contents in place and print the result."""
    result = caesar_shift(read_file(), SHIFT)
    write_file(result)
    print ("加密结果为:"+result)


def decryption():
    """Decrypt the file contents in place and print the result."""
    result = caesar_shift(read_file(), -SHIFT)
    write_file(result)
    print ("解密结果为:"+result)


if __name__ == "__main__":
    # Guarded so that importing this module does not start the menu loop.
    while True:
        print (u"1. 加密")
        print (u"2. 解密")
        try:
            choice = input("请选择:")
        except (EOFError, KeyboardInterrupt):
            # End of input or Ctrl-C: leave the menu instead of crashing.
            break
        if choice == "1":
            encryption()
        elif choice == "2":
            decryption()
        else:
            print (u"您的输入有误!")
4.函数定义
- 加密函数
代码如下:
def encryption():
    """Caesar-encrypt the text from read_file() and store it via write_file().

    Each ASCII letter is moved three positions forward, wrapping around
    within its own case (x->a, X->A). Every other character (spaces,
    digits, punctuation, line breaks) is kept as is, so that they are
    not turned into unrelated symbols. The encrypted text is also
    printed.
    """
    shifted = []
    for ch in read_file():
        if 'a' <= ch <= 'z':
            base = ord('a')
        elif 'A' <= ch <= 'Z':
            base = ord('A')
        else:
            # Not a letter: pass through untouched.
            shifted.append(ch)
            continue
        shifted.append(chr(base + (ord(ch) - base + 3) % 26))
    result = "".join(shifted)
    write_file(result)
    print ("加密结果为:"+result)
- 解密函数
代码如下:
def decryption():
    """Caesar-decrypt the text from read_file() and store it via write_file().

    Each ASCII letter is moved three positions backward, wrapping around
    within its own case (a->x, A->X). Every other character (spaces,
    digits, punctuation, line breaks) is kept as is, so that they are
    not turned into unrelated symbols. The decrypted text is also
    printed.
    """
    restored = []
    for ch in read_file():
        if 'a' <= ch <= 'z':
            base = ord('a')
        elif 'A' <= ch <= 'Z':
            base = ord('A')
        else:
            # Not a letter: pass through untouched.
            restored.append(ch)
            continue
        restored.append(chr(base + (ord(ch) - base - 3) % 26))
    result = "".join(restored)
    write_file(result)
    print ("解密结果为:"+result)
- 读文本函数
代码如下:
# Read the file
def read_file(path='kaisamiwen.txt'):
    """Return the full text of the UTF-8 file at *path*.

    Args:
        path: file to read; defaults to 'kaisamiwen.txt' in the current
            working directory.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # `with` guarantees the handle is closed even when read() raises.
    with open(path, 'r', encoding='utf-8') as f:
        return f.read()


# Write the file
def write_file(word, path='kaisamiwen.txt'):
    """Overwrite the UTF-8 file at *path* with the string *word*.

    Args:
        word: text to store.
        path: file to write; defaults to 'kaisamiwen.txt' in the current
            working directory.

    Raises:
        OSError: if the file cannot be opened or written.
    """
    with open(path, 'w', encoding='utf-8') as f:
        f.write(word)