字符串操作、文件操作,英文词频统计预处理
一、字符串操作
- 解析身份证号:生日、性别、出生地等。
# Province-level region names, keyed by the first two digits of the ID number.
provinces = {
    '11': '北京市', '12': '天津市', '13': '河北省', '14': '山西省',
    '15': '内蒙古自治区', '21': '辽宁省', '22': '吉林省', '23': '黑龙江省',
    '31': '上海市', '32': '江苏省', '33': '浙江省', '34': '安徽省',
    '35': '福建省', '36': '江西省', '37': '山东省', '41': '河南省',
    '42': '湖北省', '43': '湖南省', '44': '广东省', '45': '广西壮族自治区',
    '46': '海南省', '50': '重庆市', '51': '四川省', '52': '贵州省',
    '53': '云南省', '54': '西藏自治区', '61': '陕西省', '62': '甘肃省',
    '63': '青海省', '64': '宁夏回族自治区', '65': '新疆维吾尔自治区',
    '71': '台湾省', '81': '香港特别行政区', '82': '澳门特别行政区',
}

# City names keyed by digits 3-4 of the ID number.  Every entry is a city of
# Guangdong, so the table is only consulted when the province code is '44'.
city = {
    "01": "广州市", "02": "韶关市", "03": "深圳市", "04": "珠海市",
    "05": "汕头市", "06": "佛山市", "07": "江门市", "08": "湛江市",
    "09": "茂名市", "12": "肇庆市", "13": "惠州市", "14": "梅州市",
    "15": "汕尾市", "16": "河源市", "17": "阳江市", "18": "清远市",
    "19": "东莞市", "20": "中山市", "51": "潮州市", "52": "揭阳市",
    "53": "云浮市",
}

# Weight applied to each of the first 17 digits when computing the checksum.
W = [7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2]
# Expected 18th character, indexed by (weighted digit sum % 11).
id_check_num = ['1', '0', 'X', '9', '8', '7', '6', '5', '4', '3', '2']

_GUANGDONG = '44'
_ASCII_DIGITS = '0123456789'


def is_valid_id(id_number):
    """Return True when *id_number* is a well-formed 18-character ID.

    The shape is checked *before* any indexing or ``int()`` conversion, so
    short, empty or non-numeric input yields False instead of raising.
    A lower-case ``x`` is accepted as the check character.
    """
    if len(id_number) != 18:
        return False
    body = id_number[:17]
    if not all(ch in _ASCII_DIGITS for ch in body):
        return False
    weighted_sum = sum(int(digit) * weight for digit, weight in zip(body, W))
    return id_number[17].upper() == id_check_num[weighted_sum % 11]


def parse_id(id_number):
    """Split a valid ID number into its fields.

    Returns a dict with the keys ``sex``, ``province``, ``city``, ``year``,
    ``month`` and ``day`` (all strings), or None when the number fails
    :func:`is_valid_id`.  An unknown province code maps to '未知'; the city is
    '' when it cannot be resolved from the Guangdong-only ``city`` table.
    """
    if not is_valid_id(id_number):
        return None
    province_code = id_number[0:2]
    birth = id_number[6:14]
    # Digits 15-17 form a sequence number whose parity encodes the sex.
    sex = '女' if int(id_number[14:17]) % 2 == 0 else '男'
    city_name = city.get(id_number[2:4], '') if province_code == _GUANGDONG else ''
    return {
        'sex': sex,
        'province': provinces.get(province_code, '未知'),
        'city': city_name,
        'year': birth[0:4],
        'month': birth[4:6],
        'day': birth[6:8],
    }


def main():
    """Prompt for an ID number and print the parsed fields or an error."""
    id_number = input('请输入十八位身份证号码: ')
    info = parse_id(id_number)
    if info is None:
        print("\n身份证号检验:" + id_number + "是一个错误的身份证号码")
        return
    print('性别:' + info['sex'])
    print("出生地为:" + info['province'] + info['city'] + "\n" + "出生日期为: "
          + info['year'] + '年' + info['month'] + '月' + info['day'] + '日')
    print("你的身份证号码是:" + id_number)
    print("\n身份证号检验:这是一个正确的身份证号码")


if __name__ == '__main__':
    main()
执行效果如下:
- 凯撒密码编码与解码
def _print_shifted(text, offset):
    """Print every character of *text* moved by *offset* code points.

    Characters are written one at a time on the same line (no separator and
    no trailing newline).
    """
    for ch in text:
        print(chr(ord(ch) + offset), end="")


# Encoding: shift each character of the message forward by 3 code points.
code = input("请输入要编码的信息:")
print("编码后为:")
_print_shifted(code, 3)

# Decoding: shift each character back by 3.  The prompt starts with a newline
# because the encoded output above did not end its line.
decode = input("\n请输入要解码的信息:")
print("解码后为:")
_print_shifted(decode, -3)
执行效果如下:
- 网址观察与批量生成
import webbrowser as web  # aliased as `web`

# The list pages differ only in the trailing page number.
PAGE_URL = 'http://news.gzcc.cn/html/xiaoyuanxinwen/{}.html'

# Print the URL of every list page from 2 through 21.
for URL in map(PAGE_URL.format, range(2, 22)):
    print(URL)

# Open only page 2 in the default browser.
URL = 'http://news.gzcc.cn/html/xiaoyuanxinwen/2.html'
web.open(URL)
执行效果如下:
二、英文词频统计预处理
- 下载一首英文的歌词或文章或小说。
- 将所有大写转换为小写
- 将其他所有分隔符(,.?!)替换为空格
- 分隔出一个一个的单词
- 并统计单词出现的次数。
# Every separator character is mapped to a single space in one pass.
_SEPARATORS_TO_SPACE = str.maketrans({ch: ' ' for ch in ',.?!\n'})


def read_text(path="C:The_Spectre.txt"):
    """Read a UTF-8 text file (optional BOM), echo it, and return its content.

    Args:
        path: File to read.  The default is the original hard-coded location.

    Returns:
        The full file content as one string.
    """
    # `with` closes the file even when read() raises (e.g. a decoding error).
    with open(path, "r", encoding="UTF-8-sig") as fo:
        line = fo.read()
    print(line)
    print("\n")
    return line


def modify_text(text=None):
    """Expand two contractions and turn separators into spaces.

    Args:
        text: Text to clean.  When None, the text is loaded with read_text().

    Returns:
        The text with "don't" -> "do not", "I've" -> "I have", and each of
        ``, . ? !`` and newline replaced by a space.
    """
    if text is None:
        text = read_text()
    # Contractions are matched case-sensitively, exactly as written here.
    text = text.replace('don\'t', 'do not').replace('I\'ve', 'I have')
    return text.translate(_SEPARATORS_TO_SPACE)


def lowercase_and_count(text=None):
    """Lower-case the cleaned text, print and return per-word counts.

    Whole words are counted; a substring count would report "a" once for
    every word that merely contains the letter.

    Args:
        text: Raw text to analyse.  When None, it is read via modify_text().

    Returns:
        A dict mapping each word to its number of occurrences, in order of
        first appearance.
    """
    from collections import Counter

    string = modify_text(text).lower()
    # split() without arguments collapses runs of spaces, so no empty words.
    words = string.split()
    print(words)
    print("\n")
    counts = Counter(words)
    for word, times in counts.items():
        print(word + " 这个词在歌曲中出现了" + str(times) + "次")
    return dict(counts)


if __name__ == '__main__':
    lowercase_and_count()
执行效果如下:
三、文件操作
- 同一目录、绝对路径、相对路径
- 凯撒密码:从文件读入密函,进行加密或解密,保存到文件。
- 词频统计:下载一首英文的歌词或文章或小说,保存为utf8文件。从文件读入文本进行处理。
def _show_file(path):
    """Print the content of the UTF-8 text file at *path* (BOM stripped).

    Nothing is appended after the content.  The `with` block closes the file
    even if reading fails.
    """
    with open(path, 'r', encoding='UTF-8-sig') as fo:
        content = fo.read()
    print(content, end='')


# Same directory: a bare file name is resolved against the current working
# directory of the process.
_show_file('text.txt')
# Absolute path: a raw string keeps the backslashes literal.
_show_file(r'C:\Users\Administrator\PycharmProjects\MadLibs\text.txt')
# Relative path: './' is also resolved against the current working directory.
_show_file(r'./text.txt')
# Default location of the message file that is encrypted/decrypted in place.
DEFAULT_PATH = r'C:\Users\Administrator\PycharmProjects\MadLibs\The_Spectre.txt'
# Number of code points every character is moved by.
SHIFT = 3


def _shift_file(path, offset):
    """Shift every character of the file at *path* by *offset*, in place.

    The shifted text is echoed to stdout (without a trailing newline) and
    then written back over the original file.
    """
    # newline='' turns off newline translation.  Without it an encoded '\n'
    # (which becomes '\r') is read back as '\n' and decodes to the wrong
    # character, so line breaks do not survive an encode/decode round trip.
    with open(path, 'r', encoding="UTF-8-sig", newline='') as fo:
        original = fo.read()
    shifted = ''.join(chr(ord(ch) + offset) for ch in original)
    print(shifted, end='')
    with open(path, 'w', encoding="UTF-8-sig", newline='') as fo:
        fo.write(shifted)


def encode(path=DEFAULT_PATH):
    """Encrypt the file at *path* in place by shifting characters forward."""
    _shift_file(path, SHIFT)


def decode(path=DEFAULT_PATH):
    """Decrypt the file at *path* in place by shifting characters back."""
    _shift_file(path, -SHIFT)


def main():
    """Run the interactive menu until standard input is exhausted."""
    while True:
        print(u"1. 加密,2. 解密")
        try:
            choice = input("请选择:")
        except EOFError:
            # Closed/piped stdin: leave the loop instead of crashing.
            break
        if choice == "1":
            encode()
            print("")
        elif choice == "2":
            decode()
        else:
            print(u"您的输入有误!")


if __name__ == '__main__':
    main()
执行效果如下:
四、函数定义
- 加密函数
def bian_ma():
    """Prompt for a message and print it with every character shifted by +3.

    The shifted characters are written one by one on a single line with no
    trailing newline.  Returns None.
    """
    plain = input("请输入要编码的信息:")
    print("编码后为:")
    # Lazy generator: each character is shifted right before it is printed.
    shifted = (chr(ord(ch) + 3) for ch in plain)
    for out_ch in shifted:
        print(out_ch, end="")
    return None
- 解密函数
def jie_ma():
    """Prompt for an encoded message and print it shifted back by 3.

    The prompt begins with a newline.  The restored characters are written
    one by one on a single line with no trailing newline.  Returns None.
    """
    cipher = input("\n请输入要解码的信息:")
    print("解码后为:")
    # Lazy generator: each character is shifted right before it is printed.
    restored = (chr(ord(ch) - 3) for ch in cipher)
    for out_ch in restored:
        print(out_ch, end="")
    return None
- 读文本函数
def read_text(path="C:The_Spectre.txt"):
    """Read a UTF-8 text file (optional BOM), echo it, and return its content.

    Args:
        path: File to read.  The default is the original hard-coded location.

    Returns:
        The full file content as one string.
    """
    # `with` closes the file even when read() raises (e.g. a decoding error).
    with open(path, "r", encoding="UTF-8-sig") as fo:
        line = fo.read()
    print(line)
    print("\n")
    return line