有趣的宋词

菩萨蛮(回文)
等闲将度三春景。景春三度将闲等。愁怕更高楼。楼高更怕愁。
弄花梅已动。动已梅花弄。梅看几年催。催年几看梅。

赏花归去马如飞酒力微醒时已暮(回环诗,palindrome)

无无无有有无无,悟得无无便不愚。日月年时损壮粗。见元初。万道霞光攒宝珠。

if hz == prev_hz: dup = True; dup_hz = hz

if re.search('满.+酒', line): print(line)

.py和.txt: https://files.cnblogs.com/files/blogs/714801/songci.zip 我尽力删里面的xx书网膏药了。

 1 # -*- coding: utf-8 -*-
 2 import re
 3 
 4 def handle_line(line):
 5     if len(line) > 40: return
 6     d = {}; max = 0
 7     for hz in line:
 8         if hz in set('、,。之囗'): continue
 9         t = d[hz] = 1 + d.get(hz, 0)
10         if t > max: max = t
11     if max > 6:
12         d = {k:v for k,v in d.items() if v > 2} # dict comprehension
13         print(line, d)
14         input()
15 
def handle_line2(line):
    """Find a doubled character in a line, tally it, and print the line.

    Lines longer than 40 characters (counting any trailing newline) are
    ignored. The characters in ``、,。之囗`` are skipped without resetting the
    previous character, so two equal characters separated only by skipped
    ones still count as a pair. If the line contains several pairs, only the
    last one is recorded.

    On a hit, the pair (the character written twice) has its count
    incremented in the module-level dict ``all_dup_hz``, and the line is
    printed without its newline, followed by a tab and the pair.

    Args:
        line: One line of text.

    Returns:
        None.
    """
    if len(line) > 40:
        return
    # Build the skip set once per call rather than once per character.
    skipped = set('、,。之囗')
    prev_hz = doubled = ''
    for hz in line:
        if hz in skipped:
            continue
        if hz == prev_hz:
            doubled = hz  # a later pair overwrites an earlier one
        prev_hz = hz
    if not doubled:
        return
    pair = doubled * 2
    # The dict is mutated in place, so no ``global`` declaration is needed.
    all_dup_hz[pair] = all_dup_hz.get(pair, 0) + 1
    print(line.strip('\n'), '\t', pair, sep='')
28 
def handle_line3(line):
    """Print a line in which 满 is followed, after at least one character, by 酒.

    Lines longer than 40 characters (counting any trailing newline) are
    ignored. The line is printed exactly as received.

    Args:
        line: One line of text.

    Returns:
        None.
    """
    too_long = len(line) > 40
    if not too_long and re.search('满.+酒', line) is not None:
        print(line)
34 
# Maps each doubled pair (e.g. '深深') to the number of lines in which
# handle_line2 recorded it.
all_dup_hz = {}

# NOTE(review): the 'utf-16le' codec does not consume a byte-order mark; if
# 宋词.txt starts with one, the first line begins with '\ufeff' -- confirm
# against the data file.
with open('宋词.txt', 'r', encoding='utf-16le') as f:
    for line in f:
        handle_line2(line)

if not all_dup_hz:
    # Nothing was found. quit() is injected by the site module for interactive
    # use, so raise SystemExit directly.
    raise SystemExit

# Pairs recorded in more than 10 lines, most frequent first.
ranked = sorted(all_dup_hz.items(), key=lambda item: item[1], reverse=True)
print([(pair, count) for pair, count in ranked if count > 10])
View Code

 ang ['丈', '上', '丧', '乓', '亡', '亢', '仓', '仗', '仰', '仿', '伤', '佯', '倘', '倘', '倡', '偿', '傍', '党', '养', '冈', '刚', '厂', '吭', '唐', '唱', '商', '嗓', '嚷', '囊', '场', '坊', '堂', '塘', '墒', '壤', '央', '夯', '妄', '妨', '尚', '尝', '岗', '巷', '帐', '帮', '常', '庞', '康', '廊', '张', '当', '彭', '彰', '往', '忘', '忙', '慷', '房', '扛', '扛', '扬', '抗', '挡', '掌', '搪', '攘', '放', '敞', '方', '旁', '旺', '昂', '昌', '晌', '朗', '望', '杖', '杠', '杨', '杭', '枉', '样', '桑', '档', '梆', '棒', '棠', '榔', '榜', '樟', '殃', '氓', '氧', '汤', '汤', '汪', '沧', '洋', '浪', '涨', '淌', '淌', '港', '漳', '漾', '炕', '烫', '狼', '猖', '王', '琅', '瓤', '畅', '疡', '痒', '瘴', '盎', '盲', '磅', '磅', '秧', '章', '糖', '糠', '纲', '纺', '绑', '缸', '网', '羊', '耪', '肛', '肠', '肪', '肮', '胀', '胖', '脏', '膀', '膛', '航', '舱', '芒', '芳', '苍', '茫', '荡', '莽', '葬', '藏', '藏', '蚌', '裳', '让', '访', '详', '谤', '账', '赃', '赏', '趟', '躺', '邦', '郎', '钢', '镑', '长', '防', '阳', '障', '鸯']

https://files.cnblogs.com/files/blogs/714801/py-tbl.zip

 1 # -*- coding: utf-8 -*-
 2 import re
 3 
 4 def main():
 5     d = {}
 6     for line in [i for i in re.split('[\r|\n]', get_raw_tbl(), ) if i != '']:
 7         (py, hzs) = line.split()
 8         py = re.sub('^.+-', '', py)
 9         d[py] = hzs + d.get(py, '')
10     py = 'ang'
11     simple_hz_set = gb2312_set()
12     print(py, sorted([hz for hz in d[py] if hz in simple_hz_set]))
13     
def get_raw_tbl():
    """Return the raw lookup table as a single multi-line string.

    Each non-blank line holds a key, whitespace, and the characters filed
    under that key. The string starts and ends with a newline.

    NOTE(review): only two rows appear here; presumably the full table is in
    the archive linked from the post -- confirm.
    """
    raw_tbl = '''
a    吖阿啊锕嗄錒
z-uo    左作坐阼佐苲怍岝咗岞侳柮柞昨祚胙唑座秨袏莋笮做捽唶葄酢葃琢蓙稓筰鈼飵撮諎嘬穝繓糳
'''
    return raw_tbl
19 
def gb2312_set():
    """Return the set of characters in the embedded character list.

    The set is built from every character of the literal, so it also
    contains the newline that opens and closes it.

    NOTE(review): the name suggests the full GB2312 repertoire, but only one
    row of characters appears here -- confirm against the linked archive.
    """
    hz_text = '''
啊阿埃挨哎唉哀皑癌蔼矮艾碍爱隘鞍氨安俺按暗岸胺案肮昂盎凹敖熬翱袄傲奥懊澳芭捌扒叭吧
'''
    return set(hz_text)
24 
# Run the lookup only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
View Code

 print(set(d[py]).intersection(gb2312_set()))

posted @ 2021-11-26 17:41  Fun_with_Words  阅读(84)  评论(0)  编辑  收藏  举报









 张牌。