利用Python3将EXCEL中某列特殊字符之前的汉字取首字母,特殊字符之后的汉字取全拼,然后用下划线“_”相连,写入下一列
把*******.xls中的汉字人名转成用户名,写到后面的单元格中。
例如:网络--李大海 : wl_lidahai
现场-扬帆 : xc_yangfan
蹭课_张马: ck_zhangma
代码如下:
主要实现了汉字转拼音、获取拼音首字母、复杂字符串中提取汉字(Python3 提取中文的正则表达式)以及对excel的读和修改。
#_author:'ZYB' #data:2018/12/27 from xpinyin import Pinyin import re import xlrd from xlutils import copy def single_get_first(unicode): str1 = unicode.encode('gbk') try: ord(str1) return str1 except: asc = str1[0] * 256 + str1[1] - 65536 if asc >= -20319 and asc <= -20284: return 'a' if asc >= -20283 and asc <= -19776: return 'b' if asc >= -19775 and asc <= -19219: return 'c' if asc >= -19218 and asc <= -18711: return 'd' if asc >= -18710 and asc <= -18527: return 'e' if asc >= -18526 and asc <= -18240: return 'f' if asc >= -18239 and asc <= -17923: return 'g' if asc >= -17922 and asc <= -17418: return 'h' if asc >= -17417 and asc <= -16475: return 'j' if asc >= -16474 and asc <= -16213: return 'k' if asc >= -16212 and asc <= -15641: return 'l' if asc >= -15640 and asc <= -15166: return 'm' if asc >= -15165 and asc <= -14923: return 'n' if asc >= -14922 and asc <= -14915: return 'o' if asc >= -14914 and asc <= -14631: return 'p' if asc >= -14630 and asc <= -14150: return 'q' if asc >= -14149 and asc <= -14091: return 'r' if asc >= -14090 and asc <= -13119: return 's' if asc >= -13118 and asc <= -12839: return 't' if asc >= -12838 and asc <= -12557: return 'w' if asc >= -12556 and asc <= -11848: return 'x' if asc >= -11847 and asc <= -11056: return 'y' if asc >= -11055 and asc <= -10247: return 'z' return '' def GetFirstPinyin(string): if string == None: return None lst = list(string) charLst = [] for l in lst: charLst.append(single_get_first(l)) return ''.join(charLst) def ChoiceHanZi(str): # 中文匹配正则 chinese_pattern = '[\u4e00-\u9fa5]+' res = re.findall(chinese_pattern, str) return res if __name__ == '__main__': book = xlrd.open_workbook('*****.xls') new_book = copy.copy(book) sheet = book.sheet_by_index(0) new_sheet = new_book.get_sheet(0) # 修改excel的时候,只能通过get_sheet() for row in range(1,sheet.nrows): res = sheet.row_values(row)[0] Li = ChoiceHanZi(res) hi = GetFirstPinyin(Li[0])+'_'+Pinyin().get_pinyin(Li[1],'') new_sheet.write(row, 1, hi) new_book.save('*****.xls')