广告 竞价排名 import Levenshtein as Le seqratio_res = Le.seqratio(chk_name_lsit, cmp_)

 

 

 

pip install python-Levenshtein

 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from openpyxl import Workbook
import xlrd
import time
import Levenshtein as Le
 
# Score every record of the input workbook against each candidate listed in
# its BDpoi_list cell (Levenshtein.seqratio over [name, address] pairs) and
# write one output row per candidate, lowest ratio first within each record.
# NOTE(review): xlrd 2.0+ no longer opens .xlsx files -- confirm the installed
# xlrd version before running this script.
target_city_list = []
file_name = 'DB任务_csv_py_wholeCSV-加百度170825095914'
FEXCEL = file_name + '.xlsx'
data = xlrd.open_workbook(FEXCEL)
table = data.sheets()[0]
nrows, ncols = table.nrows, table.ncols

# Output workbook: the header row is the input columns plus the three
# columns produced below (BD_name, BD_addr, seqratio_res).
wb = Workbook()
worksheet = wb.active
file_title_str = 'dbid, area_code, ref_area_type_code, city, district, address, city_street, name_, BDpoi_list, BD_name, BD_addr, seqratio_res'
file_title_l = file_title_str.replace(' ', '').split(',')
worksheet.append(file_title_l)

# '|-|' separates candidates inside a BDpoi_list cell; '|--|' separates the
# name from the address inside one candidate.
BDpoi_list_tag = '|-|'
BDpoi_list_tagb = '|--|'

for row_idx in range(nrows):
    record = table.row_values(row_idx)
    (dbid, area_code, ref_area_type_code, city, district, address,
     city_street, name_, BDpoi_list) = record
    if dbid == 'dbid':
        # Header row of the input sheet.
        continue

    full_address = '{}{}{}{}'.format(city, district, address, city_street)
    source_pair = [name_, full_address]

    scored_rows = []
    for candidate in BDpoi_list.split(BDpoi_list_tag):
        parts = candidate.split(BDpoi_list_tagb)
        if len(parts) == 2:
            BD_name, BD_addr = parts
        else:
            # Anything other than exactly "name|--|addr": keep only the
            # first piece as the name and leave the address empty.
            BD_name, BD_addr = parts[0], ''
        ratio = Le.seqratio(source_pair, [BD_name, BD_addr])
        scored_rows.append((
            dbid, area_code, ref_area_type_code, city, district, address,
            city_street, name_, BDpoi_list, BD_name, BD_addr, ratio,
        ))

    # list.sort is stable, so candidates with equal ratios keep the order in
    # which they appeared in the cell.
    scored_rows.sort(key=lambda scored: scored[-1])
    for scored in scored_rows:
        worksheet.append(scored)

# Save next to the input under a timestamped name so earlier runs are kept.
localtime_ = time.strftime("%y%m%d%H%M%S")
file_name = file_name + '-Levenshtein' + localtime_
file_name_save = file_name + '.xlsx'
wb.save(file_name_save)

  

posted @   papering  阅读(249)  评论(0)  编辑  收藏  举报
编辑推荐:
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
· 理解Rust引用及其生命周期标识(上)
· 浏览器原生「磁吸」效果!Anchor Positioning 锚点定位神器解析
· 没有源码,如何修改代码逻辑?
阅读排行:
· 全程不用写代码,我用AI程序员写了一个飞机大战
· DeepSeek 开源周回顾「GitHub 热点速览」
· 记一次.NET内存居高不下排查解决与启示
· MongoDB 8.0这个新功能碉堡了,比商业数据库还牛
· .NET10 - 预览版1新功能体验(一)
历史上的今天:
2016-08-25 queue
2016-08-25 a loosely strongly typed language
点击右上角即可分享
微信分享提示