算法调参 weight_ratio, weight_seqratio
"""Rank POI candidates against source names with weighted Levenshtein scores.

Reads the first sheet of ``FEXCEL``, keeps the rows whose city is listed in
``target_city_list``, scores every candidate found in the ``BDpoi_list``
column and writes two workbooks: one with every scored candidate and one with
only the last (highest-scoring) row kept for each city/district/name.
"""
from openpyxl import Workbook
import xlrd
import time
import Levenshtein as Le

target_city_list = ['北京市', '上海市', '深圳市', '广州市']
source_name = 'JMTool任务_csv_py_wholeCSV-加百度170826165729'
# '|-|' separates candidates; '|--|' separates a candidate's name from its address.
BDpoi_list_tag, BDpoi_list_tagb = '|-|', '|--|'
FEXCEL = '%s%s' % (source_name, '.xlsx')
# Tuning knobs: weight of the name-only ratio vs. the (name, address) seqratio.
weight_ratio, weight_seqratio = 0.7, 0.3

# NOTE(review): the published listing shows a single blank here; it may have
# been an empty string before the page was extracted -- confirm.
_BLANK = ' '


def _header_cells(title):
    """Split a comma-separated title string into trimmed column names."""
    # Splitting on ',' and trimming each cell copes with the irregular spacing
    # in the title strings (some commas are followed by a blank, some are not).
    return [cell.strip() for cell in title.split(',')]


def _drop_region(text, region):
    """Replace every occurrence of ``region`` in ``text`` with the placeholder."""
    # str.replace('', x) would insert x between every character, so an empty
    # region (e.g. a blank district cell) must be skipped.
    return text.replace(region, _BLANK) if region else text


def _rank_candidates(row_prefix, key_name, key_addr, city, district, BDpoi_list):
    """Return one output row per candidate, ordered by ascending combined score.

    Each row is ``row_prefix`` followed by the candidate name, the candidate
    address, the weighted score, the name ratio and the sequence ratio.
    """
    reference = [key_name, key_addr]
    scored = []
    for candidate in BDpoi_list.split(BDpoi_list_tag):
        if not candidate:
            continue
        parts = candidate.split(BDpoi_list_tagb)
        BD_name, BD_addr = parts[0], _BLANK
        if len(parts) == 2:
            # Remove city and district from the candidate address so both
            # sides of the comparison are judged on the same footing.
            BD_addr = _drop_region(_drop_region(parts[1], city), district)
        ratio_res = Le.ratio(key_name, BD_name)
        seqratio_res = Le.seqratio(reference, [BD_name, BD_addr])
        ratio_seqratio_res = weight_ratio * ratio_res + weight_seqratio * seqratio_res
        scored.append((
            ratio_seqratio_res,
            row_prefix + (BD_name, BD_addr, ratio_seqratio_res, ratio_res, seqratio_res),
        ))
    # list.sort is stable, so candidates with equal scores keep input order.
    scored.sort(key=lambda pair: pair[0])
    return [row for _, row in scored]


def _save_rows(title, rows, file_name):
    """Write a header derived from ``title`` plus ``rows`` to ``file_name``.xlsx."""
    wb = Workbook()
    worksheet = wb.active
    worksheet.append(_header_cells(title))
    for row in rows:
        worksheet.append(row)
    wb.save('%s%s' % (file_name, '.xlsx'))


def main_():
    """Score every candidate in ``FEXCEL`` and write the two result workbooks."""
    # NOTE(review): xlrd releases from 2.0 on only read .xls files; confirm the
    # installed version can still open this .xlsx input.
    data = xlrd.open_workbook(FEXCEL)
    table = data.sheets()[0]
    res_dic = {}
    for i in range(table.nrows):
        row = tuple(table.row_values(i))
        (dbid, area_code, ref_area_type_code, city, district, address,
         city_street, name_, BDpoi_list) = row
        if dbid == 'dbid':  # header row
            continue
        if city not in target_city_list:
            continue
        bucket = res_dic.setdefault(city, {}).setdefault(district, {}).setdefault(name_, [])
        if BDpoi_list_tag not in BDpoi_list:
            # No candidate list: keep the source row with blank result cells.
            bucket.append(row + (_BLANK, _BLANK, _BLANK))
            continue
        addr_ = '%s%s%s%s' % (city, district, address, city_street)
        bucket.extend(_rank_candidates(row, name_, addr_, city, district, BDpoi_list))

    # Flatten city -> district -> name -> rows, dropping names that ended up
    # with no row (a candidate list made only of separators).
    groups = [
        rows
        for districts in res_dic.values()
        for names in districts.values()
        for rows in names.values()
        if rows
    ]

    file_name = '%s%s%s' % (source_name, '-Levenshtein', time.strftime("%y%m%d%H%M%S", time.localtime()))
    _save_rows(
        ' dbid, area_code, ref_area_type_code, city, district, address, city_street, name_, BDpoi_list, BD_name, BD_addr, ratio_seqratio_res, ratio_res, seqratio_res',
        [row for rows in groups for row in rows],
        file_name,
    )

    # Rows were appended in ascending score order, so the last row of each
    # group is its highest-scoring candidate.
    # The second file name is built on top of the first one, as in the
    # original script.
    file_name = '%s%s%s' % (file_name, '-Levenshtein-ordered', time.strftime("%y%m%d%H%M%S", time.localtime()))
    _save_rows(
        ' dbid, area_code, ref_area_type_code, city, district, address, city_street, name_,BDpoi_list,max_BD_name, max_BD_addr, max_ratio_seqratio_res, ratio_res, seqratio_res',
        [rows[-1] for rows in groups],
        file_name,
    )


main_()
"""Rank POI candidates against reduced source names with Levenshtein scores.

Reads the first sheet of ``FEXCEL``, keeps the rows whose city is listed in
``target_city_list``, scores every candidate found in the ``BDpoi_list``
column against the ``name_reduction`` column and writes two workbooks: one
with every scored candidate and one with only the last (highest-scoring) row
kept for each city/district/name.
"""
from openpyxl import Workbook
import xlrd
import time
import Levenshtein as Le

target_city_list = ['深圳市']
# '|-|' separates candidates; '|--|' separates a candidate's name from its address.
BDpoi_list_tag, BDpoi_list_tagb = '|-|', '|--|'
source_name = 'JMTool任务_csv_py_wholeCSV_住宅小区-加百度170826152533'
FEXCEL = '%s%s' % (source_name, '.xlsx')
# Tuning knobs: weight of the name-only ratio vs. the (name, address) seqratio.
weight_ratio, weight_seqratio = 0.7, 0.3

# NOTE(review): the published listing shows a single blank here; it may have
# been an empty string before the page was extracted -- confirm.
_BLANK = ' '


def _header_cells(title):
    """Split a comma-separated title string into trimmed column names."""
    # Splitting on ',' and trimming each cell copes with the irregular spacing
    # in the title strings (some commas are followed by a blank, some are not).
    return [cell.strip() for cell in title.split(',')]


def _drop_region(text, region):
    """Replace every occurrence of ``region`` in ``text`` with the placeholder."""
    # str.replace('', x) would insert x between every character, so an empty
    # region (e.g. a blank district cell) must be skipped.
    return text.replace(region, _BLANK) if region else text


def _rank_candidates(row_prefix, key_name, key_addr, city, district, BDpoi_list):
    """Return one output row per candidate, ordered by ascending combined score.

    Each row is ``row_prefix`` followed by the candidate name, the candidate
    address, the weighted score, the name ratio and the sequence ratio.
    """
    reference = [key_name, key_addr]
    scored = []
    for candidate in BDpoi_list.split(BDpoi_list_tag):
        if not candidate:
            continue
        parts = candidate.split(BDpoi_list_tagb)
        BD_name, BD_addr = parts[0], _BLANK
        if len(parts) == 2:
            # Remove city and district from the candidate address so both
            # sides of the comparison are judged on the same footing.
            BD_addr = _drop_region(_drop_region(parts[1], city), district)
        ratio_res = Le.ratio(key_name, BD_name)
        seqratio_res = Le.seqratio(reference, [BD_name, BD_addr])
        ratio_seqratio_res = weight_ratio * ratio_res + weight_seqratio * seqratio_res
        scored.append((
            ratio_seqratio_res,
            row_prefix + (BD_name, BD_addr, ratio_seqratio_res, ratio_res, seqratio_res),
        ))
    # list.sort is stable, so candidates with equal scores keep input order.
    scored.sort(key=lambda pair: pair[0])
    return [row for _, row in scored]


def _save_rows(title, rows, file_name):
    """Write a header derived from ``title`` plus ``rows`` to ``file_name``.xlsx."""
    wb = Workbook()
    worksheet = wb.active
    worksheet.append(_header_cells(title))
    for row in rows:
        worksheet.append(row)
    wb.save('%s%s' % (file_name, '.xlsx'))


def main_():
    """Score every candidate in ``FEXCEL`` and write the two result workbooks."""
    # NOTE(review): xlrd releases from 2.0 on only read .xls files; confirm the
    # installed version can still open this .xlsx input.
    data = xlrd.open_workbook(FEXCEL)
    table = data.sheets()[0]
    res_dic = {}
    for i in range(table.nrows):
        row = tuple(table.row_values(i))
        (dbid, area_code, ref_area_type_code, city, district, address,
         city_street, name_, name_reduction, BDpoi_list) = row
        if dbid == 'dbid':  # header row
            continue
        if city not in target_city_list:
            continue
        bucket = res_dic.setdefault(city, {}).setdefault(district, {}).setdefault(name_, [])
        if BDpoi_list_tag not in BDpoi_list:
            # No candidate list: keep the source row with blank result cells.
            bucket.append(row + (_BLANK, _BLANK, _BLANK))
            continue
        addr_ = '%s%s%s%s' % (city, district, address, city_street)
        # Scoring uses the reduced name; rows are still grouped by name_.
        bucket.extend(_rank_candidates(row, name_reduction, addr_, city, district, BDpoi_list))

    # Flatten city -> district -> name -> rows, dropping names that ended up
    # with no row (a candidate list made only of separators).
    groups = [
        rows
        for districts in res_dic.values()
        for names in districts.values()
        for rows in names.values()
        if rows
    ]

    file_name = '%s%s%s' % (source_name, '-Levenshtein', time.strftime("%y%m%d%H%M%S", time.localtime()))
    _save_rows(
        'dbid, area_code, ref_area_type_code, city, district, address, city_street, name_, name_reduction, BDpoi_list, BD_name, BD_addr, ratio_seqratio_res, ratio_res, seqratio_res',
        [row for rows in groups for row in rows],
        file_name,
    )

    # Rows were appended in ascending score order, so the last row of each
    # group is its highest-scoring candidate.
    # The second file name is built on top of the first one, as in the
    # original script.
    file_name = '%s%s%s' % (file_name, '-Levenshtein-ordered', time.strftime("%y%m%d%H%M%S", time.localtime()))
    _save_rows(
        'dbid, area_code, ref_area_type_code, city, district, address, city_street, name_, name_reduction,BDpoi_list,max_BD_name, max_BD_addr, max_ratio_seqratio_res, ratio_res, seqratio_res',
        [rows[-1] for rows in groups],
        file_name,
    )


main_()
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· winform 绘制太阳,地球,月球 运作规律
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 上周热点回顾(3.3-3.9)
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人
2016-08-26 :avalon及通用MVVM的设计原理分析
2016-08-26 php.exe
2016-08-26 the command line tools