MSK的港口五字码的爬虫Python开发
# Scraper for Maersk (MSK) port/location codes.
#
# NOTE: the listing references the project helpers `sp` (HTTP client), `db`
# (database access), `jd` (md5 helper) and `redisx` (redis wrapper); their
# imports are not part of the visible snippet and must be provided by the file.
import json
from urllib.parse import quote

_LOCATIONS_URL = (
    "http://api.maersk.com/locations/?brand=maeu&cityName={city}"
    "&type=city&pageSize=50&sort=cityName"
)
_BRANDS = "MAEU,MCPU,SAFM,SEAU,SEJJ"
_BRAND_NAMES = "MCC Transport,Maersk Line,Safmarine,SeaGo Line,SeaLand"
_INSERT_PREFIX = (
    " INSERT into tablexxxx (UUID,type,maerskGeoLocationId,unLocCode,mskcode,"
    "countryName,countryCode,cityName,maerskRkstCode,maerskRktsCode,timezoneId,"
    "brands,brandNames,countryGeoId,regionCode,regionName,createtime,sortv) "
    "VALUES ('"
)


def _sql_text(value):
    """Return *value* as text with single quotes removed so it can be inlined in SQL."""
    # NOTE(review): bound parameters would be safer than inlining values, but
    # the signature of db.exec_sql is not visible here -- confirm and switch.
    return str(value).replace("'", "")


def _location_url(citykey):
    """Build the locations query URL for the raw (not pre-encoded) city keyword."""
    # quote() percent-encodes spaces as %20 (as the original did) and also
    # escapes '&', '#', quotes and non-ASCII characters.
    return _LOCATIONS_URL.format(city=quote(str(citykey), safe=""))


def _parse_locations(raw):
    """Wrap the raw response text into a JSON document and return its record list."""
    wrapped = '{"data":[' + raw + "]}"
    # The response is treated as objects concatenated without separators:
    # drop newlines and insert commas between adjacent objects.
    wrapped = wrapped.replace("\n", "").replace("}{", "} , {")
    print(wrapped)
    return json.loads(wrapped)["data"]


def _save_location(loc):
    """Insert one location record into tablexxxx, keyed by an md5 of its fields."""
    # Fall back to the Maersk geo id when the record carries no UN/LOCODE.
    un_loc_code = loc["unLocCode"] if "unLocCode" in loc else loc["maerskGeoLocationId"]
    region_code = str(loc["regionCode"]) if "regionCode" in loc else ""
    region_name = _sql_text(loc["regionName"]) if "regionName" in loc else ""

    # The fingerprint deliberately uses the same (mostly unstripped) values as
    # the original so that UUIDs of already stored rows stay stable.
    fingerprint = "','".join([
        str(loc["type"]),
        str(loc["maerskGeoLocationId"]),
        str(un_loc_code),
        str(un_loc_code),
        str(loc["countryName"]),
        str(loc["countryCode"]),
        str(loc["cityName"]),
        str(loc["maerskRkstCode"]),
        str(loc["maerskRktsCode"]),
        str(loc["timezoneId"]),
        _BRANDS,
        _BRAND_NAMES,
        str(loc["countryGeoId"]),
        region_code,
        region_name,
    ])
    uuid = jd.get_md5(fingerprint)
    # An "already exists" lookup by UUID was commented out in the original and
    # stays disabled here.

    values = [
        uuid,
        loc["type"],
        loc["maerskGeoLocationId"],
        un_loc_code,
        un_loc_code,  # stored twice: columns unLocCode and mskcode
        loc["countryName"],
        loc["countryCode"],
        loc["cityName"],
        loc["maerskRkstCode"],
        loc["maerskRktsCode"],
        loc["timezoneId"],
        _BRANDS,
        _BRAND_NAMES,
        loc["countryGeoId"],
        region_code,
        region_name,
    ]
    sqlx = _INSERT_PREFIX + "','".join(_sql_text(v) for v in values) + "',now(),0) "
    print("")
    print(sqlx)
    print("")
    db.exec_sql(sqlx)
    print("---- databse insert success ----")


def op_ports(citykey):
    """Fetch Maersk locations matching *citykey* and store each one in tablexxxx.

    Best effort: a failed request/parse is printed and ends the call; a record
    that cannot be stored is printed and skipped without affecting the others.

    Args:
        citykey: raw city search keyword (converted with ``str`` and URL-encoded).
    """
    try:
        url = _location_url(citykey)
        locations = _parse_locations(sp.get_res_no_verify(url))
    except Exception as e:
        print(str(e))
        return

    for loc in locations:
        try:
            _save_location(loc)
        except Exception as e:
            print(str(e))


def remove_unlockcode():
    """Delete rows of tablexxxx that repeat an earlier cityName + countryName pair.

    Rows are read newest first (ORDER BY createtime desc), so the first row
    seen for each pair is kept and later ones are deleted.
    """
    sql = " SELECT UUID,cityName,countryName from tablexxxx ORDER BY createtime desc "
    rows = db.get_sql(sql)
    arrcode = []   # keys in first-seen order; printed at the end
    seen = set()   # same keys, for O(1) membership tests
    arrsql = []
    for row in rows:
        key = str(row[1]) + str(row[2])
        if key in seen:
            print(str(row[0]))
            arrsql.append(" delete from tablexxxx where UUID='" + str(row[0]) + "'")
        else:
            seen.add(key)
            arrcode.append(key)
    if arrsql:
        db.exec_sqls(arrsql)
    print(arrcode)
    print("done")


def op_msk_port_data():
    """Re-run op_ports for every distinct cityName already stored in tablexxxx."""
    sql = " SELECT cityName from tablexxxx WHERE unLocCode is not null GROUP BY cityName ORDER BY cityName desc "
    for row in db.get_sql(sql):
        op_ports(row[0])


def op_user_search_his():
    """Process pending search records: fetch ports for new keywords, then delete the records.

    A keyword already present in redis is skipped; otherwise it is marked in
    redis and passed to op_ports. Every record read is queued for deletion.
    Any error is printed and swallowed.
    """
    try:
        arr_sql = []
        sql = "select id,k from table_search_xxxxx"
        rows = db.get_sql(sql)
        print("get arr from table_search_xxxxx length is:" + str(len(rows)))
        for row in rows:
            record_id = str(row[0])
            key = str(row[1])
            arr_sql.append("delete from table_search_xxxxx where id='" + record_id + "';")
            if redisx.has_key(key):
                print(key + " have done redis")
                continue
            redisx.set(key, "")
            op_ports(key)
        if arr_sql:
            db.exec_sqls(arr_sql)
    except Exception as ex:
        print(str(ex))


# Search by city keyword.
if __name__ == '__main__':
    # Polling mode, disabled in the original:
    # while True:
    #     op_user_search_his()
    #     time.sleep(90)
    op_ports('apapa')
以上是无需登录、也无需token即可获取马士基船公司港口五字码的方法。
这些数据用作POL、POD的基础数据。马士基的港口五字码可能会更新(例如青岛港在2020年是CNQIN,目前已经变为国际化的CNTAO),并且各个船公司的港口表在web或app上体现的结构完全不一样,例如cosco用的则是long数值类型。
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 全程不用写代码,我用AI程序员写了一个飞机大战
· MongoDB 8.0这个新功能碉堡了,比商业数据库还牛
· 记一次.NET内存居高不下排查解决与启示
· 白话解读 Dapr 1.15:你的「微服务管家」又秀新绝活了
· DeepSeek 开源周回顾「GitHub 热点速览」