poorX

  博客园  :: 首页  :: 新随笔  :: 联系 :: 订阅 订阅  :: 管理

接口文档:http://lbs.baidu.com/index.php?title=webapi/guide/webservice-placeapi#service-page-anchor-1-3

1、示例中的数据是从链家拿到的北京小区信息,包含小区名和所在行政区,例如朱雀门/西城,实例化CheckEstateData(朱雀门, 西城)

2、API中region可以多维度组合,为防止全国或同行政区有重名,例如北京有多个万达广场,最好加上region=市+区

3、百度API有时会把小区名错误地解析成其他名称,例如《东四西大街50号院》->《东4西大街》;这类数据不会直接更新,而是设置 status = -1,需要手动维护

# -*- coding: utf-8 -*-
import urllib2, json, sys, time

# Python 2 only: re-expose sys.setdefaultencoding and switch the process-wide
# default string encoding to UTF-8.
# NOTE(review): presumably done so implicit str/unicode conversions of the
# Chinese estate names do not fail -- confirm before porting to Python 3.
reload(sys)
sys.setdefaultencoding("utf8")

# Baidu Place API search URL template. The three %s slots are filled, in order,
# with the district (appended to the fixed region prefix for Beijing), the
# query text (the estate name) and the API key.
GLOBAL_URL = "http://api.map.baidu.com/place/v2/search?region=北京%s&city_limit=true&query=%s&page_size=10&output=json&ak=%s"
# Baidu API key ("ak" parameter); empty in this listing.
GLOBAL_AK = ""
# Selects name/district for up to 100 estates that have neither a location nor
# a status yet.
GLOBAL_SQL = "SELECT `name`, `district` FROM estate WHERE location IS NULL and `status` IS NULL LIMIT 100 "
# Keys copied from a Baidu result into the estate row; a result containing all
# of them is treated as having complete attributes.
COLUMN_LIST = ["area", "address", "location", "province", "city", "uid"]
# UPDATE template. Slots, in order: the SET assignments, source_name, status,
# result (raw API results as JSON), then the name and district of the row.
UPDATE_SQL = """
    UPDATE estate 
    SET %s, source_name = '%s', `status` = %s, `result` = '%s'
    WHERE `name` = '%s' AND `district` = '%s'
"""

# Meaning of the estate.status column (English rendering of the note below):
#    0  estate name matches exactly and all attributes are present
#    1  estate name matches exactly but some attributes are missing
#    2  estate name matches exactly but no entry has detail=1
#    3  no exact name match; first detail=1 entry, all attributes present
#    4  no exact name match; first detail=1 entry, some attributes missing
#    5  no exact name match and no entry has detail=1
#   -1  nothing usable was found
# NOTE(review): the get_info code visible in this file never returns 2 or 5.
"""
estate的status字段
更新数据状态
小区名全匹配且全属性 0
小区名全匹配属性不全 1
小区名全匹配无detail=1 2
小区名无全匹配第一个detail=1的全属性数据 3
小区名无全匹配第一个detail=1的属性不全数据 4
小区名无全匹配没有detail=1的数据 5
没搜到任何数据 -1
"""


class CheckEstate:
    """Placeholder for the local storage (database) layer.

    Callers in this file construct the object with a SQL string (or ``None``)
    and then read two attributes:

    * ``error`` -- falsy on success, otherwise an error message;
    * ``r``     -- an iterable of result rows, each indexable by column name.

    The original constructor accepted no argument and defined neither
    attribute, so every call site failed with ``TypeError``.  The real
    database access is not part of this listing, so this placeholder reports
    that fact through ``error`` rather than pretending the statement ran.

    NOTE(review): callers pass ``None`` when they want the rows still to be
    processed -- presumably that should run ``GLOBAL_SQL``; confirm when
    implementing the storage backend.
    """

    def __init__(self, sql=None):
        """Record *sql* and expose the attributes callers expect.

        Args:
            sql: SQL text to execute, or ``None``.
        """
        # Stored locally (translated from the original comment); the actual
        # execution has to be supplied by whoever wires in a database.
        self.sql = sql
        self.r = []
        self.error = "ERR: CheckEstate storage backend is not implemented"

def _sql_escape(value):
    """Return *value* as text safe to place between single quotes in SQL.

    Backslashes and single quotes are doubled.  This matters for the JSON
    payloads stored by this script: an unescaped ``\\"`` or ``'`` inside the
    text would either break the statement or alter the stored JSON.

    NOTE(review): assumes MySQL with its default sql_mode (backslash is an
    escape character).  Parameterized queries would be the proper fix, but the
    storage layer used here only accepts a finished SQL string.
    """
    text = "%s" % (value,)
    return text.replace("\\", "\\\\").replace("'", "''")


def _url_quote(text, safe=""):
    """Percent-encode *text* (as UTF-8) for use inside a URL."""
    try:
        from urllib import quote  # Python 2, which this script targets
    except ImportError:
        from urllib.parse import quote
    if not isinstance(text, bytes):
        text = ("%s" % (text,)).encode("utf-8")
    return quote(text, safe)


class CheckEstateData:
    """Look up one estate in the Baidu Place API and store the outcome.

    Constructing an instance performs the whole workflow: query the API,
    choose the best result and issue the UPDATE for the estate row.
    Afterwards ``error`` holds an error message (or ``None``) and ``msg`` a
    success message (or ``None``).
    """

    def __init__(self, name, district):
        """Run the lookup for the estate *name* located in *district*."""
        self.name = name
        self.district = district
        self.error = None
        self.msg = None
        self.__get_data__()
        # An empty result list is a valid answer ("nothing found") and must
        # still be written back with status -1; otherwise the row keeps a NULL
        # status and is selected again on every run.  Only a failed request
        # (datas is None) skips the update.
        if self.datas is not None:
            self.do()

    def __get_data__(self):
        """Query the API and set ``self.datas`` / ``self.error``.

        On success ``datas`` is the ``results`` list of the response; on any
        failure ``datas`` is ``None`` and ``error`` describes the problem.
        """
        try:
            url = GLOBAL_URL % (
                _url_quote(self.district),
                _url_quote(self.name),
                GLOBAL_AK,
            )
            # Second pass encodes the non-ASCII region prefix that is part of
            # the template itself; "%" is kept so nothing is encoded twice.
            url = _url_quote(url, ":/?&=%")
            print("URL: %s" % url)
            response = urllib2.urlopen(url)
            try:
                payload = json.loads(response.read())
            finally:
                response.close()
            if payload["status"] == 0 and payload["message"] == "ok":
                self.datas, self.error = payload["results"], None
            else:
                self.datas, self.error = None, "ERR: API return %s" % payload["message"]
        except Exception as e:
            # Per-estate boundary: record the failure instead of aborting the
            # whole batch.
            self.datas, self.error = None, "ERR: get data %s %s" % (self.name, str(e))

    def update(self, sql):
        """Execute *sql* through CheckEstate and record success or failure."""
        print("INFO: sql %s" % sql)
        s = CheckEstate(sql)
        if s.error:
            self.error = "ERR: UPDATE ERR, %s" % s.error
        else:
            self.msg = "INFO: %s ok" % self.name

    def check_colunm(self, data):
        """Return True when *data* contains every key listed in COLUMN_LIST."""
        return set(COLUMN_LIST).issubset(data)

    def get_info(self):
        """Pick the result to store and its status code.

        Returns:
            A ``(result, status)`` tuple:
            ``0``/``1`` -- exact name match with detail == 1, attributes
            complete / incomplete;
            ``3``/``4`` -- first entry with detail == 1 when no exact match
            exists, attributes complete / incomplete;
            ``(None, -1)`` -- no entry qualifies.
        """
        for d in self.datas:
            if d["name"] == self.name and d.get("detail") == 1:
                return d, (0 if self.check_colunm(d) else 1)
        for d in self.datas:
            if d.get("detail") == 1:
                return d, (3 if self.check_colunm(d) else 4)
        return None, -1

    def do(self):
        """Build the UPDATE statement for this estate and execute it."""
        r, status = self.get_info()
        raw_results = _sql_escape(json.dumps(self.datas, ensure_ascii=False))
        if r:
            # Walk COLUMN_LIST (not the dict) so the SET clause has a stable
            # column order.  "location" is a nested object and is stored as
            # JSON text.
            value = ", ".join(
                "%s = '%s'" % (
                    k,
                    _sql_escape(json.dumps(r[k]) if k == "location" else r[k]),
                )
                for k in COLUMN_LIST
                if k in r
            )
            sql = UPDATE_SQL % (
                value,
                _sql_escape(r["name"]),
                status,
                raw_results,
                _sql_escape(self.name),
                _sql_escape(self.district),
            )
        else:
            sql = "UPDATE estate set `status` = %s, `result` = '%s' WHERE `name` = '%s' and district = '%s'" % (
                status,
                raw_results,
                _sql_escape(self.name),
                _sql_escape(self.district),
            )
        self.update(sql)


def get_estate_info():
    """Process every estate row returned by the storage layer.

    Returns:
        The storage error message when the rows cannot be loaded, otherwise
        ``None``.
    """
    c = CheckEstate(None)
    if c.error:
        print(c.error)
        return c.error
    for d in c.r:
        estate = CheckEstateData(d["name"], d["district"])
        if estate.error:
            print(estate.error)
        else:
            print(estate.msg)
        # Pause between requests.
        time.sleep(0.5)
    return None


if __name__ == "__main__":
    get_estate_info()

 

为防止链家和百度的小区名有差异,在存储时将API的所有数据本地存一份

-- One row per residential estate. `name` and `district` are the source-side
-- values used to look the row up; the remaining descriptive columns are
-- filled from the map API result, and `result` keeps the raw API response.
-- `status` is NULL until a row has been processed; the codes are defined in
-- the accompanying script.
CREATE TABLE `estate` (
  `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
  `name` varchar(50) NOT NULL COMMENT '小区名',
  `source_name` varchar(50) DEFAULT NULL COMMENT 'baidu小区原名',
  `district` varchar(20) DEFAULT NULL COMMENT '区(链家侧数据)',
  `area` varchar(20) DEFAULT NULL COMMENT '区(百度侧数据)',
  `street_id` varchar(50) DEFAULT NULL COMMENT '街景图id',
  `address` varchar(100) DEFAULT NULL COMMENT '地址',
  `location` json DEFAULT NULL COMMENT '坐标',
  `province` varchar(30) DEFAULT NULL COMMENT '省份',
  `city` varchar(30) DEFAULT NULL COMMENT '城市',
  `uid` varchar(100) DEFAULT NULL COMMENT 'poi的唯一标示,可用于详情检索',
  `status` tinyint(4) DEFAULT NULL COMMENT '更新数据状态,具体含义看代码',
  `result` json DEFAULT NULL COMMENT '接口返回的数据',
  PRIMARY KEY (`id`),
  -- Secondary index on the estate name only; updates filter on name + district.
  KEY `idx_name` (`name`)
) ENGINE=InnoDB AUTO_INCREMENT=8192 DEFAULT CHARSET=utf8mb4;

 

posted on 2020-04-09 16:56  poorX  阅读(906)  评论(0)  编辑  收藏  举报