数据清洗2

代码如下:

复制代码
import json
import pymysql
import pandas as pd
import requests
from sqlalchemy import create_engine

# 连接MySQL数据库
conn = pymysql.connect(host='localhost', user='root', password='你的密码', db='你的数据库', charset='utf8')

# 编写SQL查询语句
sql = "SELECT * FROM table3 "

# 使用pandas读取数据
df = pd.read_sql(sql, conn)

new_addresses = []

# 更改循环变量名,避免与 DataFrame 对象名冲突
for i, address in enumerate(df['单位']):
    if address:
        url = 'https://restapi.amap.com/v3/geocode/geo?address=' + address + '&output=JOSON&key=你的key'
        request = requests.get(url).text
        try:
            result = json.loads(request)
        except json.JSONDecodeError as e:
            new_address=address
        if result['status'] == "1":
            adcode = result['geocodes'][0]['adcode']
            sql2 = "UPDATE table3 SET 行政区划编码 = '%s' where 单位 = '%s' " % (adcode, address)
            # 执行数据库插入
            cursor = conn.cursor()
            cursor.execute(sql2)
            conn.commit()
            if result['geocodes'][0]['district']:
                new_address = str(result['geocodes'][0]['province']) + str(result['geocodes'][0]['city']) + str(result['geocodes'][0]['district'])
            else:
                if result['geocodes'][0]['city']:
                    new_address = result['geocodes'][0]['province'] + result['geocodes'][0]['city']
                else:
                    new_address = result['geocodes'][0]['province']
        else:
            new_address = address
    else:
        new_address = ""
        address = ""
    if new_address != address:
        sql1 = "UPDATE table3 SET 单位 = '%s' WHERE 单位 = '%s'" % (new_address, address)
        #向table3插入行政区划编码
        # 使用cursor执行SQL语句
        cursor = conn.cursor()
        cursor.execute(sql1)
        conn.commit()
        cursor.close()
    print(address + "---->" + new_address + " --" + str(i))

conn.close()
复制代码

同理其他表也是这样操作,然后再合并。

posted @ 2024-03-19 20:56  橘子味芬达水  阅读(4)  评论(0编辑  收藏  举报