大三下入学测试---数据分析练习

分三部分:关键字提取,利用机器学习算法实现文本分类,获取行政区划编码,数据下钻后续再说

关键字提取:利用jieba分词实现读取数据库的某一列,并对该列分词。

import pymysql
import  pandas as pd
from jieba.analyse import extract_tags

# 创建数据库连接
db = pymysql.connect(host="", user="", password="", db="", charset='utf8mb4')
cursor = db.cursor()

# 查询所有项目简介
query_sql = "SELECT 序号,项目简介  FROM your_tablename"
cursor.execute(query_sql)
results = cursor.fetchall()

#检查字段
check_sql = "SHOW COLUMNS FROM your_tablename LIKE 'keyword'"
cursor.execute(check_sql)
if not cursor.fetchone():
    query_sql = "ALTER TABLE your_tablename ADD keyword VARCHAR(255)"
    cursor.execute(query_sql)
else:
    query = "ALTER TABLE your_tablename DROP COLUMN `keyword`"
    cursor.execute(query)
    query_sql = "ALTER TABLE your_tablename ADD keyword VARCHAR(255)"
    cursor.execute(query_sql)


for result in results:
    # print(result[1])
    for row in result:
        #如果项目简介非空,则提取关键字
        if row:
            keywords = ",".join(extract_tags(row, topK=10))
            print(keywords)
            update_sql = f"UPDATE  your_tablename SET keyword='{keywords}' where 序号='{result[0]}'"
            cursor.execute(update_sql)

db.commit()
# 关闭数据库连接
db.close()

行政编码添加:调用高德的api获取json数据里的行政编码

import requests
import pymysql
import json

db = pymysql.connect(host="",
                     user="",
                     password="",
                     db="",
                     charset='utf8mb4')
cursor = db.cursor()

#调用高德接口获取城市码
def get_adcode(ID,province):
    # 接口地址
    url = "https://restapi.amap.com/v3/config/district"

    
    key = "your_key"

    params = {
        "keywords": province,
        "sub_admin": "2",
        "key": key,
        'extensions': 'base'

    }

    response = requests.get(url=url, params=params)
    if response:
        data = response.json()
        # print(data['districts'][0]['adcode'])
        insert_adcode_to_database(data['districts'][0]['adcode'], ID)

#获取省市和id
def get_province_and_id():
    query_sql = "select ID ,省市 from your_tablename "
    cursor.execute(query_sql)
    provinces = cursor.fetchall()
    for province in provinces:
        print(province[0], province[1])
        get_adcode(province[0],province[1])

#插入城市码到数据库
def insert_adcode_to_database(code, ID):
    query_sql = f"update  ke  set 城市编码={code} WHERE ID={ID}"
    cursor.execute(query_sql)
    db.commit()

#检查是否存在,若存在先删除再创建,否则直接创建
def check_column(cursor):
    check_sql = "SHOW COLUMNS FROM your_tablename LIKE '城市编码'"
    cursor.execute(check_sql)
    if not cursor.fetchone():
        query_sql = "ALTER TABLE your_tablename ADD 城市编码 VARCHAR(255)"
        cursor.execute(query_sql)
    else:
        query = "ALTER TABLE your_tablename DROP COLUMN `城市编码`"
        cursor.execute(query)
        query_sql = "ALTER TABLE your_tablename ADD 城市编码 VARCHAR(255)"
        cursor.execute(query_sql)



if __name__ == '__main__':
    # check_column(cursor=cursor)
    get_province_and_id()

 

posted @ 2024-03-06 21:21  cojames  阅读(31)  评论(0编辑  收藏  举报