大三下入学测试---数据分析练习
分三部分:关键字提取,利用机器学习算法实现文本分类,获取行政区划编码,数据下钻后续再说
关键字提取:利用jieba分词实现读取数据库的某一列,并对该列分词。
import pymysql import pandas as pd from jieba.analyse import extract_tags # 创建数据库连接 db = pymysql.connect(host="", user="", password="", db="", charset='utf8mb4') cursor = db.cursor() # 查询所有项目简介 query_sql = "SELECT 序号,项目简介 FROM your_tablename" cursor.execute(query_sql) results = cursor.fetchall() #检查字段 check_sql = "SHOW COLUMNS FROM your_tablename LIKE 'keyword'" cursor.execute(check_sql) if not cursor.fetchone(): query_sql = "ALTER TABLE your_tablename ADD keyword VARCHAR(255)" cursor.execute(query_sql) else: query = "ALTER TABLE your_tablename DROP COLUMN `keyword`" cursor.execute(query) query_sql = "ALTER TABLE your_tablename ADD keyword VARCHAR(255)" cursor.execute(query_sql) for result in results: # print(result[1]) for row in result: #如果项目简介非空,则提取关键字 if row: keywords = ",".join(extract_tags(row, topK=10)) print(keywords) update_sql = f"UPDATE your_tablename SET keyword='{keywords}' where 序号='{result[0]}'" cursor.execute(update_sql) db.commit() # 关闭数据库连接 db.close()
行政编码添加:调用高德的api获取json数据里的行政编码
import requests import pymysql import json db = pymysql.connect(host="", user="", password="", db="", charset='utf8mb4') cursor = db.cursor() #调用高德接口获取城市码 def get_adcode(ID,province): # 接口地址 url = "https://restapi.amap.com/v3/config/district" key = "your_key" params = { "keywords": province, "sub_admin": "2", "key": key, 'extensions': 'base' } response = requests.get(url=url, params=params) if response: data = response.json() # print(data['districts'][0]['adcode']) insert_adcode_to_database(data['districts'][0]['adcode'], ID) #获取省市和id def get_province_and_id(): query_sql = "select ID ,省市 from your_tablename " cursor.execute(query_sql) provinces = cursor.fetchall() for province in provinces: print(province[0], province[1]) get_adcode(province[0],province[1]) #插入城市码到数据库 def insert_adcode_to_database(code, ID): query_sql = f"update ke set 城市编码={code} WHERE ID={ID}" cursor.execute(query_sql) db.commit() #检查是否存在,若存在先删除再创建,否则直接创建 def check_column(cursor): check_sql = "SHOW COLUMNS FROM your_tablename LIKE '城市编码'" cursor.execute(check_sql) if not cursor.fetchone(): query_sql = "ALTER TABLE your_tablename ADD 城市编码 VARCHAR(255)" cursor.execute(query_sql) else: query = "ALTER TABLE your_tablename DROP COLUMN `城市编码`" cursor.execute(query) query_sql = "ALTER TABLE your_tablename ADD 城市编码 VARCHAR(255)" cursor.execute(query_sql) if __name__ == '__main__': # check_column(cursor=cursor) get_province_and_id()