0803/Code
Data processing:
import csv
from csv import writer

import pandas as pd

# Split the raw comma-separated records into one CSV per hour,
# keyed by the 4th field (the hour stamp).
with open('july.txt', 'r', encoding='utf-8') as file:
    for line in file:
        record = line.strip('\n').split(',')
        id_time = record[3]
        with open("{}.csv".format(id_time), "a", newline="") as f_object:
            writer_object = writer(f_object)
            writer_object.writerow(record)

# For each hour 6-18, aggregate outbound flow (SUM1) by origin station (OUT)
# and inbound flow (SUM2) by destination station (IN).
# Note: reading columns by name assumes each per-hour CSV carries a header row.
for i in range(6, 19):
    csv_file = pd.read_csv('{}.csv'.format(i), usecols=['OUT', 'SUM1', 'IN', 'SUM2'])
    grouped_OUT = csv_file.groupby('OUT')['SUM1'].sum().reset_index()
    grouped_OUT.to_csv('{}_OUT.csv'.format(i), index=False)
    grouped_IN = csv_file.groupby('IN')['SUM2'].sum().reset_index()
    grouped_IN.to_csv('{}_IN.csv'.format(i), index=False)
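The aggregation step reads columns by name, which assumes every per-hour CSV has a header row; the append-mode writer above never creates one. A minimal sketch of one way to guarantee it, with a hypothetical column order that must be adjusted to the real layout of july.txt:

import os
import csv
from csv import writer

# Hypothetical column order -- adjust to the actual layout of july.txt.
HEADER = ['OUT', 'IN', 'SUM1', 'ID_TIME', 'SUM2']

def append_with_header(path, record):
    """Append a record, writing the header first if the file is new."""
    new_file = not os.path.exists(path)
    with open(path, 'a', newline='') as f:
        w = writer(f)
        if new_file:
            w.writerow(HEADER)
        w.writerow(record)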
Task 1 - AUTO-ARIMA:
import csv
from csv import writer

import numpy as np
from pmdarima import auto_arima

# For every node, fit an ARIMA model to its 13 hourly observations (hours 6-18)
# and forecast the next period together with a confidence interval.
with open('节点IN.csv') as csv_file:
    csv_reader_lines = csv.reader(csv_file)
    for line in csv_reader_lines:
        data = np.asarray(line[1:14], dtype=float)  # columns 1-13 hold the hourly series
        try:
            # auto_arima already fits the selected model, so no separate fit() call is needed.
            model1 = auto_arima(data, start_p=1, start_q=1, max_p=3, max_q=3,
                                start_P=0, seasonal=False, trace=True,
                                error_action='ignore', suppress_warnings=True,
                                stepwise=True)
            forecast, conf_int = model1.predict(n_periods=1, return_conf_int=True)
            # Values were stored at 1/10 scale, so rescale the forecast and interval.
            list_data = [line[0], forecast.tolist()[0] / 0.1,
                         conf_int[0][0] / 0.1, conf_int[0][1] / 0.1]
            with open("节点IN_PRED.csv", "a", newline="") as f_object:
                writer(f_object).writerow(list_data)
        except Exception:
            # Log rows that auto_arima fails on so they can be inspected later.
            with open("LOG.csv", "a", newline="") as f2_object:
                writer(f2_object).writerow(line)
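The loop writes each forecast straight to file without any accuracy check. A minimal holdout sketch, assuming the same 13-value hourly series (the numbers below are made up), that refits on the first 12 hours and scores the held-out last one:

import numpy as np
from pmdarima import auto_arima

# Hypothetical hourly counts for one node.
series = np.array([120., 135, 150, 170, 160, 155, 150, 145, 150, 160, 175, 180, 165])

train, holdout = series[:-1], series[-1]
model = auto_arima(train, start_p=1, start_q=1, max_p=3, max_q=3,
                   seasonal=False, error_action='ignore',
                   suppress_warnings=True, stepwise=True)
pred = model.predict(n_periods=1)
print('holdout absolute error:', abs(float(np.asarray(pred)[0]) - holdout))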
Task 2: SVM / GBDT / RF / KNN
import csv
from csv import writer

import numpy as np
from sklearn import metrics
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR

# For every node, regress the 13 hourly values (hours 6-18) on the hour index
# with four models, then record R2, MAE and the fitted values.
# Note: train and test are the same 13 points, so these scores measure
# in-sample fit quality, not out-of-sample accuracy.
with open('节点IN.csv') as csv_file:
    csv_reader_lines = csv.reader(csv_file)
    for line in csv_reader_lines:
        line_data = [float(x) for x in line[1:14]]
        X = np.asarray(range(6, 19)).reshape(-1, 1)  # hour of day as the only feature
        y = np.asarray(line_data)
        X_train, X_test, y_train, y_test = X, X, y, y

        # SVM
        model = SVR(kernel='rbf', gamma='scale', C=10, epsilon=0.05)
        model.fit(X_train, y_train)
        # GBDT
        model2 = GradientBoostingRegressor()
        model2.fit(X_train, y_train)
        # RF
        model3 = RandomForestRegressor()
        model3.fit(X_train, y_train)
        # KNN
        model4 = KNeighborsRegressor()
        model4.fit(X_train, y_train)

        preds = [m.predict(X_test) for m in (model, model2, model3, model4)]
        R2s = [metrics.r2_score(y_test, p) for p in preds]
        MAEs = [metrics.mean_absolute_error(y_test, p) for p in preds]

        list_data = R2s + MAEs + [p.tolist() for p in preds]
        with open("节点_NH.csv", "a", newline="") as f_object:
            writer(f_object).writerow(list_data)
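Since the scores above are computed on the training points themselves, leave-one-out cross-validation is a cheap way to get an honest error estimate from only 13 observations. A sketch under the same hour-index setup (the series below is made up):

import numpy as np
from sklearn.model_selection import LeaveOneOut, cross_val_score
from sklearn.svm import SVR

X = np.arange(6, 19).reshape(-1, 1)
# Hypothetical hourly counts for one node.
y = np.array([120., 135, 150, 170, 160, 155, 150, 145, 150, 160, 175, 180, 165])

model = SVR(kernel='rbf', gamma='scale', C=10, epsilon=0.05)
# One fold per observation; scores are negated MAEs, so flip the sign back.
scores = cross_val_score(model, X, y, cv=LeaveOneOut(),
                         scoring='neg_mean_absolute_error')
print('LOO MAE:', -scores.mean())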
Task 3: sklearn RF
# coding:utf-8
import csv
from csv import writer
import time

import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.inspection import plot_partial_dependence
from sklearn.model_selection import GridSearchCV, cross_val_score

FEATURES = ['STATION', 'DIS_CEN', 'ROAD', 'CANYIN', 'FENGJING', 'GONGSI',
            'GOUWU', 'KEJIAO', 'ZHUZHAI', 'SHENGHUO', 'YILIAO', 'BTSM',
            'WATER', 'POI', 'RENKOU', 'JULI']

# Main routine: fit, cross-validate, and plot for one hour
def main_process(NAME, i, nestimator, maxdepth):
    pd_data = pd.read_csv(NAME, low_memory=False)
    features = FEATURES + ['OUT_HOUR{}'.format(i), 'IN_HOUR{}'.format(i)]
    X_origin = pd_data.loc[:, features]
    y_origin = pd_data.loc[:, 'HOUR{}'.format(i)]
    # Shuffled copies for cross-validation; the full set for fitting.
    X_test, y_test = sklearn.utils.shuffle(X_origin, y_origin)
    X, y = X_origin, y_origin

    # Define and train the model
    rfreg = RandomForestRegressor(n_estimators=nestimator, max_depth=maxdepth, random_state=90)
    rfreg.fit(X, y)
    print('Feature importances:')
    print(rfreg.feature_importances_)
    y_pred = rfreg.predict(X)
    np.savetxt('{}.txt'.format(i), y_pred)

    # 5-fold cross-validation scores; the negated MAE is flipped back to positive.
    cross_score = cross_val_score(rfreg, X_test, y_test, cv=5, scoring='r2').mean()
    print('R2:', cross_score)
    cross_score2 = cross_val_score(rfreg, X_test, y_test, cv=5, scoring='neg_mean_absolute_error').mean()
    print('MAE:', -cross_score2)
    cross_score3 = cross_val_score(rfreg, X_test, y_test, cv=5, scoring='explained_variance').mean()
    print('EV:', cross_score3)
    with open("节点_参数.csv", "a", newline="") as f_object:
        writer(f_object).writerow([cross_score, -cross_score2, cross_score3])

    # Plot predicted vs. observed values
    plt.ion()
    plt.figure(figsize=(24, 8))
    plt.plot(range(len(y_pred)), y_pred, 'r', label="predict")
    plt.plot(range(len(y_pred)), y, 'b', label="test", alpha=0.5)
    plt.legend(loc="upper center")
    plt.xlabel("ID")
    plt.ylabel('流量')
    plt.savefig("{}.png".format(i))
    plt.show()
    plt.close()
    time.sleep(5)

    # Partial dependence plots for every feature.
    # Note: plot_partial_dependence was removed in scikit-learn 1.2; on newer
    # versions use PartialDependenceDisplay.from_estimator instead.
    plot_partial_dependence(rfreg, X, features, n_jobs=3,
                            grid_resolution=20, method='brute')
    plt.ion()  # interactive mode so figures close automatically
    plt.gcf().set_size_inches(12, 24)
    plt.savefig("{}_1.png".format(i))
    plt.show()
    time.sleep(5)
    plt.close()

# Automatic hyper-parameter search
def grid_search(NAME, i_1):
    pd_data = pd.read_csv(NAME, low_memory=False)
    # Fixed: the original formatted these columns with an undefined 'i'.
    features = FEATURES + ['OUT_HOUR{}'.format(i_1), 'IN_HOUR{}'.format(i_1)]
    X_origin = pd_data.loc[:, features]
    y_origin = pd_data.loc[:, 'HOUR{}'.format(i_1)]
    X_origin, y_origin = sklearn.utils.shuffle(X_origin, y_origin)
    X, y = X_origin[0:5000], y_origin[0:5000]  # subsample to keep the search fast
    rfreg_ = RandomForestRegressor(random_state=90)
    # Parameter ranges to search
    param_grid = {'n_estimators': range(1, 400, 50),
                  'max_depth': range(1, 20, 3)}
    rf_best = GridSearchCV(rfreg_, param_grid=param_grid, cv=5,
                           scoring='neg_mean_absolute_error')
    rf_best.fit(X, y)
    # Print and return the best parameters
    print(rf_best.best_params_)
    return rf_best.best_params_['n_estimators'], rf_best.best_params_['max_depth']

path = '线_ALL_origin.csv'
for i_real in range(6, 19):
    # nestimator_, maxdepth_ = grid_search(path, i_real)
    nestimator_ = 200
    maxdepth_ = 10
    with open("节点_树参数.csv", "a", newline="") as f_object:
        writer(f_object).writerow([nestimator_, maxdepth_])
    main_process(path, i_real, nestimator_, maxdepth_)
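main_process calls plot_partial_dependence, which was deprecated in scikit-learn 1.0 and removed in 1.2. A minimal replacement sketch for newer versions, using toy data (make_regression merely stands in for the station features; names and sizes are illustrative):

import matplotlib
matplotlib.use('Agg')
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.inspection import PartialDependenceDisplay

# Toy regression data standing in for the station features above.
X, y = make_regression(n_samples=200, n_features=4, random_state=90)
rfreg = RandomForestRegressor(n_estimators=200, max_depth=10, random_state=90).fit(X, y)

# from_estimator replaces plot_partial_dependence in scikit-learn >= 1.2.
disp = PartialDependenceDisplay.from_estimator(rfreg, X, features=[0, 1, 2, 3],
                                               grid_resolution=20, n_jobs=3)
disp.figure_.set_size_inches(12, 8)
disp.figure_.savefig('pdp_demo.png')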
Network plotting:
import csv
from csv import reader

import matplotlib.pyplot as plt
import networkx as nx
from jenkspy import jenks_breaks

def networkxG(NAME1, NAME2, i):
    # Initialise the network
    G = nx.Graph()
    # Node positions keyed by node id
    pos = {}
    # Add nodes from the node file (id, lon, lat)
    with open(NAME1, "r") as my_file:
        file_reader = reader(my_file)
        for line in file_reader:
            node_id, lon, lat = line[0], line[1], line[2]
            G.add_node(node_id)
            pos[node_id] = (float(lon) * 10, float(lat) * 10)

    # Add edges; column i-4 of the OD file holds the weight for hour i
    jen_list = []
    with open(NAME2, "r") as my_file2:
        file_reader2 = reader(my_file2)
        for line in file_reader2:
            OUT, IN, wei = line[0], line[1], line[i - 4]
            G.add_edge(OUT, IN, weight=float(wei))
            jen_list.append(float(wei))

    # Bucket edges by weight into four layers so they can be drawn separately
    # (fixed thresholds here; the commented-out jenks_breaks call would
    # compute natural-breaks thresholds instead).
    # breaks = jenks_breaks(jen_list, n_classes=2)
    # print(breaks)
    G1, G2, G3, G4 = [], [], [], []
    for u, v in G.edges():
        b = G[u][v]['weight']
        if b <= 10:
            G1.append((u, v))
        elif b <= 50:
            G2.append((u, v))
        elif b <= 100:
            G3.append((u, v))
        else:
            G4.append((u, v))

    # nx.draw_networkx_nodes(G, pos, node_size=0.1)
    plt.ion()
    plt.figure(dpi=100)
    # Draw low-weight edges first so the heavier ones sit on top
    nx.draw_networkx_edges(G, pos, edgelist=G1, width=0.1, alpha=0.1, edge_color='peachpuff')
    nx.draw_networkx_edges(G, pos, edgelist=G2, width=0.2, alpha=0.2, edge_color='lightsalmon')
    nx.draw_networkx_edges(G, pos, edgelist=G3, width=0.2, alpha=0.3, edge_color='tomato')
    nx.draw_networkx_edges(G, pos, edgelist=G4, width=0.2, alpha=0.5, edge_color='indianred')
    plt.savefig("{}_1.png".format(i))  # save before show so the figure is not blank
    plt.show()
    plt.close()

node_path = '点.csv'
edge_path = 'OD_PRED.csv'
for i in range(6, 19):
    networkxG(node_path, edge_path, i)
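The four weight classes above are hardcoded at 10/50/100, and the commented-out jenks_breaks call hints that natural breaks were considered. A short sketch, under the assumption that four natural-breaks classes are wanted, of deriving the thresholds from the data instead:

from jenkspy import jenks_breaks

# Hypothetical edge weights standing in for jen_list.
weights = [2.0, 5.5, 8.0, 12.0, 30.0, 47.0, 66.0, 90.0, 120.0, 300.0]

# n_classes=4 returns 5 boundary values: min, three internal breaks, max.
breaks = jenks_breaks(weights, n_classes=4)
t1, t2, t3 = breaks[1], breaks[2], breaks[3]

def edge_class(w):
    """Map a weight onto one of the four drawing layers."""
    if w <= t1:
        return 0
    elif w <= t2:
        return 1
    elif w <= t3:
        return 2
    return 3

print(breaks, [edge_class(w) for w in weights])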
Image sequence to GIF:
import imageio

# Stitch the hourly frames (16.jpg-19.jpg) into an animated GIF at one frame
# per second. Note: the fps argument applies to the imageio v2 API; newer
# releases expect a per-frame duration instead.
with imageio.get_writer(uri='节点_IN.gif', mode='I', fps=1) as writer:
    for i in range(15, 19):
        writer.append_data(imageio.imread(f'{i+1}.jpg'))
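An equivalent one-call form using imageio v2's mimsave, in case the writer interface is inconvenient (same frame filenames as above; newer imageio releases replace fps with duration):

import imageio

# Collect frames 16.jpg-19.jpg and write them in one call (imageio v2 API).
frames = [imageio.imread(f'{i+1}.jpg') for i in range(15, 19)]
imageio.mimsave('节点_IN.gif', frames, fps=1)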