数据导入
import numpy as np
import pandas as pd
# Each line below is an independent loading example; the second assignment
# to `data` replaces the first when both are run.
# Comma-separated CSV read with the default delimiter.
# NOTE(review): "hourse" in the file name looks like a typo for "house" --
# confirm against the actual file on disk before changing the path.
data = pd.read_csv('data/kaggle_house_price_prediction/kaggle_hourse_price_train.csv')
# This file is read with ';' as the field delimiter.
data = pd.read_csv('data/wine_quality/winequality-white.csv', delimiter=";")
# Load a comma-separated text file straight into a NumPy array.
spambase = np.loadtxt('data/spambase/spambase.data', delimiter = ",")
数据预处理(DataFrame)
# Drop every feature (column) that contains a missing value, in place.
data.dropna(axis = 1, inplace = True)
# Drop a single feature; inplace=False returns a new frame and leaves `data` as is.
x = data.drop('quality', axis = 1, inplace = False)
# Keep numerical features by excluding object-dtype columns.
data = data.select_dtypes(exclude=['object'])
# Alternative: collect the names of the int64/float64 columns of X_train_full
# (X_train_full is defined elsewhere).
numerical_cols = [cname for cname in X_train_full.columns if X_train_full[cname].dtype in ['int64', 'float64']]
# Keep categorical features: object-dtype columns with fewer than 10 distinct values.
categorical_cols = [cname for cname in X_train_full.columns if X_train_full[cname].nunique() < 10 and
X_train_full[cname].dtype == "object"]
# Another way to leave a feature out: build the list of column names and
# remove the target from it.
features = data.columns.tolist()
target = 'SalePrice'
features.remove(target)
# Preprocess the labels: create 'safe_loans' as the label column, mapping
# bad_loans == 0 to +1 and any other value to -1, then delete 'bad_loans'
# (`loans` is defined elsewhere).
loans['safe_loans'] = loans['bad_loans'].apply(lambda x : +1 if x==0 else -1)
del loans['bad_loans']
# Split a pandas DataFrame into a training set (first 70% of the shuffled
# rows) and a test set (the remaining rows).
from sklearn.utils import shuffle
# The fixed seed keeps the shuffle reproducible -- do not change the 32.
data_shuffled = shuffle(data, random_state = 32)
num_of_samples = data_shuffled.shape[0]
split_line = int(num_of_samples * 0.7)
# Slice the shuffled frame: slicing `data` here would ignore the shuffle and
# split the rows in their original order.
train_data = data_shuffled.iloc[:split_line]
test_data = data_shuffled.iloc[split_line:]
# Equivalently, train_test_split (also usable on ndarrays) does the shuffle
# and the split in a single call.
from sklearn.model_selection import train_test_split
# test_size = 0.3 holds out 30% of the rows; random_state fixes the shuffle.
trainX, testX, trainY, testY = train_test_split(data[features], data[target], test_size = 0.3, random_state = 32)
离散特征变成向量
def one_hot_encoding(data, features_categorical):
    """Replace categorical columns with one-hot (dummy) columns.

    For each column name in ``features_categorical`` the column is removed
    and the indicator columns produced by ``pd.get_dummies`` (named
    ``<column>_<value>``) are appended to the right of the frame.

    Args:
        data: pandas DataFrame holding the columns to encode.
        features_categorical: iterable of column names to encode.

    Returns:
        A DataFrame with the encoded columns. The frame passed in is not
        modified; when ``features_categorical`` is empty it is returned
        unchanged.

    Raises:
        KeyError: if a name in ``features_categorical`` is not a column.
    """
    for cat in features_categorical:
        one_encoding = pd.get_dummies(data[cat], prefix=cat)
        # Build a new frame rather than deleting in place, so the caller's
        # DataFrame is never mutated.
        data = pd.concat([data.drop(columns=cat), one_encoding], axis=1)
    return data
数据预处理(ndarray)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Standardize the features; the scaler is fitted on all of X and X is rebound
# to the transformed array.
# NOTE(review): fitting before the split lets the test rows influence the
# scaling statistics -- consider fitting on X_train only; confirm intent.
X = StandardScaler().fit_transform(X)
# Hold out 40% of the samples for testing; random_state=42 fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4, random_state=42)
预测评价
# 引入评价指标
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import normalized_mutual_info_score
from sklearn.metrics import fowlkes_mallows_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
# 10-fold cross-validation (cv = 10); `model`, `x` and `y` are defined elsewhere.
from sklearn.model_selection import cross_val_predict
prediction = cross_val_predict(model, x, y, cv = 10)
模型
from sklearn.linear_model import LinearRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
# Example configuration: decision tree with max_depth 11 and max_leaf_nodes 14.
model = DecisionTreeClassifier(max_depth = 11,max_leaf_nodes=14)
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPClassifier
# Example configuration: MLP trained with the 'sgd' solver, a constant
# learning rate starting at 0.1, momentum 0 and at most 500 iterations.
# The closing parenthesis was missing, which made the call (and every line
# after it) a syntax error.
model = MLPClassifier(solver = 'sgd', learning_rate = 'constant', momentum = 0, learning_rate_init = 0.1, max_iter = 500)
from sklearn.neural_network import MLPRegressor
from sklearn.cluster import DBSCAN
from sklearn.mixture import GaussianMixture
from sklearn.cluster import KMeans
from sklearn.cluster import AgglomerativeClustering
绘图
# Plot the 10-fold cross-validated accuracy of a decision tree against its
# max_depth (depths 1 through 9). Precision, recall and F1 are collected per
# depth as well, but only accuracy is drawn.
# NOTE(review): `plt` is presumably matplotlib.pyplot
# (`import matplotlib.pyplot as plt`), and `dota2x` / `dota2y` are the feature
# matrix and labels defined elsewhere -- confirm they are in scope.
depths = np.arange(1,10)
accuracys = []
precisions = []
recalls = []
f1s = []
for i in depths:
    model = DecisionTreeClassifier(max_depth = i)
    prediction = cross_val_predict(model, dota2x, dota2y, cv = 10)
    # Score against the labels the predictions were made for; the original
    # passed `data` here, which is not the label vector.
    acc = accuracy_score(dota2y, prediction)
    precision = precision_score(dota2y, prediction)
    recall = recall_score(dota2y, prediction)
    f1 = f1_score(dota2y, prediction)
    accuracys.append(acc)
    precisions.append(precision)
    recalls.append(recall)
    f1s.append(f1)
# Draw the curve once, after all depths have been evaluated.
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.plot(depths,accuracys,label="accuracy score")
ax.set_xlabel("maxdepth")
ax.set_ylabel("accuracy")
ax.set_title("Decision tree Classifier")
ax.legend(framealpha=1)
plt.show()
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 【自荐】一款简洁、开源的在线白板工具 Drawnix
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 园子的第一款AI主题卫衣上架——"HELLO! HOW CAN I ASSIST YOU TODAY
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本