# 上机实验三:C4.5(带有预剪枝和后剪枝)算法实现与测试

from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import (
    StratifiedKFold,
    cross_val_score,
    cross_validate,
    train_test_split,
)
from sklearn.tree import DecisionTreeClassifier

# (1) Load the iris dataset and hold out one third of it as the test set.
iris = load_iris()
X = iris.data
y = iris.target
# Stratify on the labels so both partitions keep the class proportions;
# the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=1 / 3,
    stratify=y,
    random_state=42,
)

# (2) Train a decision tree that combines pre-pruning and post-pruning.
# scikit-learn's DecisionTreeClassifier is an optimised CART, not a literal
# C4.5 (it has no gain-ratio criterion); criterion='entropy' is the closest
# available split measure.
# Pre-pruning: max_depth=3 and min_samples_leaf=2 both stop growth while the
# tree is being built (min_samples_leaf is NOT a post-pruning control).
# Post-pruning: ccp_alpha > 0 applies minimal cost-complexity pruning after
# the tree has been grown.
# NOTE(review): ccp_alpha=0.01 is only a starting value; tune it with
# clf.cost_complexity_pruning_path(X_train, y_train).
clf = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=3,
    min_samples_leaf=2,
    ccp_alpha=0.01,
    random_state=42,
)
clf.fit(X_train, y_train)

# (3) Evaluate the model with stratified 5-fold cross-validation on the
# training split.
cv = StratifiedKFold(n_splits=5)
# A single cross_validate call scores every metric on the same fold models:
# 5 fits in total instead of the 20 that four separate cross_val_score calls
# would need, and all metrics are guaranteed to come from identical folds.
cv_results = cross_validate(
    clf,
    X_train,
    y_train,
    cv=cv,
    scoring=['accuracy', 'precision_macro', 'recall_macro', 'f1_macro'],
)
# With a list of scorer names, results are keyed as 'test_<scorer name>'.
scores_accuracy = cv_results['test_accuracy']
scores_precision = cv_results['test_precision_macro']
scores_recall = cv_results['test_recall_macro']
scores_f1 = cv_results['test_f1_macro']

# Print the cross-validated metrics, averaged over the five folds.
print(f'训练集准确率: {scores_accuracy.mean():.2f}')
print(f'训练集精确率: {scores_precision.mean():.2f}')
print(f'训练集召回率: {scores_recall.mean():.2f}')
print(f'训练集F1值: {scores_f1.mean():.2f}')

# (4) Score the fitted model on the held-out test set.
y_pred = clf.predict(X_test)

# Accuracy takes no averaging argument; the per-class metrics are
# macro-averaged (unweighted mean over the classes).
macro_avg = {'average': 'macro'}
test_accuracy = accuracy_score(y_test, y_pred)
test_precision = precision_score(y_test, y_pred, **macro_avg)
test_recall = recall_score(y_test, y_pred, **macro_avg)
test_f1 = f1_score(y_test, y_pred, **macro_avg)

# Print the test-set metrics, one per line, rounded to two decimals.
print(
    f'测试集准确率: {test_accuracy:.2f}',
    f'测试集精确率: {test_precision:.2f}',
    f'测试集召回率: {test_recall:.2f}',
    f'测试集F1值: {test_f1:.2f}',
    sep='\n',
)
# posted @ 2024-11-20 10:25  不会JAVA的小袁  阅读(16)  评论(0)  编辑  收藏  举报