Machine Learning for Beginners (scikit-learn module)
Common Machine Learning Lifecycle
- Import the Data
- Clean the Data
- Split the Data into Training/Test Sets
- Create a Model
- Train the Model
- Make Predictions
- Evaluate and Improve
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from joblib import dump, load
from sklearn import tree
# Prepare data: load the CSV, then separate the target column from the features.
music_df = pd.read_csv('music.csv')  # csv.zip: https://bit.ly/3muqqta
y = music_df['genre']                 # target: the label column to predict
X = music_df.drop(columns=['genre'])  # features: every column except the target

# Hold out 20% of the rows for testing; the rest is used for training.
# No random_state is passed here, so the split is not pinned to a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
)
# Learning: build a decision-tree classifier and fit it on the training split.
# fit() returns the estimator itself, so construction and training are chained.
model = DecisionTreeClassifier().fit(X_train, y_train)
# Evaluating: predict on the held-out test split and compare with the truth.
predictions2 = model.predict(X_test)  # predicted genre for each test row
# accuracy_score expects (y_true, y_pred): ground truth first, predictions
# second. Accuracy itself is symmetric, but keeping the documented order avoids
# wrong results if this call is later swapped for an asymmetric metric.
# The returned value is the fraction of correct predictions, in the range 0-1.
score = accuracy_score(y_test, predictions2)
print("accuracy score:", score)
# Dump the trained model to disk so it can be reused without retraining.
dump(model, 'music-recommender.joblib')

# Load the persisted model and use it to predict a single sample.
model_loaded = load('music-recommender.joblib')
# The model was fitted on a DataFrame (X_train), so the sample is wrapped in a
# DataFrame carrying the same column names; passing a bare list makes recent
# scikit-learn versions warn that X does not have valid feature names.
# The values follow the column order of X (presumably age=21, gender=1 --
# verify against the CSV header).
sample = pd.DataFrame([[21, 1]], columns=X.columns)
print(model_loaded.predict(sample))
# Visualizing a Decision Tree: export the fitted model in Graphviz DOT format.
tree.export_graphviz(
    model,
    out_file='music-recommender.dot',
    # Feature names are taken from the training columns so they always match
    # the features the model was fitted on, in the same order.
    feature_names=list(X.columns),
    # Class names come from the fitted model rather than from the full label
    # column: the random train/test split may leave a label out of the training
    # set, and names derived from all of y would then be misaligned with the
    # classes the tree actually knows about.
    class_names=[str(label) for label in model.classes_],
    label='all',  # show informative labels at every node
    rounded=True,
    filled=True,
)
# Then use vscode extension (https://marketplace.visualstudio.com/items?itemName=joaompinto.vscode-graphviz)
# to open .dot file and preview the decision tree.
沉舟侧畔千帆过,病树前头万木春。