我的代码- rf sampling

  


# coding: utf-8

# In[6]:

 

import pandas as pd
import numpy as np
from sklearn import tree
from sklearn.svm import SVC
from sklearn.grid_search import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import binarize
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import Normalizer
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score,recall_score,average_precision_score,auc
from imblearn.over_sampling import SMOTE


# In[7]:


# Load the raw sampling data set.
# NOTE(review): absolute, machine-specific path -- the script only runs
# where this exact file exists.
data = pd.read_csv(r"D:\Users\sgg91044\Desktop\Copy of sampling.csv")


# In[8]:


# Force the columns at positions 5..21 to numeric; values that cannot be
# parsed become NaN (errors='coerce').  The columns are replaced by label
# rather than written through ``.iloc``: recent pandas versions write
# ``.iloc[:, a:b] = ...`` into the existing arrays, which leaves
# object-typed columns as object dtype instead of converting them.
numeric_cols = data.columns[5:22]
data[numeric_cols] = data[numeric_cols].apply(pd.to_numeric, errors='coerce')


# In[9]:


# Notebook peek at the first rows (no visible effect when run as a script).
data.head()


# In[10]:


# Treat the label column as categorical.
data.Target = data.Target.astype("category")


# In[11]:


# Separate the label (Y) from the feature matrix (X).
Y = data.Target
X = data.drop(columns='Target')


# In[13]:


# Remove the columns that are excluded from the feature matrix.  Both are
# dropped exactly once here: a second ``drop(columns=['defect_count'])``
# on the already-reduced frame raises KeyError because the column is gone.
X = X.drop(columns=['Recipe_Name', 'defect_count'])


# In[14]:


# Notebook peek at the remaining feature columns.
X.head()


# In[8]:


# Median-impute missing values in the first 18 feature columns.
# ``Series.median()`` skips NaN, so no explicit NaN mask is required.
# Slicing ``X.columns[:18]`` (rather than indexing positions 0..17) also
# avoids an IndexError when X has fewer than 18 columns.
for col in X.columns[:18]:
    X[col] = X[col].fillna(X[col].median())


# In[9]:


# Rescale two groups of feature columns with scikit-learn's Normalizer.
# Each group is transformed on its own, in the same order as before:
# positions 10-11 first, then positions 0-2.
nz = Normalizer()
for col_slice in (slice(10, 12), slice(0, 3)):
    group = X.iloc[:, col_slice]
    scaled = nz.fit_transform(group)
    X.iloc[:, col_slice] = pd.DataFrame(scaled, columns=group.columns)
# Notebook display of the resulting frame.
X


# In[15]:


# Hold out 20% of the rows as a test set; random_state=0 fixes the shuffle
# so the split is reproducible.
split_parts = train_test_split(X, Y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split_parts


# In[16]:


# Oversample the training split with SMOTE (the test split is left as is).
# imbalanced-learn 0.4 renamed ``ratio`` -> ``sampling_strategy`` and
# ``fit_sample`` -> ``fit_resample``, and later releases removed the old
# names.  Prefer the current names and fall back to the legacy ones so the
# cell runs on either side of the rename.
try:
    sm = SMOTE(random_state=12, sampling_strategy=1.0)
except TypeError:  # imbalanced-learn < 0.4 only knows ``ratio``
    sm = SMOTE(random_state=12, ratio=1.0)
_resample = sm.fit_resample if hasattr(sm, "fit_resample") else sm.fit_sample
x_train_smote, y_train_smote = _resample(X_train, y_train)


# In[17]:


# Class counts before and after resampling.
print(y_train.value_counts(), np.bincount(y_train_smote))


# In[18]:


from sklearn.ensemble import RandomForestClassifier

# Random forest configuration: 100 trees, fixed seed, progress output,
# out-of-bag scoring enabled, and n_jobs=-1 for parallel fitting.
forest_params = {
    "n_estimators": 100,
    "random_state": 50,
    "verbose": 1,
    "oob_score": True,
    "n_jobs": -1,
}
random_forest = RandomForestClassifier(**forest_params)


# In[19]:


# Fit the forest on the SMOTE-resampled training set.
random_forest.fit(x_train_smote, y_train_smote)


# In[20]:


# ``random_forest.transform()`` cannot work: it passed no data, and the
# estimator-level ``transform`` shortcut was removed from scikit-learn in
# favour of ``SelectFromModel``.  Wrap the already-fitted forest
# (prefit=True) and keep the features whose importance passes the
# selector's default threshold.
# NOTE(review): the original call named no input; the resampled training
# matrix is assumed to be the intended one -- confirm.
from sklearn.feature_selection import SelectFromModel

rm_trans = SelectFromModel(random_forest, prefit=True).transform(x_train_smote)


# In[21]:


# Score the held-out test rows with the fitted forest.
y_pred = random_forest.predict(X_test)


# In[22]:


# Text summary of the classification metrics.
report_text = classification_report(y_true=y_test, y_pred=y_pred)
print(report_text)


# In[23]:


# Confusion matrix of true vs. predicted labels.
conf_mat = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(conf_mat)


# In[24]:


# F1 score; only visible as notebook cell output.
f1_score(y_true=y_test, y_pred=y_pred)


# In[25]:


# Accuracy as a percentage rounded to two decimals.
accuracy_pct = round(accuracy_score(y_true=y_test, y_pred=y_pred) * 100, 2)
print("Accuracy of Random_forest:", accuracy_pct, "%")


# In[26]:


# Recall (reported here as sensitivity) as a percentage rounded to two decimals.
sensitivity_pct = round(recall_score(y_true=y_test, y_pred=y_pred) * 100, 2)
print("Sensitivity of Random_forest:", sensitivity_pct, "%")

 

posted on 2018-12-19 10:25  Aimee0207  阅读(139)  评论(0)  编辑  收藏  举报

导航