我的代码-unsupervised learning


# coding: utf-8

# In[1]:


import numpy as np
import pandas as pd
from sklearn import tree
from sklearn.ensemble import IsolationForest
from sklearn.metrics import accuracy_score,recall_score,average_precision_score,auc
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import binarize
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import OneHotEncoder
from sklearn.svm import SVC

# GridSearchCV has lived in sklearn.model_selection since scikit-learn 0.18;
# the legacy sklearn.grid_search module was removed in 0.20, so it is only
# tried as a fallback.
try:
    from sklearn.model_selection import GridSearchCV
except ImportError:
    from sklearn.grid_search import GridSearchCV


# In[2]:


# Read the CSV export into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; consider a
# configurable data directory so the notebook runs elsewhere.
csv_path = "D:/Users/SGG91044/Desktop/MEP_no_defect_data_pivot_test.csv"
data = pd.read_csv(filepath_or_buffer=csv_path)


# In[3]:


# Preview the first five rows to sanity-check the load.
data.head(n=5)


# In[4]:


# Columns removed from the frame before any further processing.
columns_to_drop = [
    "lotid",
    "waferid",
    "defect_count",
    "eqpid",
    "Chamber",
    "Step",
    "Recipie_Name",
]
# Drop them in place, then display what is left.
data.drop(columns=columns_to_drop, inplace=True)
data


# In[5]:


# Coerce the first 17 columns to numeric; values that cannot be parsed become NaN.
# The result is assigned by column label rather than through `.iloc`: since
# pandas 2.0 a positional slice assignment writes into the existing columns in
# place, so object-dtype columns would stay object-dtype after the conversion.
# Label-based assignment replaces each column and therefore adopts the new dtype.
# NOTE(review): assumes the first 17 column labels are unique -- confirm.
feature_cols = data.columns[:17]
data[feature_cols] = data[feature_cols].apply(pd.to_numeric, errors='coerce')


# In[6]:


# Fill missing values in each of the first 17 columns with that column's
# median, computed over the non-missing entries only.
for i in range(0, 17):
    column = data.iloc[:, i]
    # Exclude NaNs before taking the median; np.median would otherwise return NaN.
    med = np.median(column[column.notna()])
    data.iloc[:, i] = column.fillna(med)


# In[10]:


# Rescale the first 17 columns with scikit-learn's Normalizer, which scales
# each row (sample) independently to unit norm.
# NOTE(review): if per-column scaling was intended, confirm that Normalizer is
# the right transformer here.
nz = Normalizer()
features = data.iloc[:, 0:17]
X = pd.DataFrame(
    nz.fit_transform(features),
    columns=features.columns,
    index=features.index,  # keep row labels aligned with `data` for the write-back
)
# Write the normalized values back over exactly the 17 columns they were
# computed from; the target slice has to match the width of the result.
data.iloc[:, 0:17] = X


# In[11]:


X


# In[12]:


# Hold out 30% of the rows as a test set; the fixed random_state makes the
# split repeatable across runs.
split_parts = train_test_split(X, test_size=0.3, random_state=8)
X_train, X_test = split_parts


# In[30]:


# Fit an isolation forest on the training rows, then label both splits.
# max_samples is capped at the training-set size: a value larger than the
# number of rows makes scikit-learn warn and fall back to using every row, so
# the cap gives the same model without the warning.
clf = IsolationForest(max_samples=min(10000, len(X_train)), random_state=10)
clf.fit(X_train)
# predict() returns +1 for inliers and -1 for outliers.
y_pred_train = clf.predict(X_train)
y_pred_test = clf.predict(X_test)


# In[35]:


# Anomaly scores for the training rows: the lower the score, the more abnormal
# the sample (negative scores correspond to outliers).
# The DataFrame is passed directly, as in the test-set call below, so the
# input carries the same feature names the model was fitted with.
scores_pred = clf.decision_function(X_train)
scores_pred


# In[36]:


# Anomaly scores for the held-out rows.
clf.decision_function(X_test)

 

posted on 2018-12-19 10:27  Aimee0207  阅读(111)  评论(0)  编辑  收藏  举报

导航