异常点分析

########## Bathrooms
# Bare expression: lists the column names when run interactively (no effect otherwise).
train_df.columns
# Keep only the two columns being compared, then scatter bathroom count against sale price.
all_df = train_df[["Sold Price", "Bathrooms"]]
plt.figure(figsize=(20, 6))
sns.scatterplot(x='Sold Price', y='Bathrooms', data=all_df)
plt.show()

######### Tax assessed value ######## 3674  43398
# (the numbers in the header are presumably the Ids read off this plot — confirm)
# Scatter "Tax assessed value" against "Sold Price" and label the extreme sales with their Id.
all_df = train_df.loc[:, ["Sold Price", "Tax assessed value"]]
plt.figure(figsize=(20, 6))
sns.scatterplot(data=all_df, x='Sold Price', y='Tax assessed value')
# Select the rows to annotate with one vectorised comparison instead of building a
# row Series per record. NaN compares as False, so missing prices are skipped
# rather than crashing an int() conversion.
extreme_sales = train_df.loc[
    train_df['Sold Price'] > 50000000,
    ['Sold Price', 'Tax assessed value', 'Id'],
]
for sold_price, tax_value, row_id in extreme_sales.itertuples(index=False, name=None):
    # Shift the label slightly right of the marker so it does not sit on top of it.
    plt.text(sold_price + 200, tax_value, row_id)
plt.show()


######### Annual tax amount ######## 3674  43398
# (the numbers in the header are presumably the Ids read off this plot — confirm)
# Scatter "Annual tax amount" against "Sold Price" and label the extreme sales with their Id.
all_df = train_df.loc[:, ["Sold Price", "Annual tax amount"]]
plt.figure(figsize=(20, 6))
sns.scatterplot(data=all_df, x='Sold Price', y='Annual tax amount')
# Select the rows to annotate with one vectorised comparison instead of building a
# row Series per record. NaN compares as False, so missing prices are skipped
# rather than crashing an int() conversion.
extreme_sales = train_df.loc[
    train_df['Sold Price'] > 50000000,
    ['Sold Price', 'Annual tax amount', 'Id'],
]
for sold_price, tax_amount, row_id in extreme_sales.itertuples(index=False, name=None):
    # Shift the label slightly right of the marker so it does not sit on top of it.
    plt.text(sold_price + 200, tax_amount, row_id)
plt.show()


######### Listed Price ######## 3674  43398 44633
# (the numbers in the header are presumably the Ids read off this plot — confirm)
# Scatter "Listed Price" against "Sold Price" and label rows that are extreme on either axis.
all_df = train_df.loc[:, ["Sold Price", "Listed Price"]]
plt.figure(figsize=(20, 6))
sns.scatterplot(data=all_df, x='Sold Price', y='Listed Price')
# Vectorised selection: a row is annotated when its sold price OR its listed price
# exceeds the respective threshold. NaN compares as False, so a missing value in
# one column no longer raises from int(); the other column can still flag the row.
is_extreme = (train_df['Sold Price'] > 50000000) | (train_df['Listed Price'] > 100000000)
extreme_rows = train_df.loc[is_extreme, ['Sold Price', 'Listed Price', 'Id']]
for sold_price, listed_price, row_id in extreme_rows.itertuples(index=False, name=None):
    # Shift the label slightly right of the marker so it does not sit on top of it.
    plt.text(sold_price + 200, listed_price, row_id)
plt.show()

######### Last Sold Price ######## 3674  43398 44633
# (the numbers in the header are presumably the Ids read off this plot — confirm)
# Scatter "Last Sold Price" against "Sold Price" and label rows that are extreme on either axis.
all_df = train_df.loc[:, ["Sold Price", "Last Sold Price"]]
plt.figure(figsize=(20, 6))
sns.scatterplot(data=all_df, x='Sold Price', y='Last Sold Price')
# Vectorised selection: a row is annotated when its sold price OR its last sold
# price exceeds the respective threshold. NaN compares as False, so a missing
# value in one column no longer raises from int(); the other column can still
# flag the row.
is_extreme = (train_df['Sold Price'] > 50000000) | (train_df['Last Sold Price'] > 28000000)
extreme_rows = train_df.loc[is_extreme, ['Sold Price', 'Last Sold Price', 'Id']]
for sold_price, last_sold_price, row_id in extreme_rows.itertuples(index=False, name=None):
    # Shift the label slightly right of the marker so it does not sit on top of it.
    plt.text(sold_price + 200, last_sold_price, row_id)
plt.show()

  

 

# Remove the rows judged to be outliers (dropped by index label), then renumber the index.
# NOTE(review): this uses `train`, while the plots above use `train_df` — confirm they
# refer to the same data and that index labels line up with the Ids noted above.
train1 = train.drop(index=[3674, 6055, 32867, 34876, 43398, 44091, 44633])
train1 = train1.reset_index(drop=True)

# Split the target column from the predictors.
y = train1['Sold Price']
train_features = train1.drop(columns='Sold Price')

# Stack the training predictors on top of the test rows under one fresh 0..n-1 index.
features = pd.concat([train_features, test], ignore_index=True)
# Bare expression: shows (rows, columns) when run interactively.
features.shape

  

posted @ 2021-06-21 19:43  哈哈哈喽喽喽  阅读(48)  评论(0)  编辑  收藏  举报