【小睿的ML之路】Pandas自定义函数(含泰坦尼克号数据分析案例实战)

import pandas as pd

# Load the Titanic training data from the CSV file "titanic_train.csv"
# (relative path) into a DataFrame used by the rest of the script.
titanic_survival = pd.read_csv("titanic_train.csv")

def hundredth_row(column):
    """Return the entry of ``column`` stored under index label 99.

    Intended for ``DataFrame.apply``, which passes each column in turn.
    The lookup is label-based (``.loc``), so it is the hundredth row only
    when the index is the default 0..n-1 range; a ``KeyError`` is raised
    if label 99 does not exist.
    """
    return column.loc[99]

# Call the custom function once per column (DataFrame.apply with its default
# axis), collecting one value per column into a Series.
row = titanic_survival.apply(hundredth_row) # apply the user-defined function
print(row)
PassengerId                  100
Survived                       0
Pclass                         2
Name           Kantor, Mr. Sinai
Sex                         male
Age                         34.0
SibSp                          1
Parch                          0
Ticket                    244367
Fare                        26.0
Cabin                        NaN
Embarked                       S
dtype: object
def null_count(column):
    """Count the missing (null/NaN) entries in ``column``.

    Intended for ``DataFrame.apply``, which passes each column as a Series.
    Returns the count as a plain ``int``.
    """
    # Each True in the boolean mask marks one missing entry, so summing the
    # mask gives the same number as the length of the filtered selection.
    missing_mask = pd.isnull(column)
    return int(missing_mask.sum())

# Run null_count on every column to get the number of missing values per column.
column_null_count = titanic_survival.apply(null_count) # apply the user-defined function
print(column_null_count)
PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64
def which_class(row):
    """Translate a passenger row's numeric ``Pclass`` into a readable label.

    Intended for ``DataFrame.apply(..., axis=1)``, which passes each row.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with a ``'Pclass'`` key.

    Returns:
        ``'First Class'``, ``'Second Class'`` or ``'Third Class'`` for the
        values 1, 2 and 3. ``'Unknown'`` when the value is missing or is not
        one of those three codes.

    Raises:
        KeyError: If ``row`` has no ``'Pclass'`` entry.
    """
    labels = {1: 'First Class', 2: 'Second Class', 3: 'Third Class'}
    pclass = row['Pclass']
    # Check for missing values first: NaN never compares equal to anything.
    if pd.isnull(pclass):
        return 'Unknown'
    # Unrecognised codes get an explicit label instead of an implicit None,
    # so they are not silently dropped when later grouping by this label.
    return labels.get(pclass, 'Unknown')
    
# axis=1 makes apply pass each row (rather than each column) to which_class,
# producing one label per passenger.
classes = titanic_survival.apply(which_class, axis=1) # apply the user-defined function
print(classes)
0       Third Class
1       First Class
2       Third Class
3       First Class
4       Third Class
           ...     
886    Second Class
887     First Class
888     Third Class
889     First Class
890     Third Class
Length: 891, dtype: object
# Store the per-passenger labels as a new column so they can be grouped on.
titanic_survival['pclass_labels'] = classes
# pivot_table aggregates with the mean by default, so this is the mean of
# Survived per class label, i.e. the survival rate (assuming Survived is a
# 0/1 flag, as the printed sample row suggests).
pclass_group_survival = titanic_survival.pivot_table(index="pclass_labels",values="Survived") # survival rate per cabin class
print(pclass_group_survival)
               Survived
pclass_labels          
First Class    0.629630
Second Class   0.472826
Third Class    0.242363
# 可以看出,头等舱的获救比例较高,三等舱的获救比例相对较低。这符合常识,因为头等舱往往提供更好的设施和服务,可能在遇到危险时更有利于获救。
posted @ 2023-09-18 00:07  郭小睿  阅读(19)  评论(0)  编辑  收藏  举报