import numpy as np
import pandas as pd
# Load the training data from train.csv into a DataFrame and display it.
df=pd.read_csv('train.csv')
df
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
886 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27.0 |
0 |
0 |
211536 |
13.0000 |
NaN |
S |
887 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19.0 |
0 |
0 |
112053 |
30.0000 |
B42 |
S |
888 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
NaN |
1 |
2 |
W./C. 6607 |
23.4500 |
NaN |
S |
889 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.0000 |
C148 |
C |
890 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.7500 |
NaN |
Q |
891 rows × 12 columns
# Print each column's dtype and non-null count.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 PassengerId 891 non-null int64
1 Survived 891 non-null int64
2 Pclass 891 non-null int64
3 Name 891 non-null object
4 Sex 891 non-null object
5 Age 714 non-null float64
6 SibSp 891 non-null int64
7 Parch 891 non-null int64
8 Ticket 891 non-null object
9 Fare 891 non-null float64
10 Cabin 204 non-null object
11 Embarked 889 non-null object
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB
# Count the missing values in each column.
df.isnull().sum()
PassengerId 0
Survived 0
Pclass 0
Name 0
Sex 0
Age 177
SibSp 0
Parch 0
Ticket 0
Fare 0
Cabin 687
Embarked 2
dtype: int64
# Boolean frame marking which cells are missing.
df.isnull()
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
True |
False |
1 |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
2 |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
True |
False |
3 |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
4 |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
True |
False |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
886 |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
True |
False |
887 |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
888 |
False |
False |
False |
False |
False |
True |
False |
False |
False |
False |
True |
False |
889 |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
890 |
False |
False |
False |
False |
False |
False |
False |
False |
False |
False |
True |
False |
891 rows × 12 columns
# Show only the three columns that contain missing values.
df[['Age','Cabin','Embarked']]
|
Age |
Cabin |
Embarked |
0 |
22.0 |
NaN |
S |
1 |
38.0 |
C85 |
C |
2 |
26.0 |
NaN |
S |
3 |
35.0 |
C123 |
S |
4 |
35.0 |
NaN |
S |
... |
... |
... |
... |
886 |
27.0 |
NaN |
S |
887 |
19.0 |
B42 |
S |
888 |
NaN |
NaN |
S |
889 |
26.0 |
C148 |
C |
890 |
32.0 |
NaN |
Q |
891 rows × 3 columns
# Build a new frame in which missing Age values are replaced by 0;
# the other columns (and df itself) are left as they are.
age_fill = {'Age': 0}
df1 = df.fillna(value=age_fill)
df1
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
886 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27.0 |
0 |
0 |
211536 |
13.0000 |
NaN |
S |
887 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19.0 |
0 |
0 |
112053 |
30.0000 |
B42 |
S |
888 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
0.0 |
1 |
2 |
W./C. 6607 |
23.4500 |
NaN |
S |
889 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.0000 |
C148 |
C |
890 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.7500 |
NaN |
Q |
891 rows × 12 columns
# Show only the rows whose Age is missing.
df.loc[df['Age'].isnull()]
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
5 |
6 |
0 |
3 |
Moran, Mr. James |
male |
NaN |
0 |
0 |
330877 |
8.4583 |
NaN |
Q |
17 |
18 |
1 |
2 |
Williams, Mr. Charles Eugene |
male |
NaN |
0 |
0 |
244373 |
13.0000 |
NaN |
S |
19 |
20 |
1 |
3 |
Masselmani, Mrs. Fatima |
female |
NaN |
0 |
0 |
2649 |
7.2250 |
NaN |
C |
26 |
27 |
0 |
3 |
Emir, Mr. Farred Chehab |
male |
NaN |
0 |
0 |
2631 |
7.2250 |
NaN |
C |
28 |
29 |
1 |
3 |
O'Dwyer, Miss. Ellen "Nellie" |
female |
NaN |
0 |
0 |
330959 |
7.8792 |
NaN |
Q |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
859 |
860 |
0 |
3 |
Razi, Mr. Raihed |
male |
NaN |
0 |
0 |
2629 |
7.2292 |
NaN |
C |
863 |
864 |
0 |
3 |
Sage, Miss. Dorothy Edith "Dolly" |
female |
NaN |
8 |
2 |
CA. 2343 |
69.5500 |
NaN |
S |
868 |
869 |
0 |
3 |
van Melkebeke, Mr. Philemon |
male |
NaN |
0 |
0 |
345777 |
9.5000 |
NaN |
S |
878 |
879 |
0 |
3 |
Laleff, Mr. Kristo |
male |
NaN |
0 |
0 |
349217 |
7.8958 |
NaN |
S |
888 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
NaN |
1 |
2 |
W./C. 6607 |
23.4500 |
NaN |
S |
177 rows × 12 columns
# Overwrite the missing Age entries with 0 directly in df.
missing_age = df['Age'].isna()
df.loc[missing_age, 'Age'] = 0
df
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
886 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27.0 |
0 |
0 |
211536 |
13.0000 |
NaN |
S |
887 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19.0 |
0 |
0 |
112053 |
30.0000 |
B42 |
S |
888 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
0.0 |
1 |
2 |
W./C. 6607 |
23.4500 |
NaN |
S |
889 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.0000 |
C148 |
C |
890 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.7500 |
NaN |
Q |
891 rows × 12 columns
# Replace every remaining missing value in the frame with 0.
# NOTE(review): this also writes the integer 0 into the text columns
# (Cabin, Embarked), leaving them with mixed str/int values - confirm
# that this is intended before encoding those columns.
df=df.fillna(0)
df
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
147 |
2 |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
81 |
0 |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
147 |
2 |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
55 |
2 |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
147 |
2 |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
886 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27.0 |
0 |
0 |
211536 |
13.0000 |
147 |
2 |
887 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19.0 |
0 |
0 |
112053 |
30.0000 |
30 |
2 |
888 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
0.0 |
1 |
2 |
W./C. 6607 |
23.4500 |
147 |
2 |
889 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.0000 |
60 |
0 |
890 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.7500 |
147 |
1 |
891 rows × 12 columns
# Flag rows that repeat an earlier row, then display those rows.
df.duplicated()
df[df.duplicated()]
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
# Small demo frame with one repeated row (rows 0 and 1 are identical),
# used to try out duplicate handling.
a = pd.DataFrame(
    [
        ('a', 'a', '1'),
        ('a', 'a', '1'),
        ('c', 'c', '2'),
        ('c', 'c', '3'),
    ],
    columns=['A', 'B', 'C'],
)
a
|
A |
B |
C |
0 |
a |
a |
1 |
1 |
a |
a |
1 |
2 |
c |
c |
2 |
3 |
c |
c |
3 |
# Display the demo frame with repeated rows removed (the first occurrence is kept).
a.drop_duplicates()
|
A |
B |
C |
0 |
a |
a |
1 |
2 |
c |
c |
2 |
3 |
c |
c |
3 |
# Display df with repeated rows removed.
# NOTE(review): the result is not assigned, so df itself is left unchanged.
df.drop_duplicates()
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
886 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27.0 |
0 |
0 |
211536 |
13.0000 |
NaN |
S |
887 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19.0 |
0 |
0 |
112053 |
30.0000 |
B42 |
S |
888 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
0.0 |
1 |
2 |
W./C. 6607 |
23.4500 |
NaN |
S |
889 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.0000 |
C148 |
C |
890 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.7500 |
NaN |
Q |
891 rows × 12 columns
# Save the cleaned frame, then split Age into five equal-width bins
# labelled '1' through '5'.
df.to_csv('train_clear.csv')
age_labels = ['1', '2', '3', '4', '5']
df['Age bins'] = pd.cut(df['Age'], bins=5, labels=age_labels)
df
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
Age bins |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
0 |
S |
2 |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
3 |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
0 |
S |
2 |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
3 |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
0 |
S |
3 |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
886 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27.0 |
0 |
0 |
211536 |
13.0000 |
0 |
S |
2 |
887 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19.0 |
0 |
0 |
112053 |
30.0000 |
B42 |
S |
2 |
888 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
0.0 |
1 |
2 |
W./C. 6607 |
23.4500 |
0 |
S |
1 |
889 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.0000 |
C148 |
C |
2 |
890 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.7500 |
0 |
Q |
2 |
891 rows × 13 columns
# Plot a histogram of the Age values.
from matplotlib import pyplot as plt
plt.hist(df['Age'])
(array([227., 33., 164., 181., 123., 74., 50., 26., 11., 2.]),
array([ 0., 8., 16., 24., 32., 40., 48., 56., 64., 72., 80.]),
<BarContainer object of 10 artists>)

# Plot a histogram of the binned age labels.
plt.hist(df['Age bins'])
(array([346., 0., 188., 0., 0., 277., 0., 69., 0., 11.]),
array([0. , 0.4, 0.8, 1.2, 1.6, 2. , 2.4, 2.8, 3.2, 3.6, 4. ]),
<BarContainer object of 10 artists>)

# Bin Age into five left-closed intervals labelled '1'..'5':
# [0, 5), [5, 15), [15, 30), [30, 50) and [50, inf).
# With right=False the final edge is exclusive, so a top edge of 80 would
# leave an age of exactly 80 outside every bin (NaN). The data contains such
# a passenger, so the last bin is left open-ended instead.
df['Age bins'] = pd.cut(
    df['Age'],
    [0, 5, 15, 30, 50, np.inf],
    right=False,
    labels=list('12345'),
)
df
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
Age bins |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
0 |
S |
3 |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
4 |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
0 |
S |
3 |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
0 |
S |
4 |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
886 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27.0 |
0 |
0 |
211536 |
13.0000 |
0 |
S |
3 |
887 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19.0 |
0 |
0 |
112053 |
30.0000 |
B42 |
S |
3 |
888 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
0.0 |
1 |
2 |
W./C. 6607 |
23.4500 |
0 |
S |
1 |
889 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.0000 |
C148 |
C |
3 |
890 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.7500 |
0 |
Q |
4 |
891 rows × 13 columns
# Bin Age by quantile edges at 0%, 10%, 30%, 50%, 70% and 90%.
# duplicates='drop' collapses edges that coincide; the four labels only fit
# if exactly one edge is dropped.
# NOTE(review): presumably the 0% and 10% edges coincide because of the
# zero-filled ages - confirm against the data.
# NOTE(review): the edge list stops at 0.9, so ages above the 90th
# percentile appear to get no bin (NaN) - confirm this is intended.
df['Age bins']=pd.qcut(df['Age'],[0,0.1,0.3,0.5,0.7,0.9],duplicates='drop',labels=list('1234'))
df
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
Age bins |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
0 |
S |
2 |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
4 |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
0 |
S |
3 |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
0 |
S |
4 |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
886 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27.0 |
0 |
0 |
211536 |
13.0000 |
0 |
S |
3 |
887 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19.0 |
0 |
0 |
112053 |
30.0000 |
B42 |
S |
2 |
888 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
0.0 |
1 |
2 |
W./C. 6607 |
23.4500 |
0 |
S |
1 |
889 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.0000 |
C148 |
C |
3 |
890 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.7500 |
0 |
Q |
3 |
891 rows × 13 columns
# Save the binned frame to train_bin.csv, then list the distinct Sex values.
df.to_csv('train_bin.csv')
df['Sex'].unique()
array(['male', 'female'], dtype=object)
# List the distinct Cabin values.
df['Cabin'].unique()
array([0, 'C85', 'C123', 'E46', 'G6', 'C103', 'D56', 'A6', 'C23 C25 C27',
'B78', 'D33', 'B30', 'C52', 'B28', 'C83', 'F33', 'F G73', 'E31',
'A5', 'D10 D12', 'D26', 'C110', 'B58 B60', 'E101', 'F E69', 'D47',
'B86', 'F2', 'C2', 'E33', 'B19', 'A7', 'C49', 'F4', 'A32', 'B4',
'B80', 'A31', 'D36', 'D15', 'C93', 'C78', 'D35', 'C87', 'B77',
'E67', 'B94', 'C125', 'C99', 'C118', 'D7', 'A19', 'B49', 'D',
'C22 C26', 'C106', 'C65', 'E36', 'C54', 'B57 B59 B63 B66', 'C7',
'E34', 'C32', 'B18', 'C124', 'C91', 'E40', 'T', 'C128', 'D37',
'B35', 'E50', 'C82', 'B96 B98', 'E10', 'E44', 'A34', 'C104',
'C111', 'C92', 'E38', 'D21', 'E12', 'E63', 'A14', 'B37', 'C30',
'D20', 'B79', 'E25', 'D46', 'B73', 'C95', 'B38', 'B39', 'B22',
'C86', 'C70', 'A16', 'C101', 'C68', 'A10', 'E68', 'B41', 'A20',
'D19', 'D50', 'D9', 'A23', 'B50', 'A26', 'D48', 'E58', 'C126',
'B71', 'B51 B53 B55', 'D49', 'B5', 'B20', 'F G63', 'C62 C64',
'E24', 'C90', 'C45', 'E8', 'B101', 'D45', 'C46', 'D30', 'E121',
'D11', 'E77', 'F38', 'B3', 'D6', 'B82 B84', 'D17', 'A36', 'B102',
'B69', 'E49', 'C47', 'D28', 'E17', 'A24', 'C50', 'B42', 'C148'],
dtype=object)
# List the distinct Embarked values.
df['Embarked'].unique()
array(['S', 'C', 'Q', 0], dtype=object)
# Encode Sex numerically: 'male' -> 1, 'female' -> 2.
# The result is assigned back to the column. Calling
# replace(..., inplace=True) on df['Sex'] is a chained in-place operation on
# an intermediate object; pandas warns about it, and under Copy-on-Write it
# never updates df.
df['Sex'] = df['Sex'].replace(['male', 'female'], [1, 2])
df
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
Age bins |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
1 |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
0 |
S |
2 |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
2 |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
4 |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
2 |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
0 |
S |
3 |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
2 |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
1 |
35.0 |
0 |
0 |
373450 |
8.0500 |
0 |
S |
4 |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
886 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
1 |
27.0 |
0 |
0 |
211536 |
13.0000 |
0 |
S |
3 |
887 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
2 |
19.0 |
0 |
0 |
112053 |
30.0000 |
B42 |
S |
2 |
888 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
2 |
0.0 |
1 |
2 |
W./C. 6607 |
23.4500 |
0 |
S |
1 |
889 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
1 |
26.0 |
0 |
0 |
111369 |
30.0000 |
C148 |
C |
3 |
890 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
1 |
32.0 |
0 |
0 |
370376 |
7.7500 |
0 |
Q |
3 |
891 rows × 13 columns
from sklearn.preprocessing import LabelEncoder
# Replace each distinct Cabin value with an integer label.
# The column holds both strings and the integer 0 that was used to fill
# missing values. LabelEncoder requires uniformly typed input and raises
# TypeError on a str/int mix, so the values are cast to str first.
df['Cabin'] = LabelEncoder().fit_transform(df['Cabin'].astype(str))
df
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
147 |
S |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
81 |
C |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
147 |
S |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
55 |
S |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
147 |
S |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
886 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27.0 |
0 |
0 |
211536 |
13.0000 |
147 |
S |
887 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19.0 |
0 |
0 |
112053 |
30.0000 |
30 |
S |
888 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
NaN |
1 |
2 |
W./C. 6607 |
23.4500 |
147 |
S |
889 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.0000 |
60 |
C |
890 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.7500 |
147 |
Q |
891 rows × 12 columns
# Replace each distinct Embarked value with an integer label.
# The column holds 'S'/'C'/'Q' plus the integer 0 that was used to fill
# missing values. LabelEncoder requires uniformly typed input and raises
# TypeError on a str/int mix, so the values are cast to str first.
df['Embarked'] = LabelEncoder().fit_transform(df['Embarked'].astype(str))
df
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
147 |
2 |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
81 |
0 |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
147 |
2 |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
55 |
2 |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
147 |
2 |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
886 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27.0 |
0 |
0 |
211536 |
13.0000 |
147 |
2 |
887 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19.0 |
0 |
0 |
112053 |
30.0000 |
30 |
2 |
888 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
NaN |
1 |
2 |
W./C. 6607 |
23.4500 |
147 |
2 |
889 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.0000 |
60 |
0 |
890 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.7500 |
147 |
1 |
891 rows × 12 columns
# One-hot encode each listed column and append the indicator columns to df.
# Both statements belong to the loop body, so every listed column contributes
# its own set of prefixed indicator columns.
for column in ['Cabin', 'Age', 'Embarked']:
    x = pd.get_dummies(df[column], prefix=column)
    df = pd.concat([df, x], axis=1)
df.head()
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
... |
Age_66.0 |
Age_70.0 |
Age_70.5 |
Age_71.0 |
Age_74.0 |
Age_80.0 |
Embarked_0 |
Embarked_1 |
Embarked_2 |
Embarked_3 |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
... |
False |
False |
False |
False |
False |
False |
False |
False |
True |
False |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
... |
False |
False |
False |
False |
False |
False |
True |
False |
False |
False |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
... |
False |
False |
False |
False |
False |
False |
False |
False |
True |
False |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
... |
False |
False |
False |
False |
False |
False |
False |
False |
True |
False |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
... |
False |
False |
False |
False |
False |
False |
False |
False |
True |
False |
5 rows × 253 columns
# Pull the title out of each Name: the run of letters that sits directly
# before a period (e.g. 'Mr', 'Mrs', 'Miss').
name_column = df['Name']
df['Title'] = name_column.str.extract(r'([A-Za-z]+)\.', expand=False)
df
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
... |
Age_70.0 |
Age_70.5 |
Age_71.0 |
Age_74.0 |
Age_80.0 |
Embarked_0 |
Embarked_1 |
Embarked_2 |
Embarked_3 |
Title |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
... |
False |
False |
False |
False |
False |
False |
False |
True |
False |
Mr |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
... |
False |
False |
False |
False |
False |
True |
False |
False |
False |
Mrs |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
... |
False |
False |
False |
False |
False |
False |
False |
True |
False |
Miss |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
... |
False |
False |
False |
False |
False |
False |
False |
True |
False |
Mrs |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
... |
False |
False |
False |
False |
False |
False |
False |
True |
False |
Mr |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
886 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27.0 |
0 |
0 |
211536 |
13.0000 |
... |
False |
False |
False |
False |
False |
False |
False |
True |
False |
Rev |
887 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19.0 |
0 |
0 |
112053 |
30.0000 |
... |
False |
False |
False |
False |
False |
False |
False |
True |
False |
Miss |
888 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
0.0 |
1 |
2 |
W./C. 6607 |
23.4500 |
... |
False |
False |
False |
False |
False |
False |
False |
True |
False |
Miss |
889 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.0000 |
... |
False |
False |
False |
False |
False |
True |
False |
False |
False |
Mr |
890 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.7500 |
... |
False |
False |
False |
False |
False |
False |
True |
False |
False |
Mr |
891 rows × 254 columns
# Load the upper-left piece of the split training data and display it.
left_up=pd.read_csv('data/train-left-up.csv')
left_up
|
PassengerId |
Survived |
Pclass |
Name |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
... |
... |
... |
... |
... |
434 |
435 |
0 |
1 |
Silvey, Mr. William Baird |
435 |
436 |
1 |
1 |
Carter, Miss. Lucile Polk |
436 |
437 |
0 |
3 |
Ford, Miss. Doolina Margaret "Daisy" |
437 |
438 |
1 |
2 |
Richards, Mrs. Sidney (Emily Hocking) |
438 |
439 |
0 |
1 |
Fortune, Mr. Mark |
439 rows × 4 columns
# Load the lower-left piece of the split training data and display it.
left_down=pd.read_csv('data/train-left-down.csv')
left_down
|
PassengerId |
Survived |
Pclass |
Name |
0 |
440 |
0 |
2 |
Kvillner, Mr. Johan Henrik Johannesson |
1 |
441 |
1 |
2 |
Hart, Mrs. Benjamin (Esther Ada Bloomfield) |
2 |
442 |
0 |
3 |
Hampe, Mr. Leon |
3 |
443 |
0 |
3 |
Petterson, Mr. Johan Emil |
4 |
444 |
1 |
2 |
Reynaldo, Ms. Encarnacion |
... |
... |
... |
... |
... |
447 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
448 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
449 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
450 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
451 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
452 rows × 4 columns
# Load the upper-right piece of the split training data and display it.
right_up=pd.read_csv('data/train-right-up.csv')
right_up
|
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
2 |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
3 |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
... |
... |
... |
... |
... |
... |
... |
... |
... |
434 |
male |
50.0 |
1 |
0 |
13507 |
55.9000 |
E44 |
S |
435 |
female |
14.0 |
1 |
2 |
113760 |
120.0000 |
B96 B98 |
S |
436 |
female |
21.0 |
2 |
2 |
W./C. 6608 |
34.3750 |
NaN |
S |
437 |
female |
24.0 |
2 |
3 |
29106 |
18.7500 |
NaN |
S |
438 |
male |
64.0 |
1 |
4 |
19950 |
263.0000 |
C23 C25 C27 |
S |
439 rows × 8 columns
# Load the lower-right piece of the split training data and display it.
right_down=pd.read_csv('data/train-right-down.csv')
right_down
|
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
male |
31.0 |
0 |
0 |
C.A. 18723 |
10.500 |
NaN |
S |
1 |
female |
45.0 |
1 |
1 |
F.C.C. 13529 |
26.250 |
NaN |
S |
2 |
male |
20.0 |
0 |
0 |
345769 |
9.500 |
NaN |
S |
3 |
male |
25.0 |
1 |
0 |
347076 |
7.775 |
NaN |
S |
4 |
female |
28.0 |
0 |
0 |
230434 |
13.000 |
NaN |
S |
... |
... |
... |
... |
... |
... |
... |
... |
... |
447 |
male |
27.0 |
0 |
0 |
211536 |
13.000 |
NaN |
S |
448 |
female |
19.0 |
0 |
0 |
112053 |
30.000 |
B42 |
S |
449 |
female |
NaN |
1 |
2 |
W./C. 6607 |
23.450 |
NaN |
S |
450 |
male |
26.0 |
0 |
0 |
111369 |
30.000 |
C148 |
C |
451 |
male |
32.0 |
0 |
0 |
370376 |
7.750 |
NaN |
Q |
452 rows × 8 columns
# Place the two upper pieces side by side, matching rows by index.
upper_parts = [left_up, right_up]
result_up = pd.concat(upper_parts, axis='columns')
result_up
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
434 |
435 |
0 |
1 |
Silvey, Mr. William Baird |
male |
50.0 |
1 |
0 |
13507 |
55.9000 |
E44 |
S |
435 |
436 |
1 |
1 |
Carter, Miss. Lucile Polk |
female |
14.0 |
1 |
2 |
113760 |
120.0000 |
B96 B98 |
S |
436 |
437 |
0 |
3 |
Ford, Miss. Doolina Margaret "Daisy" |
female |
21.0 |
2 |
2 |
W./C. 6608 |
34.3750 |
NaN |
S |
437 |
438 |
1 |
2 |
Richards, Mrs. Sidney (Emily Hocking) |
female |
24.0 |
2 |
3 |
29106 |
18.7500 |
NaN |
S |
438 |
439 |
0 |
1 |
Fortune, Mr. Mark |
male |
64.0 |
1 |
4 |
19950 |
263.0000 |
C23 C25 C27 |
S |
439 rows × 12 columns
# Place the two lower pieces side by side, matching rows by index.
lower_parts = [left_down, right_down]
result_down = pd.concat(lower_parts, axis='columns')
result_down
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
440 |
0 |
2 |
Kvillner, Mr. Johan Henrik Johannesson |
male |
31.0 |
0 |
0 |
C.A. 18723 |
10.500 |
NaN |
S |
1 |
441 |
1 |
2 |
Hart, Mrs. Benjamin (Esther Ada Bloomfield) |
female |
45.0 |
1 |
1 |
F.C.C. 13529 |
26.250 |
NaN |
S |
2 |
442 |
0 |
3 |
Hampe, Mr. Leon |
male |
20.0 |
0 |
0 |
345769 |
9.500 |
NaN |
S |
3 |
443 |
0 |
3 |
Petterson, Mr. Johan Emil |
male |
25.0 |
1 |
0 |
347076 |
7.775 |
NaN |
S |
4 |
444 |
1 |
2 |
Reynaldo, Ms. Encarnacion |
female |
28.0 |
0 |
0 |
230434 |
13.000 |
NaN |
S |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
447 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27.0 |
0 |
0 |
211536 |
13.000 |
NaN |
S |
448 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19.0 |
0 |
0 |
112053 |
30.000 |
B42 |
S |
449 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
NaN |
1 |
2 |
W./C. 6607 |
23.450 |
NaN |
S |
450 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.000 |
C148 |
C |
451 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.750 |
NaN |
Q |
452 rows × 12 columns
# Stack the upper and lower halves and renumber the rows from 0,
# discarding each half's own index.
result = pd.concat([result_up, result_down], ignore_index=True)
result
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
886 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27.0 |
0 |
0 |
211536 |
13.0000 |
NaN |
S |
887 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19.0 |
0 |
0 |
112053 |
30.0000 |
B42 |
S |
888 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
NaN |
1 |
2 |
W./C. 6607 |
23.4500 |
NaN |
S |
889 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.0000 |
C148 |
C |
890 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.7500 |
NaN |
Q |
891 rows × 12 columns
# Attach right_up's columns to left_up, matching rows by index.
up=left_up.join(right_up)
up
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
434 |
435 |
0 |
1 |
Silvey, Mr. William Baird |
male |
50.0 |
1 |
0 |
13507 |
55.9000 |
E44 |
S |
435 |
436 |
1 |
1 |
Carter, Miss. Lucile Polk |
female |
14.0 |
1 |
2 |
113760 |
120.0000 |
B96 B98 |
S |
436 |
437 |
0 |
3 |
Ford, Miss. Doolina Margaret "Daisy" |
female |
21.0 |
2 |
2 |
W./C. 6608 |
34.3750 |
NaN |
S |
437 |
438 |
1 |
2 |
Richards, Mrs. Sidney (Emily Hocking) |
female |
24.0 |
2 |
3 |
29106 |
18.7500 |
NaN |
S |
438 |
439 |
0 |
1 |
Fortune, Mr. Mark |
male |
64.0 |
1 |
4 |
19950 |
263.0000 |
C23 C25 C27 |
S |
439 rows × 12 columns
# Attach right_down's columns to left_down, matching rows by index.
down=left_down.join(right_down)
down
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
440 |
0 |
2 |
Kvillner, Mr. Johan Henrik Johannesson |
male |
31.0 |
0 |
0 |
C.A. 18723 |
10.500 |
NaN |
S |
1 |
441 |
1 |
2 |
Hart, Mrs. Benjamin (Esther Ada Bloomfield) |
female |
45.0 |
1 |
1 |
F.C.C. 13529 |
26.250 |
NaN |
S |
2 |
442 |
0 |
3 |
Hampe, Mr. Leon |
male |
20.0 |
0 |
0 |
345769 |
9.500 |
NaN |
S |
3 |
443 |
0 |
3 |
Petterson, Mr. Johan Emil |
male |
25.0 |
1 |
0 |
347076 |
7.775 |
NaN |
S |
4 |
444 |
1 |
2 |
Reynaldo, Ms. Encarnacion |
female |
28.0 |
0 |
0 |
230434 |
13.000 |
NaN |
S |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
447 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27.0 |
0 |
0 |
211536 |
13.000 |
NaN |
S |
448 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19.0 |
0 |
0 |
112053 |
30.000 |
B42 |
S |
449 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
NaN |
1 |
2 |
W./C. 6607 |
23.450 |
NaN |
S |
450 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.000 |
C148 |
C |
451 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.750 |
NaN |
Q |
452 rows × 12 columns
# Stack the upper and lower blocks vertically. Both halves start their index at
# 0, so ask concat to discard the old labels and number the rows 0..n-1.
res = pd.concat([up, down], ignore_index=True)
res
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
886 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27.0 |
0 |
0 |
211536 |
13.0000 |
NaN |
S |
887 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19.0 |
0 |
0 |
112053 |
30.0000 |
B42 |
S |
888 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
NaN |
1 |
2 |
W./C. 6607 |
23.4500 |
NaN |
S |
889 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.0000 |
C148 |
C |
890 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.7500 |
NaN |
Q |
891 rows × 12 columns
# Method 2: the same column-wise recombination using pd.merge.
# left_index/right_index=True makes both frames' indexes the join keys
# (merge performs an inner join unless `how` is given).
up=pd.merge(left_up,right_up,left_index=True,right_index=True)
up
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
434 |
435 |
0 |
1 |
Silvey, Mr. William Baird |
male |
50.0 |
1 |
0 |
13507 |
55.9000 |
E44 |
S |
435 |
436 |
1 |
1 |
Carter, Miss. Lucile Polk |
female |
14.0 |
1 |
2 |
113760 |
120.0000 |
B96 B98 |
S |
436 |
437 |
0 |
3 |
Ford, Miss. Doolina Margaret "Daisy" |
female |
21.0 |
2 |
2 |
W./C. 6608 |
34.3750 |
NaN |
S |
437 |
438 |
1 |
2 |
Richards, Mrs. Sidney (Emily Hocking) |
female |
24.0 |
2 |
3 |
29106 |
18.7500 |
NaN |
S |
438 |
439 |
0 |
1 |
Fortune, Mr. Mark |
male |
64.0 |
1 |
4 |
19950 |
263.0000 |
C23 C25 C27 |
S |
439 rows × 12 columns
# Index-on-index merge for the lower block, mirroring the cell above.
down=pd.merge(left_down,right_down,left_index=True,right_index=True)
down
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
440 |
0 |
2 |
Kvillner, Mr. Johan Henrik Johannesson |
male |
31.0 |
0 |
0 |
C.A. 18723 |
10.500 |
NaN |
S |
1 |
441 |
1 |
2 |
Hart, Mrs. Benjamin (Esther Ada Bloomfield) |
female |
45.0 |
1 |
1 |
F.C.C. 13529 |
26.250 |
NaN |
S |
2 |
442 |
0 |
3 |
Hampe, Mr. Leon |
male |
20.0 |
0 |
0 |
345769 |
9.500 |
NaN |
S |
3 |
443 |
0 |
3 |
Petterson, Mr. Johan Emil |
male |
25.0 |
1 |
0 |
347076 |
7.775 |
NaN |
S |
4 |
444 |
1 |
2 |
Reynaldo, Ms. Encarnacion |
female |
28.0 |
0 |
0 |
230434 |
13.000 |
NaN |
S |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
447 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27.0 |
0 |
0 |
211536 |
13.000 |
NaN |
S |
448 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19.0 |
0 |
0 |
112053 |
30.000 |
B42 |
S |
449 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
NaN |
1 |
2 |
W./C. 6607 |
23.450 |
NaN |
S |
450 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.000 |
C148 |
C |
451 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.750 |
NaN |
Q |
452 rows × 12 columns
# Append the lower block under the upper one, then replace the duplicated
# 0-based labels with a fresh 0..n-1 index (old labels are dropped, not kept
# as a column).
res1 = pd.concat([up, down]).reset_index(drop=True)
res1
|
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
886 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27.0 |
0 |
0 |
211536 |
13.0000 |
NaN |
S |
887 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19.0 |
0 |
0 |
112053 |
30.0000 |
B42 |
S |
888 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
NaN |
1 |
2 |
W./C. 6607 |
23.4500 |
NaN |
S |
889 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.0000 |
C148 |
C |
890 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.7500 |
NaN |
Q |
891 rows × 12 columns
# Persist the combined table to disk, then load it back for the next section.
# NOTE(review): to_csv writes the row index by default, which is why the
# re-loaded frame gains an extra "Unnamed: 0" column (13 columns instead of 12).
# Passing index=False to to_csv (or index_col=0 to read_csv) would avoid it;
# the code is left unchanged because the outputs recorded below include that column.
# `result` is defined outside this excerpt.
result.to_csv('result.csv')
data=pd.read_csv('result.csv')
data
|
Unnamed: 0 |
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
2 |
2 |
3 |
1 |
3 |
Heikkinen, Miss. Laina |
female |
26.0 |
0 |
0 |
STON/O2. 3101282 |
7.9250 |
NaN |
S |
3 |
3 |
4 |
1 |
1 |
Futrelle, Mrs. Jacques Heath (Lily May Peel) |
female |
35.0 |
1 |
0 |
113803 |
53.1000 |
C123 |
S |
4 |
4 |
5 |
0 |
3 |
Allen, Mr. William Henry |
male |
35.0 |
0 |
0 |
373450 |
8.0500 |
NaN |
S |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
886 |
886 |
887 |
0 |
2 |
Montvila, Rev. Juozas |
male |
27.0 |
0 |
0 |
211536 |
13.0000 |
NaN |
S |
887 |
887 |
888 |
1 |
1 |
Graham, Miss. Margaret Edith |
female |
19.0 |
0 |
0 |
112053 |
30.0000 |
B42 |
S |
888 |
888 |
889 |
0 |
3 |
Johnston, Miss. Catherine Helen "Carrie" |
female |
NaN |
1 |
2 |
W./C. 6607 |
23.4500 |
NaN |
S |
889 |
889 |
890 |
1 |
1 |
Behr, Mr. Karl Howell |
male |
26.0 |
0 |
0 |
111369 |
30.0000 |
C148 |
C |
890 |
890 |
891 |
0 |
3 |
Dooley, Mr. Patrick |
male |
32.0 |
0 |
0 |
370376 |
7.7500 |
NaN |
Q |
891 rows × 13 columns
# Pivot the column labels into an inner index level, turning the table into one
# long Series keyed by (row label, column name). In the output shown, cells
# holding NaN are omitted (10717 entries rather than 891 x 13), and the mixed
# column types collapse to dtype object.
data_unit=data.stack()
data_unit
0 Unnamed: 0 0
PassengerId 1
Survived 0
Pclass 3
Name Braund, Mr. Owen Harris
...
890 SibSp 0
Parch 0
Ticket 370376
Fare 7.75
Embarked Q
Length: 10717, dtype: object
# Reload the saved table as `df` for the group-by section and preview two rows.
# The "Unnamed: 0" column is the index that was written into result.csv.
df=pd.read_csv('result.csv')
df.head(2)
|
Unnamed: 0 |
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
# Iterating a GroupBy yields (group key, sub-DataFrame) pairs; list() materialises
# them so the contents of each 'Sex' group can be inspected.
list(df.groupby('Sex'))
[('female',
Unnamed: 0 PassengerId Survived Pclass \
1 1 2 1 1
2 2 3 1 3
3 3 4 1 1
8 8 9 1 3
9 9 10 1 2
.. ... ... ... ...
880 880 881 1 2
882 882 883 0 3
885 885 886 0 3
887 887 888 1 1
888 888 889 0 3
Name Sex Age SibSp \
1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1
2 Heikkinen, Miss. Laina female 26.0 0
3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1
8 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0
9 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1
.. ... ... ... ...
880 Shelley, Mrs. William (Imanita Parrish Hall) female 25.0 0
882 Dahlberg, Miss. Gerda Ulrika female 22.0 0
885 Rice, Mrs. William (Margaret Norton) female 39.0 0
887 Graham, Miss. Margaret Edith female 19.0 0
888 Johnston, Miss. Catherine Helen "Carrie" female NaN 1
Parch Ticket Fare Cabin Embarked
1 0 PC 17599 71.2833 C85 C
2 0 STON/O2. 3101282 7.9250 NaN S
3 0 113803 53.1000 C123 S
8 2 347742 11.1333 NaN S
9 0 237736 30.0708 NaN C
.. ... ... ... ... ...
880 1 230433 26.0000 NaN S
882 0 7552 10.5167 NaN S
885 5 382652 29.1250 NaN Q
887 0 112053 30.0000 B42 S
888 2 W./C. 6607 23.4500 NaN S
[314 rows x 13 columns]),
('male',
Unnamed: 0 PassengerId Survived Pclass \
0 0 1 0 3
4 4 5 0 3
5 5 6 0 3
6 6 7 0 1
7 7 8 0 3
.. ... ... ... ...
883 883 884 0 2
884 884 885 0 3
886 886 887 0 2
889 889 890 1 1
890 890 891 0 3
Name Sex Age SibSp Parch \
0 Braund, Mr. Owen Harris male 22.0 1 0
4 Allen, Mr. William Henry male 35.0 0 0
5 Moran, Mr. James male NaN 0 0
6 McCarthy, Mr. Timothy J male 54.0 0 0
7 Palsson, Master. Gosta Leonard male 2.0 3 1
.. ... ... ... ... ...
883 Banfield, Mr. Frederick James male 28.0 0 0
884 Sutehall, Mr. Henry Jr male 25.0 0 0
886 Montvila, Rev. Juozas male 27.0 0 0
889 Behr, Mr. Karl Howell male 26.0 0 0
890 Dooley, Mr. Patrick male 32.0 0 0
Ticket Fare Cabin Embarked
0 A/5 21171 7.2500 NaN S
4 373450 8.0500 NaN S
5 330877 8.4583 NaN Q
6 17463 51.8625 E46 S
7 349909 21.0750 NaN S
.. ... ... ... ...
883 C.A./SOTON 34068 10.5000 NaN S
884 SOTON/OQ 392076 7.0500 NaN S
886 211536 13.0000 NaN S
889 111369 30.0000 C148 C
890 370376 7.7500 NaN Q
[577 rows x 13 columns])]
# Summary statistics (count/mean/std/min/quartiles/max) of every numeric column,
# computed separately per sex. This includes the spurious "Unnamed: 0" column.
df.groupby('Sex').describe()
|
Unnamed: 0 |
PassengerId |
... |
Parch |
Fare |
|
count |
mean |
std |
min |
25% |
50% |
75% |
max |
count |
mean |
... |
75% |
max |
count |
mean |
std |
min |
25% |
50% |
75% |
max |
Sex |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
female |
314.0 |
430.028662 |
256.846324 |
1.0 |
230.75 |
413.5 |
640.25 |
888.0 |
314.0 |
431.028662 |
... |
1.0 |
6.0 |
314.0 |
44.479818 |
57.997698 |
6.75 |
12.071875 |
23.0 |
55.00 |
512.3292 |
male |
577.0 |
453.147314 |
257.486139 |
0.0 |
221.00 |
463.0 |
679.00 |
890.0 |
577.0 |
454.147314 |
... |
0.0 |
5.0 |
577.0 |
25.523893 |
43.138263 |
0.00 |
7.895800 |
10.5 |
26.55 |
512.3292 |
2 rows × 64 columns
# Restrict the per-sex summary to the Age column. `count` is below the group
# sizes because rows with a missing Age are not counted.
df.groupby('Sex')['Age'].describe()
|
count |
mean |
std |
min |
25% |
50% |
75% |
max |
Sex |
|
|
|
|
|
|
|
|
female |
261.0 |
27.915709 |
14.110146 |
0.75 |
18.0 |
27.0 |
37.0 |
63.0 |
male |
453.0 |
30.726645 |
14.678201 |
0.42 |
21.0 |
29.0 |
39.0 |
80.0 |
# Mean age per sex (missing ages are skipped by mean()).
df.groupby('Sex')['Age'].mean()
Sex
female 27.915709
male 30.726645
Name: Age, dtype: float64
# Per-sex summary of the ticket fare.
# NOTE: despite its name, `mean_fare_sex` holds the whole describe() table
# (count, mean, std, min, quartiles, max), not just the mean.
mean_fare_sex=df.groupby('Sex')['Fare'].describe()
mean_fare_sex
|
count |
mean |
std |
min |
25% |
50% |
75% |
max |
Sex |
|
|
|
|
|
|
|
|
female |
314.0 |
44.479818 |
57.997698 |
6.75 |
12.071875 |
23.0 |
55.00 |
512.3292 |
male |
577.0 |
25.523893 |
43.138263 |
0.00 |
7.895800 |
10.5 |
26.55 |
512.3292 |
# Survived is a 0/1 flag, so summing it per group counts the survivors of each sex.
survived_sex=df.groupby('Sex')['Survived'].sum()
survived_sex
Sex
female 233
male 109
Name: Survived, dtype: int64
# Number of survivors in each passenger class (sum of the 0/1 Survived flag).
survived_Pclass=df.groupby('Pclass')['Survived'].sum()
survived_Pclass
Pclass
1 136
2 87
3 119
Name: Survived, dtype: int64
# One pass over the 'Sex' groups producing two differently aggregated columns.
# Named aggregation sets the output column names directly, so no rename is needed.
df.groupby('Sex').agg(
    Survived_sum=('Survived', 'sum'),
    Fare_mean=('Fare', 'mean'),
)
|
Survived_sum |
Fare_mean |
Sex |
|
|
female |
233 |
44.479818 |
male |
109 |
25.523893 |
# Grouping by two keys gives a Series with a two-level (Pclass, Age) index;
# each value is the mean fare of passengers sharing that class and exact age.
df.groupby(['Pclass','Age'])['Fare'].mean()
Pclass Age
1 0.92 151.5500
2.00 151.5500
4.00 81.8583
11.00 120.0000
14.00 120.0000
...
3 61.00 6.2375
63.00 9.5875
65.00 7.7500
70.50 7.7500
74.00 7.7750
Name: Fare, Length: 182, dtype: float64
# Inspect the index of the grouped result: the group keys, with the index named 'Sex'.
mean_fare_sex.index
Index(['female', 'male'], dtype='object', name='Sex')
# pd.merge works on DataFrames, so promote the survivor-count Series to a
# one-column DataFrame first, then confirm the resulting type.
survived_sex=survived_sex.to_frame()
type(survived_sex)
pandas.core.frame.DataFrame
# Combine survivor counts with the fare summary. 'Sex' is not a column here but
# the name of both frames' index; merge accepts an index level name in `on`.
pd.merge(survived_sex,mean_fare_sex,on='Sex')
|
Survived |
count |
mean |
std |
min |
25% |
50% |
75% |
max |
Sex |
|
|
|
|
|
|
|
|
|
female |
233 |
314.0 |
44.479818 |
57.997698 |
6.75 |
12.071875 |
23.0 |
55.00 |
512.3292 |
male |
109 |
577.0 |
25.523893 |
43.138263 |
0.00 |
7.895800 |
10.5 |
26.55 |
512.3292 |
# Number of survivors for every distinct age value (rows with a missing Age
# fall into no group and are therefore excluded).
survived_age=df.groupby(['Age'])['Survived'].sum()
survived_age
Age
0.42 1
0.67 1
0.75 2
0.83 2
0.92 1
..
70.00 0
70.50 0
71.00 0
74.00 0
80.00 1
Name: Survived, Length: 88, dtype: int64
# Largest survivor count found for any single age.
survived_age.max()
15
# Keep only the age(s) whose survivor count equals that maximum.
survived_age[survived_age == survived_age.max()]
Age
24.0 15
Name: Survived, dtype: int64
# Survival rate of the age with the most survivors: survivors at that age
# divided by the number of passengers recorded with that age.
# The age is looked up with idxmax() instead of being hard-coded, and the
# passenger count uses the vectorised Series.sum() rather than Python's sum().
# NOTE: this is the rate for the age with the highest survivor *count*; it is
# not necessarily the highest survival rate over all ages.
peak_age = survived_age.idxmax()
rate = survived_age.max() / (df['Age'] == peak_age).sum()
rate
0.5
# Format the result for display; the label reads "maximum survival rate".
# NOTE(review): `rate` is the survival rate of the age with the most survivors,
# which is not necessarily the maximum rate across all ages.
f'最大存活率:{rate}'
'最大存活率:0.5'
# Start of the visualisation section: reload the saved table and preview it.
df=pd.read_csv('result.csv')
df.head(2)
|
Unnamed: 0 |
PassengerId |
Survived |
Pclass |
Name |
Sex |
Age |
SibSp |
Parch |
Ticket |
Fare |
Cabin |
Embarked |
0 |
0 |
1 |
0 |
3 |
Braund, Mr. Owen Harris |
male |
22.0 |
1 |
0 |
A/5 21171 |
7.2500 |
NaN |
S |
1 |
1 |
2 |
1 |
1 |
Cumings, Mrs. John Bradley (Florence Briggs Th... |
female |
38.0 |
1 |
0 |
PC 17599 |
71.2833 |
C85 |
C |
# Survivor count per sex (sum of the 0/1 Survived flag), used for the bar chart below.
sex=df.groupby('Sex')['Survived'].sum()
sex
Sex
female 233
male 109
Name: Survived, dtype: int64
# Bar chart of survivors per sex; the title is set on the Axes returned by pandas.
ax = sex.plot.bar()
ax.set_title('survived')
Text(0.5, 1.0, 'survived')

# Head-count for every (sex, outcome) combination: outcome 0 = died, 1 = survived.
df.groupby(['Sex','Survived'])['Survived'].count()
Sex Survived
female 0 81
1 233
male 0 468
1 109
Name: Survived, dtype: int64
# Count passengers per (sex, outcome) pair, then pivot the outcome level into
# columns so each row is a sex and the columns are 0 (died) / 1 (survived).
sex_outcome_counts = df.groupby(['Sex', 'Survived'])['Survived'].count()
sex_survived = sex_outcome_counts.unstack()
sex_survived
Survived |
0 |
1 |
Sex |
|
|
female |
81 |
233 |
male |
468 |
109 |
# Column 0 holds the passengers who did not survive; chart it per sex.
died = sex_survived.loc[:, 0]
died.plot.bar()
plt.title('died')
Text(0.5, 1.0, 'died')

# Grouped bars: for each sex, one bar per outcome column (0 = died, 1 = survived).
sex_survived.plot(kind='bar')
plt.title('survived and died')
Text(0.5, 1.0, 'survived and died')

# Stacked bars: died and survived counts piled on top of each other per sex.
# `stacked` is a boolean flag; the string 'True' only worked because any
# non-empty string is truthy (the string 'False' would have stacked too).
sex_survived.plot(kind='bar', stacked=True)
<Axes: xlabel='Sex'>

# Passenger count per (fare, outcome) pair, pivoted so each distinct fare is a
# row and the outcomes 0/1 are columns. A fare/outcome combination with no
# passengers shows up as NaN after the pivot.
fare_outcome_counts = df.groupby(['Fare', 'Survived'])['Survived'].count()
fare = fare_outcome_counts.unstack()
fare
Survived |
0 |
1 |
Fare |
|
|
0.0000 |
14.0 |
1.0 |
4.0125 |
1.0 |
NaN |
5.0000 |
1.0 |
NaN |
6.2375 |
1.0 |
NaN |
6.4375 |
1.0 |
NaN |
... |
... |
... |
227.5250 |
1.0 |
3.0 |
247.5208 |
1.0 |
1.0 |
262.3750 |
NaN |
2.0 |
263.0000 |
2.0 |
2.0 |
512.3292 |
NaN |
3.0 |
248 rows × 2 columns
# Line plot with fare on the x-axis and one line per outcome column (0 / 1).
fare.plot()
<Axes: xlabel='Fare'>

# Passenger count per (class, outcome) pair, with the outcome level pivoted
# into columns: rows are classes 1-3, columns are 0 (died) / 1 (survived).
class_outcome_counts = df.groupby(['Pclass', 'Survived'])['Survived'].count()
pclass = class_outcome_counts.unstack()
pclass
Survived |
0 |
1 |
Pclass |
|
|
1 |
80 |
136 |
2 |
97 |
87 |
3 |
372 |
119 |
# Grouped bar chart: died vs. survived counts side by side for each passenger class.
pclass.plot.bar()
<Axes: xlabel='Pclass'>

# Overlay two semi-transparent age histograms on the same axes, one per outcome
# (0 = died first, then 1 = survived). Each hist() call derives its own 5 bins
# from the data it is given.
for outcome in (0, 1):
    df.loc[df['Survived'] == outcome, 'Age'].hist(bins=5, alpha=0.5)
plt.legend([0, 1])
plt.xlabel('age')
plt.ylabel('count')
Text(0, 0.5, 'count')

# Same comparison on a density scale: density=1 normalises each histogram so
# its bars integrate to 1, making the two groups comparable despite their sizes.
df.Age[df.Survived==0].hist(bins=5,alpha=0.5,density=1)
df.Age[df.Survived==1].hist(bins=5,alpha=0.5,density=1)
# Overlay a kernel density estimate of age for each outcome on the same axes.
df.Age[df.Survived==0].plot.density()
df.Age[df.Survived==1].plot.density()
# The order of the four calls above determines colours and legend assignment; keep it.
plt.legend([0,1])
plt.xlabel('age')
plt.ylabel('density')
Text(0, 0.5, 'density')

# Kernel density estimate of the ages of first-class passengers only.
df.Age[df.Pclass==1].plot.density()
<Axes: ylabel='Density'>

# Distinct passenger classes in ascending order (unique() returns them in order
# of first appearance, hence the explicit sort).
unique_pclass = np.sort(df.Pclass.unique())
unique_pclass
array([1, 2, 3], dtype=int64)
# One age density curve per passenger class, all drawn on the same axes.
# The loop body must be indented; without it the cell is a syntax error.
for i in unique_pclass:
    df.Age[df.Pclass==i].plot.density()
# Axis label and legend are set once, after all curves are drawn; the legend
# entries follow the (sorted) order in which the classes were plotted.
plt.xlabel('age')
plt.legend(unique_pclass)
<matplotlib.legend.Legend at 0x1e2ff3706d0>

import seaborn as sns
# Same per-class age densities drawn with seaborn as filled areas without an
# outline (linewidth=0). The loop body must be indented to be valid Python.
# `fill=True` replaces the deprecated `shade=True` argument of kdeplot.
for i in unique_pclass:
    sns.kdeplot(df.Age[df.Pclass==i], fill=True, linewidth=0)

【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· DeepSeek 开源周回顾「GitHub 热点速览」
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· AI与.NET技术实操系列(二):开始使用ML.NET
· 单线程的Redis速度为什么快?