Pandas入门之四:统计描述

已信任
Jupyter 服务器: 本地
Python 3: Not Started
[1]



import pandas as pd
import numpy as np
[4]



# Three students, one pandas Series per attribute. Each Series gets the
# default 0..2 index, so the values line up row by row in the frame.
names = pd.Series(['小明', '小黑', '小红'])
ages = pd.Series([12, 16, 14])
scores = pd.Series([98, 90, 77])

# Dict insertion order fixes the column order: name, age, score.
d = dict(name=names, age=ages, score=scores)
df = pd.DataFrame(d)
df  # last expression in the cell, so the notebook renders the frame
name    age    score
0    小明    12    98
1    小黑    16    90
2    小红    14    77



[5]




# sum() reduces along axis 0 by default, i.e. one total per column.
# The string column `name` is "summed" by concatenating its values.
df.sum(axis=0)
name     小明小黑小红
age          42
score       265
dtype: object
[6]



# Row-wise sum: one total per row (axis=1).
# numeric_only=True restricts the sum to the numeric columns (age, score).
# Since pandas 2.0 the default is numeric_only=False, and adding the string
# column `name` to integers raises TypeError instead of silently skipping it.
df.sum(axis=1, numeric_only=True)
0    110
1    106
2     91
dtype: int64
[7]



# Column-wise mean.
# numeric_only=True skips the string column `name`; since pandas 2.0 the
# default (numeric_only=False) raises TypeError on non-numeric columns
# rather than dropping them.
df.mean(numeric_only=True)
age      14.000000
score    88.333333
dtype: float64
[8]



# Column-wise sample standard deviation (ddof=1, the pandas default).
# numeric_only=True skips the string column `name`; since pandas 2.0 the
# default (numeric_only=False) raises TypeError on non-numeric columns
# rather than dropping them.
df.std(numeric_only=True)
age       2.000000
score    10.598742
dtype: float64
[9]



# Maximum of each column (axis 0). Strings in `name` are compared
# lexicographically, so that column also yields a value.
df.max(axis=0)
name     小黑
age      16
score    98
dtype: object
[10]



# Element-wise absolute value; abs() is only defined for numeric data,
# so select the numeric columns first.
numeric_cols = ['age', 'score']
df[numeric_cols].abs()
age    score
0    12    98
1    16    90
2    14    77
[12]



# Summary statistics of the numeric columns: count, mean, std, min,
# the 25%/50%/75% quantiles and max.
df.describe()
age    score
count    3.0    3.000000
mean    14.0    88.333333
std    2.0    10.598742
min    12.0    77.000000
25%    13.0    83.500000
50%    14.0    90.000000
75%    15.0    94.000000
max    16.0    98.000000
[13]



# Summarize only the object-dtype columns (here the string column `name`):
# count, number of unique values, most frequent value (top) and its
# frequency (freq).
df.describe(include=['object'])
name
count    3
unique    3
top    小红
freq    1
[14]



# Describe every column at once. Statistics that do not apply to a
# column's type (e.g. mean for `name`, unique for `age`) show up as NaN.
df.describe(include='all')
name    age    score
count    3    3.0    3.000000
unique    3    NaN    NaN
top    小红    NaN    NaN
freq    1    NaN    NaN
mean    NaN    14.0    88.333333
std    NaN    2.0    10.598742
min    NaN    12.0    77.000000
25%    NaN    13.0    83.500000
50%    NaN    14.0    90.000000
75%    NaN    15.0    94.000000
max    NaN    16.0    98.000000
[-]

 

posted @ 2021-07-13 23:44  vv_869  阅读(56)  评论(0)  编辑  收藏  举报