import pandas as pd
import numpy as np
import seaborn as sns
from pandas import Series,DataFrame
# Load the tipping records from the CSV file in the working directory and
# preview the first 20 rows.
tips = pd.read_csv(filepath_or_buffer='tips.csv')
tips.head(n=20)
# Output of tips.head(20):
#
# |    | total_bill | tip  | sex    | smoker | day | time   | size |
# |----|------------|------|--------|--------|-----|--------|------|
# | 0  | 16.99      | 1.01 | Female | No     | Sun | Dinner | 2    |
# | 1  | 10.34      | 1.66 | Male   | No     | Sun | Dinner | 3    |
# | 2  | 21.01      | 3.50 | Male   | No     | Sun | Dinner | 3    |
# | 3  | 23.68      | 3.31 | Male   | No     | Sun | Dinner | 2    |
# | 4  | 24.59      | 3.61 | Female | No     | Sun | Dinner | 4    |
# | 5  | 25.29      | 4.71 | Male   | No     | Sun | Dinner | 4    |
# | 6  | 8.77       | 2.00 | Male   | No     | Sun | Dinner | 2    |
# | 7  | 26.88      | 3.12 | Male   | No     | Sun | Dinner | 4    |
# | 8  | 15.04      | 1.96 | Male   | No     | Sun | Dinner | 2    |
# | 9  | 14.78      | 3.23 | Male   | No     | Sun | Dinner | 2    |
# | 10 | 10.27      | 1.71 | Male   | No     | Sun | Dinner | 2    |
# | 11 | 35.26      | 5.00 | Female | No     | Sun | Dinner | 4    |
# | 12 | 15.42      | 1.57 | Male   | No     | Sun | Dinner | 2    |
# | 13 | 18.43      | 3.00 | Male   | No     | Sun | Dinner | 4    |
# | 14 | 14.83      | 3.02 | Female | No     | Sun | Dinner | 2    |
# | 15 | 21.58      | 3.92 | Male   | No     | Sun | Dinner | 2    |
# | 16 | 10.33      | 1.67 | Female | No     | Sun | Dinner | 3    |
# | 17 | 16.29      | 3.71 | Male   | No     | Sun | Dinner | 3    |
# | 18 | 16.97      | 3.50 | Female | No     | Sun | Dinner | 3    |
# | 19 | 20.65      | 3.35 | Male   | No     | Sat | Dinner | 3    |
# Series-level groupby: split only the 'tip' column by the values of 'sex'.
grouped = tips['tip'].groupby(tips['sex'])
# Frame-level groupby: rebinding `grouped` so every column is split by 'sex'.
grouped = tips.groupby(tips['sex'])
grouped
# The frame also holds text columns (sex, smoker, day, time). Restrict the
# reductions to numeric columns explicitly; pandas >= 2.0 no longer drops
# non-numeric columns silently and raises TypeError for mean() instead.
grouped.mean(numeric_only=True)
grouped.sum(numeric_only=True)
# Mean tip for each (sex, time) combination; grouping by two keys yields a
# Series with a two-level index.
data_mean = tips['tip'].groupby([tips['sex'], tips['time']]).mean()
data_mean
data_mean.plot(kind='barh')
# Iterating a GroupBy yields (group key, sub-Series) pairs; print each one.
for name, group in tips['tip'].groupby(tips['sex']):
    print(name)
    print(group)
# Number of rows in each 'sex' group.
tips.groupby(tips['sex']).size()
# Grouping by an external key Series, first on one column and then on the
# whole frame.
grouped = tips['tip'].groupby(tips['sex'])
grouped = tips.groupby(tips['sex'])
# Grouping by column name. numeric_only=True keeps the text columns out of
# the mean, which pandas >= 2.0 would otherwise reject with TypeError.
smoker_mean = tips.groupby('smoker').mean(numeric_only=True)
# Column key was misspelled 'somker', which raised KeyError.
smoker = tips.groupby('smoker', group_keys=False)['tip']
# Two grouping keys: the result is indexed by (sex, smoker).
smoker_mean = tips.groupby(['sex', 'smoker']).mean(numeric_only=True)
# as_index=False keeps the grouping keys as ordinary columns instead.
smoker_mean = tips.groupby(['sex', 'smoker'], as_index=False).mean(numeric_only=True)
# Two equivalent spellings of "mean tip per party size"; the element-wise
# comparison below shows they agree.
size_mean1 = tips.groupby('size')['tip'].mean()
size_mean2 = tips['tip'].groupby(tips['size']).mean()
size_mean1 == size_mean2
# Grouping key 1: the frame's own index, where labels 'a' and 'b' repeat.
df = DataFrame(np.arange(16).reshape((4, 4)), index=list('abab'))
df.groupby(df.index).mean()

# Grouping key 2: a plain list with one label per row, matched by position.
df = DataFrame(np.arange(16).reshape((4, 4)))
list1 = list('abab')
df.groupby(list1).mean()

# Grouping key 3: a dict that maps each index label to its group name.
df = DataFrame(np.arange(16).reshape((4, 4)), index=list('abAB'))
dict1 = dict(a='one', A='one', b='two', B='two')
df.groupby(dict1).mean()
# Grouping key 4: labels computed from the data itself. Rows whose value in
# column 3 is non-negative fall into group 'a', all others into group 'b'.
df = DataFrame(np.random.randn(4, 4))
df
df.groupby(
    df[3].map(lambda value: 'a' if value >= 0 else 'b')
).sum()
# Frame with a two-level row index and two-level column index.
df = DataFrame(
    np.arange(16).reshape(4, 4),
    index=[['one', 'one', 'two', 'two'], ['a', 'b', 'a', 'b']],
    columns=[['apple', 'apple', 'orange', 'orange'], ['red', 'green', 'red', 'green']],
)
df
# Sum the columns that share the same outer column label. groupby(axis=1) is
# deprecated (pandas 2.1) and removed in pandas 3.0, so group the transposed
# frame by its outer index level and transpose back.
df.T.groupby(level=0).sum().T
# Largest tip within each 'sex' group, shown as a table and as a bar chart.
max_tip = tips.groupby('sex')['tip'].agg('max')
max_tip
max_tip.plot.bar()
def get_range(x):
    """Return the spread (maximum minus minimum) of a numeric collection.

    ``x`` must expose ``max()`` and ``min()`` methods, as a pandas Series or
    a NumPy array does; a plain Python list does not and raises
    AttributeError.
    """
    return x.max() - x.min()
# Spread of the tip (largest minus smallest) inside each 'sex' group,
# computed with an inline aggregation function.
tips_range = tips.groupby('sex')['tip'].aggregate(lambda s: s.max() - s.min())
tips_range
def get_range(x):
    """Return the spread (maximum minus minimum) of a numeric collection.

    ``x`` must expose ``max()`` and ``min()`` methods, as a pandas Series or
    a NumPy array does; a plain Python list does not and raises
    AttributeError.
    """
    return x.max() - x.min()
# Several aggregations at once: built-ins are named by string, custom
# functions are passed as callables.
tips.groupby(['sex', 'smoker'])['tip'].agg(['mean', 'std', get_range])
# (label, function) tuples choose the names of the result columns.
tips.groupby(['sex', 'smoker'])['tip'].agg([('tip_mean', 'mean'), ('range', get_range)])
# Selecting several columns from a GroupBy requires a list; the bare
# 'a','b' tuple form was removed in pandas 2.0.
tips.groupby(['day', 'time'])[['tip', 'total_bill']].agg([('tip_mean', 'mean'), ('range', get_range)])
# A dict applies a different set of aggregations to each column.
tips.groupby(['day', 'time'])[['total_bill', 'tip']].agg({'total_bill': ['sum', 'mean'], 'tip': 'mean'})
# transform returns a result aligned to the original rows: every row
# receives its own group's mean. numeric_only=True keeps the text columns
# out of the mean, which pandas >= 2.0 would reject with TypeError.
tips.groupby('sex').transform('mean', numeric_only=True)
tips.groupby('sex')['tip'].transform('mean')
# apply with a reducing function returns one row (or value) per group.
tips.groupby('sex').apply(lambda x: x.mean(numeric_only=True))
tips.groupby('sex')['tip'].apply(lambda x: x.mean())
# The transform calls again, for side-by-side comparison with apply.
tips.groupby('sex').transform('mean', numeric_only=True)
tips.groupby('sex')['tip'].transform('mean')
# The two statements below need a frame with 'sex' and 'math' columns. The
# `df` bound at this point is the fruit frame with MultiIndex columns, so
# build the small score table first.
df = DataFrame({
    'name': ['张三', '李四', np.nan, '王五', '小明', '马六'],
    'sex': ['female', 'female', 'male', 'male', 'male', 'female'],
    'math': [67, 77, np.nan, 82, 90, np.nan],
    'English': [67, 77, np.nan, 82, 90, np.nan],
})
# Fill each missing score with the mean of that row's own 'sex' group.
# numeric_only=True skips the string columns, whose mean raises TypeError
# on pandas >= 2.0.
df.groupby('sex').apply(lambda x: x.fillna(x.mean(numeric_only=True)))
df.groupby('sex')['math'].apply(lambda x: x.fillna(x.mean()))
# Attach each row's group mean tip to `tips` itself (modifies `tips`).
tips['tip_mean_by_sex'] = tips.groupby('sex')['tip'].transform('mean')

# Long way round: aggregate to one mean per sex, then merge it back on.
tip_mean_by_sex = tips.groupby('sex')['tip'].mean()
tip_mean_by_sex
tip_mean_by_sex_df = DataFrame(tip_mean_by_sex)
tip_mean_by_sex_df
# Both sides have a 'tip' column, so the right-hand one is renamed by the
# suffix to 'tip_mean_by_sex'. `tips` already carries a column of that name
# from the assignment above; drop it from the left side first, otherwise the
# merge yields duplicate column names (MergeError on pandas >= 2.0).
new_tips = pd.merge(
    tips.drop(columns='tip_mean_by_sex'),
    tip_mean_by_sex_df,
    left_on='sex',
    right_index=True,
    suffixes=('', '_mean_by_sex'),
    how='left',
)
new_tips.head(10)

# Short way: transform already returns the group mean aligned to every row.
new_tips = tips.copy()
new_tips['tip_mean_by_sex'] = tips.groupby('sex')['tip'].transform('mean')
new_tips.head(10)
# Five largest tips within each 'sex' group; the group key becomes an extra
# outer index level on the result.
tips.groupby('sex').apply(
    lambda grp: grp.sort_values(by='tip', ascending=False).head(5)
)
# Same selection, but group_keys=False leaves the original row index as is.
tips.groupby('sex', group_keys=False).apply(
    lambda grp: grp.sort_values(by='tip', ascending=False).head(5)
)
# Small score table with missing names and scores.
data = {
    'name': ['张三', '李四', np.nan, '王五', '小明', '马六'],
    'sex': ['female', 'female', 'male', 'male', 'male', 'female'],
    'math': [67, 77, np.nan, 82, 90, np.nan],
    'English': [67, 77, np.nan, 82, 90, np.nan],
}
df = DataFrame(data)
# Global fill: every missing cell in every column (including 'name') gets
# the overall mean of one score column.
df.fillna(df['math'].mean())
df.fillna(df['English'].mean())
# Group-wise fill: each missing score gets the mean of that row's own 'sex'
# group. numeric_only=True skips the string columns, whose mean raises
# TypeError on pandas >= 2.0.
df.groupby('sex').apply(lambda x: x.fillna(x.mean(numeric_only=True)))
df.groupby('sex')['math'].apply(lambda x: x.fillna(x.mean()))
# Pivot tables of tip by sex (rows) and smoker (columns). With no aggfunc
# given, the cell value is the mean.
pd.pivot_table(tips, values='tip', index='sex', columns='smoker')
pd.pivot_table(tips, values='tip', index='sex', columns='smoker', aggfunc='sum')
# margins=True adds an 'All' row and column holding the totals.
pd.pivot_table(tips, values='tip', index='sex', columns='smoker', aggfunc='sum', margins=True)
# The same two tables built by hand: group by both keys, then move the
# innermost key ('smoker') into the columns.
tips.groupby(['sex', 'smoker'])['tip'].mean().unstack(level=-1)
tips.groupby(['sex', 'smoker'])['tip'].sum().unstack(level=-1)
# Rebuild the margins=True pivot table by hand.
sex_smoker = tips.groupby(['sex', 'smoker'])['tip'].sum().unstack()
# Row totals.
sex_smoker['All'] = sex_smoker['No'] + sex_smoker['Yes']
# Column totals. DataFrame.append was removed in pandas 2.0, so the totals
# row is added by label instead. Summing every column, 'All' included, also
# fills the grand total, which the appended dict used to leave as NaN.
sex_smoker.loc['All'] = sex_smoker.sum()
# Name the index last; the row labels from the groupby are kept, so the
# name is no longer discarded by a later index replacement.
sex_smoker.index.name = 'Sex'
sex_smoker
# Frequency table: number of rows for each (day, party size) pair.
# The pandas function is pd.crosstab; pd.cross_table does not exist.
cross_table = pd.crosstab(index=tips['day'], columns=tips['size'])
cross_table
# Divide every row by its own total so each day's shares sum to 1.
df = cross_table.div(cross_table.sum(axis=1), axis=0)
df
df.plot(kind='bar', stacked=True)
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步