Python数据可视化-seaborn
Seaborn其实是在matplotlib的基础上进行了更高级的API封装,从而使得作图更加容易,在大多数情况下使用seaborn就能做出很具有吸引力的图。这里实例采用的数据集都是seaborn提供的几个经典数据集,dataset文件可见于Github。本博客只总结了一部分用法,方便博主自己查询,详细介绍可以看seaborn官方API和example gallery,官方文档还是写得很好的。
1 set_style( ) set( )
set_style( )是用来设置主题的,Seaborn有五个预设好的主题:darkgrid、whitegrid、dark、white 和 ticks,默认为 darkgrid。
import matplotlib.pyplot as plt
import numpy as np  # required by np.arange below; missing in the original snippet
import seaborn as sns

# Apply one of seaborn's preset themes before anything is drawn.
sns.set_style("whitegrid")
# Plot the integers 0-9 as a simple line so the theme is visible.
plt.plot(np.arange(10))
plt.show()
import matplotlib.pyplot as plt
import numpy as np  # required by np.arange below; missing in the original snippet
import seaborn as sns

# set() configures the theme and the colour palette in a single call,
# which is why it is used more often than set_style().
sns.set(style="white", palette="muted", color_codes=True)
plt.plot(np.arange(10))
plt.show()
2 distplot( ) kdeplot( )
import matplotlib.pyplot as plt
import pandas as pd  # required by pd.read_csv below; missing in the original snippet
import seaborn as sns

df_iris = pd.read_csv('../input/iris.csv')
# One row, two columns: distplot on the left, kdeplot on the right.
fig, axes = plt.subplots(1, 2)
# kde=True overlays the density curve; rug=True adds a marginal rug
# (one small tick per observation).
sns.distplot(df_iris['petal length'], ax=axes[0], kde=True, rug=True)
# shade=True fills the area under the density curve.
sns.kdeplot(df_iris['petal length'], ax=axes[1], shade=True)
plt.show()
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

sns.set(palette="muted", color_codes=True)

# 100 normally distributed samples from a generator seeded with 10,
# so the figure is the same on every run.
rng = np.random.RandomState(10)
sample = rng.normal(size=100)

# 2x2 grid of panels sharing the x axis; unpack them by position.
fig, grid = plt.subplots(2, 2, figsize=(7, 7), sharex=True)
(top_left, top_right), (bottom_left, bottom_right) = grid

# Histogram only (density curve switched off).
sns.distplot(sample, kde=False, color="b", ax=top_left)
# Density curve plus rug, no histogram.
sns.distplot(sample, hist=False, rug=True, color="r", ax=top_right)
# Density curve with the area underneath filled, no histogram.
sns.distplot(sample, hist=False, color="g", kde_kws={"shade": True}, ax=bottom_left)
# Default distplot: histogram and density curve together.
sns.distplot(sample, color="m", ax=bottom_right)

plt.show()
3 箱型图 boxplot( )
import matplotlib.pyplot as plt
import pandas as pd  # required by pd.read_csv below; missing in the original snippet
import seaborn as sns

df_iris = pd.read_csv('../input/iris.csv')
# One box per value of the 'class' column, summarising 'sepal width'.
sns.boxplot(x=df_iris['class'], y=df_iris['sepal width'])
plt.show()
import matplotlib.pyplot as plt
import pandas as pd  # required by pd.read_csv below; missing in the original snippet
import seaborn as sns

tips = pd.read_csv('../input/tips.csv')
sns.set(style="ticks")  # choose the theme
# hue="sex" splits each day's box in two; palette selects the colour scheme.
sns.boxplot(x="day", y="total_bill", hue="sex", data=tips, palette="PRGn")
plt.show()
4 联合分布jointplot( )
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

tips = pd.read_csv('../input/tips.csv')
# Joint distribution of total_bill and tip with marginal plots.
# x, y and data are passed by keyword: seaborn >= 0.12 no longer accepts
# them positionally, while the keyword form works on older releases too.
# NOTE: the original remark said the correlation coefficient is shown in the
# top-right corner; that annotation comes from older seaborn releases and
# newer ones no longer draw it.
sns.jointplot(x="total_bill", y="tip", data=tips)
plt.show()
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

tips = pd.read_csv('../input/tips.csv')
# kind='reg' overlays a regression fit on the joint scatter.
# x, y and data are passed by keyword: seaborn >= 0.12 no longer accepts
# them positionally, while the keyword form works on older releases too.
sns.jointplot(x="total_bill", y="tip", data=tips, kind='reg')
plt.show()
5 热点图heatmap( )
# Correlation heatmap of a fixed set of columns.
# NOTE: `train` is not defined in this snippet; it is assumed to be a pandas
# DataFrame (loaded elsewhere) that contains the columns listed below.
internal_chars = ['full_sq', 'life_sq', 'floor', 'max_floor', 'build_year', 'num_room', 'kitch_sq', 'state', 'price_doc']
corrmat = train[internal_chars].corr()
f, ax = plt.subplots(figsize=(10, 7))
# annot=True writes each coefficient into its cell; square=True keeps cells square.
sns.heatmap(corrmat, square=True, linewidths=.5, annot=True)
# Rotate the x tick labels only after the heatmap has created them, and pass
# the angle as a number: recent matplotlib rejects the string '90'.
plt.xticks(rotation=90)
plt.show()
# Red scatter plot of the `full_sq` column against `price_doc`.
# NOTE: `train` is not defined in this snippet; presumably a DataFrame loaded elsewhere.
x_values = train['full_sq']
y_values = train['price_doc']
plt.scatter(x_values, y_values, c='r')
# Limit the x axis to the range 0-500.
plt.xlim(0, 500)
plt.show()
7.pointplot画出变量间的关系
# Median of price_doc for each value of floor, drawn as a point plot.
# NOTE: `train_df` and `color` are not defined in this snippet. `train_df` is
# assumed to be a DataFrame loaded elsewhere; `color` is presumably a palette
# such as `color = sns.color_palette()` -- confirm before running.
grouped_df = train_df.groupby('floor')['price_doc'].aggregate(np.median).reset_index()
plt.figure(figsize=(12, 8))
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally, while the keyword form works on older releases too.
sns.pointplot(x=grouped_df.floor.values, y=grouped_df.price_doc.values, alpha=0.8, color=color[2])
plt.ylabel('Median Price', fontsize=12)
plt.xlabel('Floor number', fontsize=12)
# The original had these two statements fused on one line, which is a
# SyntaxError; they must be separate statements.
plt.xticks(rotation='vertical')
plt.show()
8 pairplot( )
import matplotlib.pyplot as plt
import pandas as pd  # required by pd.read_csv below; missing in the original snippet
import seaborn as sns

data = pd.read_csv("../input/iris.csv")
sns.set()  # use seaborn's default colours
# hue selects the categorical column used to colour the points.
sns.pairplot(data, hue="class")
plt.show()
import matplotlib.pyplot as plt
import pandas as pd  # required by pd.read_csv below; missing in the original snippet
import seaborn as sns

iris = pd.read_csv('../input/iris.csv')
# vars restricts the grid to the listed columns; hue colours by 'class'
# using the "husl" palette.
sns.pairplot(iris, vars=["sepal width", "sepal length"], hue='class', palette="husl")
plt.show()
9 FacetGrid( )
import matplotlib.pyplot as plt
import pandas as pd  # required by pd.read_csv below; missing in the original snippet
import seaborn as sns

tips = pd.read_csv('../input/tips.csv')
# One panel per (smoker, time) combination: 'time' across columns, 'smoker' down rows.
g = sns.FacetGrid(tips, col="time", row="smoker")
# Draw a red histogram of total_bill in every panel.
g = g.map(plt.hist, "total_bill", color="r")
plt.show()
10 barplot( )
# Horizontal bar chart of price_doc per sub_area.
# NOTE: `group_df` is not defined in this snippet; it is assumed to be a
# DataFrame (built elsewhere) with `sub_area` and `price_doc` columns.
f, ax = plt.subplots(figsize=(12, 20))
# orient='h' draws the bars horizontally; alpha controls how opaque the colour is.
sns.barplot(y=group_df.sub_area.values, x=group_df.price_doc.values, orient='h', alpha=0.8, color='red')
# Axis titles and their font size. With horizontal bars the categories
# (sub_area) are on the y axis and the values (price_doc) on the x axis;
# the original had the two titles swapped.
plt.ylabel('sub_area', fontsize=16)
plt.xlabel('price_doc', fontsize=16)
# Keep the x tick labels horizontal.
plt.xticks(rotation='horizontal')
# Font size of the y tick labels.
plt.yticks(fontsize=15)
plt.show()
注:如果设置orient='v'(竖直显示),一定要记得把坐标轴调换一下,即x=group_df.sub_area.values, y=group_df.price_doc.values,否则会报错。
11.bar图
import matplotlib.pyplot as plt
import numpy as np

# Use a font with Chinese glyphs so the labels below render correctly.
plt.rc('font', family='SimHei', size=13)

# Total users per category and the share of them that are men.
num = np.array([13325, 9403, 9227, 8651])
ratio = np.array([0.75, 0.76, 0.72, 0.75])
men = num * ratio
women = num * (1 - ratio)
x = ['聊天','支付','团购\n优惠券','在线视频']

width = 0.5
idx = np.arange(len(x))
# align='center' is stated explicitly so the bars sit on `idx` regardless of
# the matplotlib version (the default changed from 'edge' to 'center' in 2.0).
plt.bar(idx, men, width, align='center', color='red', label='男性用户')
# bottom=men stacks the women bars on top of the men bars. For horizontal
# bars (plt.barh) the equivalent parameter is `left`.
plt.bar(idx, women, width, align='center', bottom=men, color='yellow', label='女性用户')
plt.xlabel('应用类别')
plt.ylabel('男女分布')
# Bars are centred on `idx`, so the tick labels go there too; the original
# offset of width/2 pushed them to the right edge of each bar.
plt.xticks(idx, x, rotation=40)
plt.legend()
plt.show()
也可以参考:http://seaborn.pydata.org/tutorial/distributions.html
知乎专栏关于seaborn的:https://zhuanlan.zhihu.com/p/27570774