使用 Python 绘制数据分析第三章的图形
import pandas as pd

# Raw string: "\数" and "\c" are not valid escape sequences, so a plain
# literal triggers a SyntaxWarning on recent Python versions.
catering_sale = r"D:\数据分析\catering_sale.xls"


def flier_label_offsets(y_sorted):
    """Return the horizontal label offset for each sorted outlier value.

    The first label is shifted right by 0.08.  Every following label is
    shifted by ``0.05 - 0.8 / gap`` where ``gap`` is the distance to the
    previous value, so labels of close-together outliers are pushed apart.
    When two outliers are equal (``gap == 0``) the default 0.08 is used
    instead of dividing by zero.

    Args:
        y_sorted: Outlier values in ascending order.

    Returns:
        A list of offsets, one per value (empty for empty input).
    """
    offsets = []
    previous = None
    for value in y_sorted:
        gap = None if previous is None else value - previous
        if gap:
            offsets.append(0.05 - 0.8 / gap)
        else:
            # First point, or a duplicate of the previous outlier.
            offsets.append(0.08)
        previous = value
    return offsets


def plot_sales_boxplot():
    """Print summary statistics and draw a box plot with labelled outliers."""
    # Imported here so that the helper above stays importable without a
    # plotting backend.
    import matplotlib.pyplot as plt

    data = pd.read_excel(catering_sale, index_col=u'日期')
    print(data.describe())

    plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels
    plt.rcParams['axes.unicode_minus'] = False    # render the minus sign

    plt.figure()
    p = data.boxplot(return_type='dict')
    fliers = p['fliers'][0]
    x = fliers.get_xdata()
    # sorted() leaves the artist's own data untouched (the original sorted
    # the returned array in place).
    y = sorted(fliers.get_ydata())
    for xi, yi, dx in zip(x, y, flier_label_offsets(y)):
        plt.annotate(yi, xy=(xi, yi), xytext=(xi + dx, yi))
    plt.title('学号3121')
    plt.show()


if __name__ == "__main__":
    plot_sales_boxplot()
import pandas as pd
import numpy as np

# Raw string: the backslashes in this Windows path are not valid escapes.
catering_sale = r"D:\数据分析\catering_fish_congee.xls"  # catering data

bins = [0, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000]
labels = ['[0,500)', '[500,1000)', '[1000,1500)', '[1500,2000)',
          '[2000,2500)', '[2500,3000)', '[3000,3500)', '[3500,4000)']


def bin_sales(data):
    """Add a ``sale分层`` column assigning each sale to its interval.

    The labels describe left-closed, right-open intervals, so ``right=False``
    is required; with pandas' default (right-closed) a sale of exactly 500
    would be labelled ``[0,500)`` and a sale of 0 would be dropped.

    Args:
        data: DataFrame with a numeric ``sale`` column; modified in place.

    Returns:
        The same DataFrame, for convenience.
    """
    data['sale分层'] = pd.cut(data['sale'], bins, labels=labels, right=False)
    return data


def sale_frequency(data):
    """Count the sales falling into each interval.

    ``observed=False`` keeps empty intervals in the result (count 0), which
    the histogram needs, independent of the pandas version's default.

    Returns:
        DataFrame indexed by interval with a single ``count`` column.
    """
    return data.groupby(by=['sale分层'], observed=False)['sale'].agg(['count'])


def to_percent(counts):
    """Convert counts to percentages of the total (ratio rounded to 2 places)."""
    return (counts / counts.sum()).round(2) * 100


def plot_sale_frequency():
    """Read the sales sheet, print the frequency tables and draw the bar chart."""
    # Imported here so that the helpers above stay importable without a
    # plotting backend.
    import matplotlib.pyplot as plt

    # Columns are renamed to 'date' and 'sale'; no index column is set.
    data = pd.read_excel(catering_sale, names=['date', 'sale'])
    bin_sales(data)
    print(data)

    aggResult = sale_frequency(data)
    print(aggResult)

    pAggResult = to_percent(aggResult)
    print(pAggResult)

    plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels
    plt.figure(figsize=(10, 6))  # figure size
    pAggResult['count'].plot(kind='bar', width=0.8, fontsize=10)  # frequency bars
    plt.title('学号3121,季度销售额频率分布直方图', fontsize=20)
    plt.show()


if __name__ == "__main__":
    plot_sale_frequency()
import pandas as pd
import matplotlib.pyplot as plt

# Configure the CJK font once, before any text is created.
plt.rcParams['font.sans-serif'] = 'SimHei'

# Raw string: the backslashes in this Windows path are not valid escapes.
catering_dish_profit = r"D:\数据分析\catering_dish_profit.xls"
data = pd.read_excel(catering_dish_profit)

# NOTE(review): both charts plot the '盈利' (profit) column while the titles
# and the y-label talk about sales volume - confirm which one is intended.

# Pie chart: one wedge per dish, sized by profit.
x = data['盈利']
labels = data['菜品名']
plt.figure(figsize=(8, 6))
plt.pie(x, labels=labels)
plt.title('学号3121,菜品销售量分布(饼图)')
plt.axis('equal')  # keep the pie circular
plt.show()

# Bar chart of the same data.
x = data['菜品名']
y = data['盈利']
plt.figure(figsize=(8, 4))
plt.bar(x, y)
plt.xlabel('菜品')
plt.ylabel('销量')
plt.title('学号3121,菜品销售量分布(条形图)')
plt.show()
# Compare sales amounts between departments, then between years.
import pandas as pd
import matplotlib.pyplot as plt

# All labels are Chinese; without a CJK font they render as empty boxes.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# (colour, marker) used for the first, second and third line of each figure.
line_styles = (('green', 'o'), ('red', 's'), ('skyblue', 'x'))

# Figure 1: one line per department.
data = pd.read_excel(r"D:\数据分析\dish_sale.xls")
plt.figure(figsize=(8, 4))
for column, (color, marker) in zip(('A部门', 'B部门', 'C部门'), line_styles):
    plt.plot(data['月份'], data[column], color=color, label=column, marker=marker)
plt.legend()
plt.ylabel('销售额(万元)')
plt.title('学号3121,部门之间销售金额比较')
plt.show()

# Figure 2: one line per year (the original sets no title on this figure).
data = pd.read_excel(r"D:\数据分析\dish_sale_b.xls")
plt.figure(figsize=(8, 4))
for column, (color, marker) in zip(('2012年', '2013年', '2014年'), line_styles):
    plt.plot(data['月份'], data[column], color=color, label=column, marker=marker)
plt.legend()
plt.ylabel('销售额(万元)')
plt.show()
import pandas as pd

# Raw string: the backslashes in this Windows path are not valid escapes.
catering_sale = r"D:\数据分析\catering_sale.xls"


def filter_valid_sales(data):
    """Keep only rows whose '销量' lies strictly between 400 and 5000."""
    return data[(data['销量'] > 400) & (data['销量'] < 5000)]


def dispersion_statistics(data):
    """Return ``data.describe()`` extended with three dispersion rows.

    Added rows:
        range: max - min.
        var:   std / mean, i.e. the coefficient of variation (despite the
               row name it is not the variance).
        dis:   75% quantile - 25% quantile (interquartile range).
    """
    statistics = data.describe()
    statistics.loc['range'] = statistics.loc['max'] - statistics.loc['min']
    statistics.loc['var'] = statistics.loc['std'] / statistics.loc['mean']
    statistics.loc['dis'] = statistics.loc['75%'] - statistics.loc['25%']
    return statistics


def print_sales_statistics():
    """Read the sales sheet, drop out-of-range rows and print the statistics."""
    data = pd.read_excel(catering_sale, index_col='日期')
    data = filter_valid_sales(data)
    statistics = dispersion_statistics(data)
    print(statistics)


if __name__ == "__main__":
    print_sales_statistics()
import pandas as pd
import matplotlib.pyplot as plt

# Configure the CJK font once, before any text is created.
plt.rcParams['font.sans-serif'] = ['SimHei']


def plot_electricity_trend(csv_path, title, figsize):
    """Plot the daily electricity series stored in ``csv_path``.

    Args:
        csv_path: CSV file with "Date" and "Eletricity" columns (the column
            name is spelled this way in the data file).
        title: Figure title.
        figsize: (width, height) of the figure in inches.

    Returns:
        The DataFrame that was read.
    """
    usage = pd.read_csv(csv_path)
    plt.figure(figsize=figsize)
    plt.plot(usage["Date"], usage["Eletricity"])
    plt.xlabel("日期")
    plt.ylabel("每日电量")
    # One major tick every 7 positions on the x axis.
    plt.gca().xaxis.set_major_locator(plt.MultipleLocator(7))
    plt.title(title)
    plt.show()
    return usage


# Normal user.
df_normal = plot_electricity_trend(
    "D:/数据分析/user.csv", "学号3121,正常用户电量趋势", (8, 4))

# Electricity-stealing user.  The original labelled this y axis "日期",
# duplicating the x label; it now matches the first figure.  Raw string:
# "\S" is not a valid escape sequence.
df_steal = plot_electricity_trend(
    r"D:\数据分析\Steal user.csv", "学号3121,窃电用户电量趋势", (10, 9))
import pandas as pd

# Raw string: the backslashes in this Windows path are not valid escapes.
dish_profit = r"D:\数据分析\catering_dish_profit.xls"

# Zero-based bar position whose cumulative share is annotated.
ANNOTATED_POSITION = 6


def sort_profit_descending(profit):
    """Return ``profit`` sorted from largest to smallest.

    ``Series.sort_values`` is not in-place; the original script discarded
    its result, so the bars were never actually sorted.
    """
    return profit.sort_values(ascending=False)


def cumulative_share(profit):
    """Return the running total of ``profit`` as a fraction of its sum."""
    return 1.0 * profit.cumsum() / profit.sum()


def annotation_position(share, preferred=ANNOTATED_POSITION):
    """Return ``preferred`` clamped to the last valid position of ``share``.

    Returns -1 when ``share`` is empty.
    """
    return min(preferred, len(share) - 1)


def plot_profit_pareto():
    """Draw the Pareto chart: profit bars plus cumulative-share line."""
    # Imported here so that the helpers above stay importable without a
    # plotting backend.
    import matplotlib.pyplot as plt

    data = pd.read_excel(dish_profit, index_col='菜品名')
    data = sort_profit_descending(data['盈利'].copy())

    plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels
    plt.rcParams['axes.unicode_minus'] = False    # render the minus sign

    plt.figure()
    data.plot(kind='bar')
    plt.ylabel('盈利(元)')

    p = cumulative_share(data)
    p.plot(color='pink', secondary_y=True, style='-o', linewidth=2)

    pos = annotation_position(p)
    if pos >= 0:
        # .iloc: the index holds dish names, so the lookup must be positional.
        share = p.iloc[pos]
        plt.annotate(format(share, '.4%'), xy=(pos, share),
                     xytext=(pos * 0.9, share * 0.9),
                     arrowprops=dict(arrowstyle="->",
                                     connectionstyle="arc3,rad=.2"))
    plt.ylabel('盈利(比例)')
    plt.title('学号3121')
    plt.show()


if __name__ == "__main__":
    plot_profit_pareto()
# Correlation analysis of the catering sales data.
import pandas as pd

# Raw string: the backslashes in this Windows path are not valid escapes.
catering_sale = r"D:\数据分析\catering_sale_all.xls"
data = pd.read_excel(catering_sale, index_col='日期')

# Compute the correlation matrix once and reuse it for both printouts.
correlations = data.corr()
print(correlations)                 # every pair of columns
print(correlations['百合酱蒸凤爪'])   # one column against all the others
# Correlation between two specific columns.
print(data['百合酱蒸凤爪'].corr(data['翡翠蒸香茜饺']))
import matplotlib.pyplot as plt
import numpy as np

# The title is Chinese and the y axis has negative ticks: select a CJK font
# and keep the minus sign renderable, as the sibling scripts do.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# One period of sin(x) sampled at 50 points.
x = np.linspace(0, 2 * np.pi, 50)
y = np.sin(x)

plt.plot(x, y, 'bp--')  # blue, pentagon markers, dashed line
plt.title('学号3121')
plt.show()
import matplotlib.pyplot as plt

# The title is Chinese: select a CJK font, as the sibling scripts do.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

labels = 'Frogs', 'Hogs', 'Dogs', 'Logs'
sizes = [15, 30, 45, 10]
colors = ['yellowgreen', 'gold', 'lightskyblue', 'lightcoral']
explode = (0, 0.1, 0, 0)  # pull the second wedge ('Hogs') out of the pie

plt.pie(
    sizes,
    explode=explode,
    labels=labels,
    colors=colors,
    autopct='%1.1f%%',  # percentage printed on each wedge
    shadow=True,
    startangle=90,
)
plt.axis('equal')  # keep the pie circular
plt.title('学号3121')
plt.show()
import matplotlib.pyplot as plt
import numpy as np

# The title is Chinese and the x axis has negative ticks: select a CJK font
# and keep the minus sign renderable, as the sibling scripts do.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# 1000 draws from the standard normal distribution, shown in 10 bins.
x = np.random.randn(1000)
plt.hist(x, bins=10)
plt.title('学号3121')
plt.show()
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# The title is Chinese and the y axis has negative ticks: select a CJK font
# and keep the minus sign renderable, as the sibling scripts do.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Two columns: 1000 standard-normal draws and the same draws shifted by 1.
x = np.random.randn(1000)
D = pd.DataFrame([x, x + 1]).T

D.plot(kind='box')  # one box per column
plt.title('学号3121')
plt.show()
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
import numpy as np
import pandas as pd

# exp(0) .. exp(19): the series spans many orders of magnitude.
x = pd.Series(np.exp(np.arange(20)))

plt.figure(figsize=(8, 9))

# Two stacked panels of the same series: linear y axis on top, logarithmic
# y axis below.  Each panel becomes the current axes before it is drawn.
panel_options = (
    {'label': '原始数据图'},
    {'logy': True, 'label': '对数数据图'},
)
for row, options in enumerate(panel_options, start=1):
    ax1 = plt.subplot(2, 1, row)
    x.plot(legend=True, **options)

# The title is applied to the current (bottom) panel.
plt.title('学号3121')
plt.show()
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese labels
plt.rcParams['axes.unicode_minus'] = False    # render the minus sign
import numpy as np
import pandas as pd

# Error-bar half-lengths must be non-negative: raw randn() draws are
# negative about half of the time, which matplotlib's errorbar rejects.
error = np.abs(np.random.randn(10))

# sin(0) .. sin(9) drawn with one error bar per point.
y = pd.Series(np.sin(np.arange(10)))
y.plot(yerr=error)
plt.title('学号3121')
plt.show()
import matplotlib.pyplot as plt

# The title is Chinese: select a CJK font, as the sibling scripts do.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

years = [2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019]
turnovers = [0.5, 9.36, 52, 191, 350, 571, 912, 1027, 1682, 2135, 2684]

plt.figure()
plt.scatter(years, turnovers, c='pink', s=100, label='legend')

# Explicit tick positions on both axes.
plt.xticks(range(2008, 2020, 3))
plt.yticks(range(0, 3200, 800))

plt.xlabel("Year", fontdict={'size': 16})
plt.ylabel("number", fontdict={'size': 16})
plt.legend(loc='best')

# The original first set plt.title("Title", fontdict={'size': 20}); this
# later call replaced both its text and its font size, so only this one
# is kept.
plt.title('学号3121')
plt.show()