数据分析第三章

1.绘制箱型图并标记异常数据

#%% Box plot
import pandas as pd
import matplotlib.pyplot as plt

# Catering sales workbook; the '日期' column becomes the index.
catering_sale = r"D:\py_project\a_三下\Python数据分析与挖掘实战-源代码与数据\第3章源代码\catering_sale.xls"
data = pd.read_excel(catering_sale,index_col = u'日期')

plt.rcParams['font.sans-serif'] = ['SimHei']  # font that can render Chinese labels
plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign drawable with that font
plt.figure()

p = data.boxplot(return_type='dict')

# Outlier ("flier") coordinates of the first box, ordered by value.
# Indexing with `order` yields copies, so the artist's own data is not
# mutated, and x and y stay paired point by point.
fliers = p['fliers'][0]
order = fliers.get_ydata().argsort()
x = fliers.get_xdata()[order]
y = fliers.get_ydata()[order]
print(x)
print(len(x))
print(y)

# Label every outlier with its value. Labels of close neighbours are shifted
# further left (the shift grows as the gap shrinks) to reduce overlap.
for i, (xi, yi) in enumerate(zip(x, y)):
    gap = yi - y[i - 1] if i > 0 else 0
    if gap > 0:
        offset = 0.05 - 0.8 / gap
    else:
        # First point, or a duplicate of the previous value: a gap of zero
        # would otherwise divide by zero and push the label off the axes.
        offset = 0.08
    plt.annotate(yi, xy=(xi, yi), xytext=(xi + offset, yi))

# Show the figure.
plt.title('3141')
plt.show()

该箱型图简洁明了地绘制出了该组数据的离群点。

 

2.直方图

#%% Frequency histogram
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

catering_sale = r"D:\py_project\a_三下\Python数据分析与挖掘实战-源代码与数据\第3章源代码\catering_sale.xls"
data = pd.read_excel(catering_sale,names = ['date','sale'])

# Bin edges and one label per bin; the labels are left-closed, right-open.
bins = [0,500,1000,1500,2000,2500,3000,3500,4000]
labels = ['[0,500)','[500,1000)','[1000,1500)','[1500,2000)',
       '[2000,2500)','[2500,3000)','[3000,3500)','[3500,4000)']

# right=False makes pd.cut bin on [a, b) so the bins agree with the labels;
# the default (right=True) bins on (a, b] and would put a value such as 500
# under the '[0,500)' label.
data['sale分层'] = pd.cut(data.sale, bins, labels=labels, right=False)
# observed=False keeps empty bins in the result so every label gets a bar.
aggResult = data.groupby(by=['sale分层'], observed=False)['sale'].agg([("count", "count")])

# Share of records per bin, rounded to two decimals and scaled to percent.
pAggResult = round(aggResult/aggResult.sum(), 2) * 100

plt.rcParams['font.sans-serif'] = ['SimHei']  # font that can render Chinese labels
plt.figure(figsize=(9,6))  # figure size
pAggResult['count'].plot(kind='bar',width=0.6,fontsize=10)  # draw the frequency bars
plt.title('季度销售额频率分布直方图3141',fontsize=20)
plt.show()

 

频率直方图可以揭示数据的分布特征和分布类型。

 3.饼图

#%% Pie chart
import pandas as pd
import matplotlib.pyplot as plt

# Workbook with one row per dish.
catering_dish_profit = r"D:\py_project\a_三下\Python数据分析与挖掘实战-源代码与数据\第3章源代码\catering_dish_profit.xls"
data = pd.read_excel(catering_dish_profit)

# Wedge sizes come from the '盈利' column, wedge labels from '菜品名'.
profits = data['盈利']
dish_names = data['菜品名']

plt.figure(figsize=(8, 6))
plt.pie(profits, labels=dish_names)
plt.rcParams['font.sans-serif'] = 'SimHei'  # font that can render Chinese labels
plt.title('菜品销售量分布(饼图)3141')
plt.axis('equal')  # equal axis scaling so the pie is drawn as a circle
plt.show()

 

4.折线图

#%% Line chart
import pandas as pd
import matplotlib.pyplot as plt

# The other cells in this file select a font that can render Chinese text;
# set it here as well so this cell does not depend on one of them having
# been run first.
plt.rcParams['font.sans-serif'] = ['SimHei']

data=pd.read_excel(r"D:\py_project\a_三下\Python数据分析与挖掘实战-源代码与数据\第3章源代码\dish_sale.xls")
plt.figure(figsize=(8,4))

# One line per department: (column name, line colour, point marker).
# The column name doubles as the legend label.
department_styles = [
    ('A部门', 'green', 'o'),
    ('B部门', 'red', 's'),
    ('C部门', 'skyblue', 'x'),
]
for column, color, marker in department_styles:
    plt.plot(data['月份'], data[column], color=color, label=column, marker=marker)

plt.legend()
plt.ylabel('销售额(万元)')
plt.title('3部门之间销售额的比较3141',fontsize=20)
plt.show()

 

5.趋势图

#%% Trend chart
import pandas as pd
import matplotlib.pyplot as plt

plt.rcParams['font.sans-serif'] = ['SimHei']  # font that can render Chinese labels


def _plot_daily_usage(df, title, figsize):
    """Plot the "Eletricity" column of *df* against its "Date" column.

    A new figure of size *figsize* is created, titled *title*, with a major
    x tick every 7 units, and shown. Using one helper for both charts keeps
    their axis labels identical.
    """
    plt.figure(figsize=figsize)
    plt.plot(df["Date"], df["Eletricity"])
    plt.xlabel("日期")
    plt.ylabel("每日电量")
    plt.gca().xaxis.set_major_locator(plt.MultipleLocator(7))
    plt.title(title)
    plt.show()


df_normal = pd.read_csv(r"D:\py_project\a_三下\Python数据分析与挖掘实战-源代码与数据\data\\user.csv")
_plot_daily_usage(df_normal, "正常用户电量趋势3141", (8, 4))

# NOTE(review): this loads the same user.csv as df_normal above, so both
# charts show identical data. The "窃电用户" chart presumably needs a separate
# data file; confirm the correct path before relying on this figure.
df_steal = pd.read_csv(r"D:\py_project\a_三下\Python数据分析与挖掘实战-源代码与数据\data\\user.csv")
# The y axis was previously labelled "日期" (a copy of the x label); the
# helper labels it "每日电量", matching the first chart.
_plot_daily_usage(df_steal, "窃电用户电量趋势3141", (10, 9))

 

 

 6.帕累托图

#%% Pareto chart
import pandas as pd
import matplotlib.pyplot as plt

dish_profit = r"D:\py_project\a_三下\Python数据分析与挖掘实战-源代码与数据\data\catering_dish_profit.xls"
data = pd.read_excel(dish_profit, index_col='菜品名')
data = data['盈利'].copy()
# sort_values returns a new Series; the result must be assigned, otherwise
# the bars stay in file order and the cumulative curve is not a Pareto curve.
data = data.sort_values(ascending=False)

plt.rcParams['font.sans-serif'] = ['SimHei']  # font that can render Chinese labels
plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign drawable with that font

plt.figure()
data.plot(kind='bar')
plt.ylabel('盈利(元)')

# Cumulative share of total profit after each dish, largest first.
p = 1.0 * data.cumsum() / data.sum()
p.plot(color='r', secondary_y=True, style='-o', linewidth=2)

# Annotate the cumulative share reached at the 7th dish (position 6).
# .iloc is used because the Series is indexed by dish name, not by integer.
mark = 6
if len(p) > mark:
    share = p.iloc[mark]
    plt.annotate(format(share, '.4%'), xy=(mark, share), xytext=(mark * 0.9, share * 0.9),
                 arrowprops=dict(arrowstyle='->', connectionstyle='arc3, rad=.2'))
plt.ylabel('盈利(比例)')
plt.title('菜品盈利数据帕累托图3141')
plt.show()

posted @ 2023-02-26 22:28  Yunnnaaaaa  阅读(12)  评论(0)  编辑  收藏  举报