目录
1 餐饮店日销售额描述统计
import pandas as pd # 导入pandas库
daily_sales_data = '餐饮店日销售额.xls' ## 保存数据路径,excel文件放在.ipynb同一目录下
data = pd.read_excel(daily_sales_data, index_col = '日期') # 读入数据,索引列为‘日期’
print(data.describe())
销量
count 200.000000
mean 2755.214700
std 751.029772
min 22.000000
25% 2451.975000
50% 2655.850000
75% 3026.125000
max 9106.440000
print('观察值数量为',len(data),'个')
观察值数量为 201 个
type(data)
pandas.core.frame.DataFrame
2 日销售额异常值分析:用箱形图
import matplotlib.pyplot as plt # 导入matplotlib库
import numpy as np
plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文字符
plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号
plt.figure() # 新建一个绘图窗口
p = data.boxplot(return_type='dict') # 画箱线图,直接使用DataFrame的方法
y = p['fliers'][0].get_ydata() # 提取异常值的横轴坐标值保存到y,'flies'即为异常值的标签
y.sort() # 从小到大排序,该方法直接改变原对象
plt.show() # 展示箱线图
# p
# p['fliers'][0].get_data()
p['fliers'][0].get_ydata()
array([22.0, 51.0, 60.0, 865.0, 4060.3, 4065.2, 6607.4, 9106.44],
dtype=object)
返回 目录
3 第二季度销售额分析:直方图
import matplotlib.pyplot as plt # 导入matplotlib库
path = '第二季度销售额.xls' ## 保存数据路径为path
data = pd.read_excel(path, names=['date','sale'])
plt.hist(data['sale'],bins=10)
plt.show()
返回 目录
4 不同菜品的利润比较:饼图
import pandas as pd
import matplotlib.pyplot as plt
path = '菜品利润.xls'
profit_data = pd.read_excel(path)
profit_data
|
菜品ID |
菜品名 |
盈利 |
0 |
17148 |
百合酱蒸凤爪 |
9173 |
1 |
17154 |
翡翠蒸香茜饺 |
5729 |
2 |
109 |
金银蒜汁蒸排骨 |
4811 |
3 |
117 |
乐膳真味鸡 |
3594 |
4 |
17151 |
蜜汁焗餐包 |
3195 |
5 |
14 |
生炒菜心 |
3026 |
6 |
2868 |
铁板酸菜豆腐 |
2378 |
7 |
397 |
香煎韭菜饺 |
1970 |
8 |
88 |
香煎罗卜糕 |
1877 |
9 |
426 |
原汁原味菜心 |
1782 |
x = profit_data['盈利']
labels = profit_data['菜品名']
plt.figure()
plt.pie(x,labels = labels)
plt.axis('equal') # xy轴刻度等长,正圆形
plt.show()
5 不同菜品利润比较:条形图
x = profit_data['菜品名']
y = profit_data['盈利']
plt.figure(figsize=(9,3))
plt.bar(x,y, color = 'grey')
plt.xticks(rotation=30)
plt.xlabel('菜品')
plt.ylabel('利润')
plt.title('菜品利润分布')
plt.show()
6 销售额趋势对比:折线图
import pandas as pd
import matplotlib.pyplot as plt
data = pd.read_excel('各部门销售额.xls')
plt.figure( figsize = [11,3])
plt.plot(data['月份'],data['A部门'], color = 'green', label='A部门', marker = 'o')
plt.plot(data['月份'],data['B部门'], color = 'red', label='B部门', marker = 's')
plt.plot(data['月份'],data['C部门'], color = 'skyblue', label='C部门', marker = 'x')
plt.legend()
plt.ylabel('销售额 万元')
plt.show()
7 B部门历年销售额趋势比较:折线图
data = pd.read_excel('B部门销售额.xls')
plt.figure( figsize = [11,3])
plt.plot(data['月份'],data['2012年'], color = 'green', label = '2012年', marker = 'o')
plt.plot(data['月份'],data['2013年'], color = 'red', label = '2013年', marker = 's')
plt.plot(data['月份'],data['2014年'], color = 'skyblue', label = '2014年', marker = 'x')
plt.legend()
plt.ylabel('销售额 万元')
plt.show()
8 描述统计:进阶
import pandas as pd
path = '餐饮店日销售额.xls'
data = pd.read_excel(path, index_col='日期')
data
|
销量 |
日期 |
|
2015-03-01 |
51.0 |
2015-02-28 |
2618.2 |
2015-02-27 |
2608.4 |
2015-02-26 |
2651.9 |
2015-02-25 |
3442.1 |
... |
... |
2014-08-06 |
2915.8 |
2014-08-05 |
2618.1 |
2014-08-04 |
2993.0 |
2014-08-03 |
3436.4 |
2014-08-02 |
2261.7 |
201 rows × 1 columns
data = data[(data['销量']>865.0)&(data['销量']<4060.3)] ## 过滤异常值
statistics = data.describe()
statistics
|
销量 |
count |
192.000000 |
mean |
2740.654167 |
std |
382.793227 |
min |
1958.000000 |
25% |
2464.350000 |
50% |
2655.850000 |
75% |
3019.075000 |
max |
3802.800000 |
statistics.loc['range'] = statistics.loc['max'] - statistics.loc['min']
statistics
|
销量 |
count |
192.000000 |
mean |
2740.654167 |
std |
382.793227 |
min |
1958.000000 |
25% |
2464.350000 |
50% |
2655.850000 |
75% |
3019.075000 |
max |
3802.800000 |
range |
1844.800000 |
statistics.loc['var'] = statistics.loc['std']/statistics.loc['mean'] #变异系数
statistics
|
销量 |
count |
192.000000 |
mean |
2740.654167 |
std |
382.793227 |
min |
1958.000000 |
25% |
2464.350000 |
50% |
2655.850000 |
75% |
3019.075000 |
max |
3802.800000 |
range |
1844.800000 |
var |
0.139672 |
statistics.loc['distance'] = statistics.loc['75%'] - statistics.loc['25%']
statistics
|
销量 |
count |
192.000000 |
mean |
2740.654167 |
std |
382.793227 |
min |
1958.000000 |
25% |
2464.350000 |
50% |
2655.850000 |
75% |
3019.075000 |
max |
3802.800000 |
range |
1844.800000 |
var |
0.139672 |
distance |
554.725000 |
9 用电量分析
正常趋势分析
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文字符
plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号
df_normal = pd.read_csv('正常用电数据.csv')
df_normal.head()
|
Date |
Eletricity |
0 |
2012/2/1 |
6200 |
1 |
2012/2/2 |
6440 |
2 |
2012/2/3 |
6440 |
3 |
2012/2/4 |
4293 |
4 |
2012/2/5 |
2146 |
plt.figure( figsize = [16,3] )
plt.plot(df_normal['Date'],df_normal['Eletricity'])
plt.xlabel('日期')
plt.ylabel('正常用电趋势')
x_major_locator = plt.MultipleLocator(8) ## 调整x轴坐标间距
ax = plt.gca()
ax.xaxis.set_major_locator(x_major_locator)
plt.show()
窃电趋势分析
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文字符
plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号
df_steal = pd.read_csv('窃电用户数据.csv')
df_steal.head()
|
Date |
Eletricity |
0 |
2012/2/1 |
6100 |
1 |
2012/2/2 |
6312 |
2 |
2012/2/3 |
6240 |
3 |
2012/2/4 |
4293 |
4 |
2012/2/5 |
3346 |
plt.figure( figsize = [16,3] )
plt.plot(df_steal['Date'],df_steal['Eletricity'])
plt.xlabel('日期')
plt.ylabel('窃电用电趋势')
x_major_locator = plt.MultipleLocator(8) ## 调整x轴坐标间距
ax = plt.gca()
ax.xaxis.set_major_locator(x_major_locator)
plt.show()
对比分析
plt.figure( figsize = [16,3] )
plt.plot(df_normal['Date'],df_normal['Eletricity'], color = 'blue',label = '正常')
plt.plot(df_steal['Date'],df_steal['Eletricity'], color = 'red', label='窃电')
plt.xlabel('日期')
plt.ylabel('用电趋势')
plt.legend()
x_major_locator = plt.MultipleLocator(8) ## 调整x轴坐标间距
ax = plt.gca()
ax.xaxis.set_major_locator(x_major_locator)
plt.show()