第一周作业 第3章数据探索

总结 

.............

 

图3-1

 

# Figure 3-1: print summary statistics for the catering sales data, then draw
# a demo sine-wave line plot.
import matplotlib.pyplot as plt  # was missing: every plt.* call below raised NameError
import numpy as np
import pandas as pd

catering_sale = 'catering_sale.xls'
# Use the '日期' column as the row index.
data = pd.read_excel(catering_sale, index_col=u'日期')
print(data.describe())

plt.title('3150')
# NOTE(review): this samples from 0.2*pi up to 25 using linspace's default
# number of points; the later sine demo in this file uses
# np.linspace(0, 2 * np.pi, 50) -- confirm which range was intended.
x = np.linspace(0.2 * np.pi, 25, endpoint=True)
y = np.sin(x)
plt.plot(x, y, 'bp-')
plt.show()

 

 

 

 

 

 

 

 

图3-2

# Figure 3-2: report missing values in the catering sales data, then draw a
# box plot of the second column with every outlier labelled by its value.
import matplotlib.pyplot as plt  # plotting library
from numpy import nan as NA
import pandas as pd

# Load the source data.
xlsFilename = "catering_sale.xls"
df = pd.read_excel(xlsFilename)

# Number of missing values per column.
missingNumCountDf = df.isnull().sum()
print("\n缺失值个数")
print(missingNumCountDf)

# Missing rate in percent. The denominator is the total number of rows:
# df.count() only counts non-null cells, so dividing by it produced
# missing/present instead of missing/total.
missingPercentDf = 100 * (missingNumCountDf / len(df))
print("\n缺失率")
print(missingPercentDf)

# Rows whose second column (position 1) is missing.
missingDf = df[df.iloc[:, 1].isnull()]
print("\n缺失记录")
print(missingDf)

# Drop every row that contains a missing value before plotting.
df2 = df.dropna()

plt.rcParams['font.sans-serif'] = ['SimHei']  # font used to render Chinese labels
plt.rcParams['axes.unicode_minus'] = False  # render the minus sign correctly
plt.title('3150')

# Box plot of the second column only, selected by position as above.
bp = plt.boxplot(df2.iloc[:, 1].to_numpy(), patch_artist=True)

# Label each outlier marker with its value and print the outlier values.
for flier in bp['fliers']:
    data = flier.get_data()
    for x, y in flier.get_xydata():
        plt.text(x, y, "%.2f" % y,
                 verticalalignment="top", horizontalalignment='right')

    print("\n异常值")
    print(data[1])

plt.show()

 

 

 图3-3

# Figure 3-3: bar chart of the percentage of records falling into each
# 500-wide sales bin.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Raw string: the path value is unchanged, but '\p' is no longer parsed as an
# (invalid) escape sequence.
catering_sale = r'D:\python\挖掘学习实训/catering_fish_congee.xls'  # catering data
# Read the sheet, naming its columns 'date' and 'sale' (no index column is set).
data = pd.read_excel(catering_sale, names=['date', 'sale'])

bins = [0, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000]
labels = ['[0,500)', '[500,1000)', '[1000,1500)', '[1500,2000)',
          '[2000,2500)', '[2500,3000)', '[3000,3500)', '[3500,4000)']

# The labels describe left-closed intervals [a, b), so the bins must be
# left-closed too; pd.cut's default is right-closed (a, b], which would put a
# value such as 500 under the '[0,500)' label.
data['sale分层'] = pd.cut(data.sale, bins, labels=labels, right=False)
# observed=False keeps empty bins in the result so every label gets a bar.
aggResult = data.groupby('sale分层', observed=False).agg({'sale': 'count'})

# Share of records per bin, rounded to two decimals and expressed in percent.
pAggResult = round(aggResult / aggResult.sum(), 2) * 100

# Configure the font before any text is created on the figure.
plt.rcParams['font.sans-serif'] = ['SimHei']  # font used to render Chinese labels
plt.figure(figsize=(10, 6))  # figure size
pAggResult['sale'].plot(kind='bar', width=0.8, fontsize=10)  # frequency bar chart
plt.title('3150\n季度销售额频率分布直方图', fontsize=20)
plt.show()

 

 

图3-4

# Figure 3-4: pie chart of each dish's share of the total profit.
import matplotlib.pyplot as plt  # the snippet used plt without importing it
import pandas as pd

plt.rcParams['font.sans-serif'] = ['SimHei']  # font used to render Chinese labels

# Raw string: the path value is unchanged, but '\p' is no longer parsed as an
# (invalid) escape sequence.
dish_profit = r'D:\python\挖掘学习实训/catering_dish_profit.xls'  # dish profit data
data = pd.read_excel(dish_profit, index_col=u'菜品名')
data = data[u'盈利'].copy()
# Sorting returns a new Series; the original call discarded the result, so it
# had no effect. Order the dishes by profit, largest first.
data = data.sort_values(ascending=False)

# Fraction of the total profit per dish; the total is computed once.
sizes = (data / data.sum()).tolist()
colors = ['red', 'green']

plt.title('学号:50')
plt.pie(sizes, labels=data.index, colors=colors, autopct='%1.1f%%',
        shadow=True, startangle=90)
plt.axis('equal')
plt.show()  # was missing, so a plain script never displayed the figure

 

 

图3-8

from __future__ import print_function
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei'] # 正常显示中文
plt.rcParams['axes.unicode_minus'] = False # 正常显示负号'-'

dish_profit = 'D:\python\挖掘学习实训/catering_dish_profit.xls'
data = pd.read_excel(dish_profit, index_col='菜品名')
data = data['盈利'].copy()
# data.sort(ascending=False)
# data.sort_values(ascending=False)
data.sort_index(ascending=False)

plt.figure()
data.plot(kind='bar')
plt.ylabel('盈利(元)')
p = 1.0 * data.cumsum() / data.sum()
p.plot(color='r', secondary_y=True, style='-o', linewidth=2)
plt.annotate(
format(p[6], '.4%'), xy=(6, p[6]), xytext=(6 * 0.9, p[6] * 0.9),
arrowprops=dict(arrowstyle='->', connectionstyle='arc3, rad=.2')
) # 添加注释,即85%处的标记,这里包括了指定箭头样式
plt.ylabel('盈利(比例)')
plt.title('学号:3150')
plt.show()

 

 

图3-16 3-17

# Figures 3-16 and 3-17: a sine curve drawn with the "bp--" format string,
# followed by a demo pie chart. Uses the np and plt names imported earlier.
theta = np.linspace(0, 2 * np.pi, 50)
plt.plot(theta, np.sin(theta), "bp--")
plt.title('学号:3150')
plt.show()

# Pie chart: one (label, size, colour, explode offset) record per slice; the
# slices are ordered and plotted counter-clockwise.
slices = [
    ("frogs", 15, "yellowgreen", 0),
    ("hogs", 30, "gold", 0.1),
    ("dogs", 45, "lightskyblue", 0),
    ("logs", 10, "lightcoral", 0),
]
names, shares, palette, offsets = (list(column) for column in zip(*slices))
plt.pie(
    shares,
    explode=offsets,
    labels=names,
    colors=palette,
    autopct="%1.1f%%",
    shadow=True,
    startangle=45,
)
plt.axis("equal")
plt.title('学号:3150')
plt.show()

 

 

图3-18  3-19

# Figures 3-18 and 3-19: histogram and box plot of random samples drawn with
# np.random.randn. Uses the np, pd and plt names imported earlier.

# Histogram of 1000 samples, split into 10 bins.
samples = np.random.randn(1000)
plt.hist(samples, bins=10)
plt.title('学号:3150')
plt.show()

# Box plot comparing a fresh set of 1000 samples with the same samples
# shifted by +1, one column each.
base = np.random.randn(1000)
frame = pd.DataFrame(np.column_stack((base, base + 1)))
frame.plot.box()
plt.title('学号:3150')
plt.show()

 

 

图3-20

# Figure 3-20: the same exponential series plotted twice side by side, once
# on a linear y axis and once with logy=True. Uses the np, pd and plt names
# imported earlier.
growth = pd.Series(np.exp(np.arange(20)))
fig = plt.figure(figsize=(8, 4))
linear_ax, log_ax = fig.subplots(nrows=1, ncols=2)

growth.plot(ax=linear_ax, label=u"原始数据图", legend=True)
growth.plot(ax=log_ax, logy=True, label=u"对数数据图", legend=True)

plt.title('学号:3150')
plt.show()

 

posted @ 2023-02-26 20:35  好想看你的微笑  阅读(28)  评论(0)  编辑  收藏  举报