Matplotlib

  1. Matplotlib 是 Python 的绘图库。它可与 NumPy 一起使用,提供了一种有效的 MATLAB 开源替代方案。

import pandas as pd
import altair as alt
%matplotlib inline


# Load the CSV, skipping malformed rows instead of aborting the whole read.
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0;
# `on_bad_lines="skip"` is its replacement.
df = pd.read_csv('pandas-2.csv', encoding="utf-8", delimiter=",", on_bad_lines="skip")
# Use the parsed "time" column as the row index.
df.set_index(pd.to_datetime(df["time"]), inplace=True)

# Plot the "throughput" column against the index.
df.loc[:, 'throughput'].plot()



import matplotlib.pyplot as plt

# Plot a single row, selected by position (the 6th row).
df.iloc[5].plot()
plt.show()

# Plot every row as its own line, one legend entry per row.
# `iloc` expects integer positions, so feeding it the values of `df.index`
# only works when the index happens to be 0..n-1; `iterrows()` yields
# (label, row) pairs for any kind of index.
for row_label, row in df.iterrows():
    row.plot(label=str(row_label))
plt.legend()
plt.show()


# Plot a single column.
df['A'].plot()
plt.show()
  1. 简单的多个图(subplot)使用示例
https://blog.csdn.net/leilei7407/article/details/104969532/

import matplotlib.pyplot as plt
import numpy as np
# np.random.seed(0)
# Five bar positions and five random integer values drawn from [-5, 5).
x = np.arange(5)
y = np.random.randint(-5, 5, size=5)

# One entry per panel: (subplot slot, bar function, zero-line function, colour).
# Left panel: vertical bars with a horizontal line at 0.
# Right panel: horizontal bars (barh swaps the roles of the axes) with a
# vertical line at 0.
panels = (
    (1, plt.bar, plt.axhline, 'blue'),
    (2, plt.barh, plt.axvline, 'red'),
)
for slot, draw_bars, draw_zero_line, colour in panels:
    plt.subplot(1, 2, slot)
    draw_bars(x, y, color=colour)
    draw_zero_line(0, color=colour, linewidth=2)

plt.show()

  1. 简单的多图叠加显示
https://blog.csdn.net/leilei7407/article/details/104969532/

# plt.legend()函数主要的作用就是给图加上图例

import matplotlib.pyplot as plt

# Shared x values 1..8 and two series to overlay on the same axes.
x = list(range(1, 9))
y1 = [1, 2, 3, 4, 55, 6, 6, 7]
y2 = [6, 20, 9, 2, 5, 8, 2, 8]

# Each plot() call adds one more line to the current axes.
for series in (y1, y2):
    plt.plot(x, series)

# Legend labels are given in the order the lines were drawn.
plt.legend(['y1', 'y2'])
plt.show()


  1. 多层索引画图前需要转换

import pandas as pd
# Build a frame whose rows carry a two-level (MultiIndex) index.
df = pd.DataFrame(
    np.arange(1, 5).reshape((4, 1)),
    index=[['a', 'b', 'c', 'd'], ['A', 'B', 'C', 'D']],
    columns=['data'],
)

# Optional: give the two index levels names.
df = df.rename_axis(index=['index1', 'index2'])

# Flatten to a single-level index (the levels become ordinary columns) ...
df = df.reset_index()

# ... and turn those columns back into a two-level index.
df.set_index(['index1', 'index2'], inplace=True)

# Pivot the inner index level ('index2') into columns; combinations that do
# not exist are filled with 0.
# The result is deliberately not called `plt`: that name would shadow the
# matplotlib.pyplot alias and break every later `plt.*` call.
# `rename` is chained (not in-place) so it never operates on a slice.
plot_df = df.unstack(fill_value=0)['data'].rename(
    columns={'A': 'aa', 'B': 'b', 'C': 'c'}
)
plot_df

  1. 百分比柱状图
https://chrisalbon.com/python/data_visualization/matplotlib_percentage_stacked_bar_plot/
https://blog.csdn.net/lys_828/article/details/106524459


# 官方1(纵向)
# libraries
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
import pandas as pd

# Data: one row per group, one column per stacked segment.
r = [0, 1, 2, 3, 4]
raw_data = {
    'greenBars': [20, 1.5, 7, 10, 5],
    'orangeBars': [5, 15, 5, 10, 15],
    'blueBars': [2, 15, 18, 5, 10],
}
df = pd.DataFrame(raw_data)

# Convert raw values into each segment's percentage of its group total.
totals = [
    green + orange + blue
    for green, orange, blue in zip(df['greenBars'], df['orangeBars'], df['blueBars'])
]
greenBars, orangeBars, blueBars = (
    [value / total * 100 for value, total in zip(df[column], totals)]
    for column in ('greenBars', 'orangeBars', 'blueBars')
)

# Plot settings shared by all three segments.
barWidth = 0.85
names = ('A', 'B', 'C', 'D', 'E')
bar_style = {'edgecolor': 'white', 'width': barWidth}

# Stack the segments: each one starts where the ones below it end.
orange_bottom = greenBars
blue_bottom = [low + mid for low, mid in zip(greenBars, orangeBars)]
plt.bar(r, greenBars, color='#b5ffb9', **bar_style)
plt.bar(r, orangeBars, bottom=orange_bottom, color='#f9bc86', **bar_style)
plt.bar(r, blueBars, bottom=blue_bottom, color='#a3acff', **bar_style)

# Label the x axis with the group names.
plt.xticks(r, names)
plt.xlabel("group")

# Show the figure.
plt.show()



# 官方2(横向)
import numpy as np
import matplotlib.pyplot as plt

# Answer categories; each list in `results` has one entry per category,
# in this order.
category_names = ['Strongly disagree', 'Disagree',
                  'Neither agree nor disagree', 'Agree', 'Strongly agree']
# Maps each question label to its list of answers per category.
results = {
    'Question 1': [10, 15, 17, 32, 26],
    'Question 2': [26, 22, 29, 10, 13],
    'Question 3': [35, 37, 7, 2, 19],
    'Question 4': [32, 11, 9, 15, 33],
    'Question 5': [21, 29, 5, 5, 40],
    'Question 6': [8, 19, 5, 30, 38]
}

def survey(results, category_names):
    """
    Draw a horizontal stacked bar chart of survey answers.

    Parameters
    ----------
    results : dict
        Maps each question label to a list of answers per category. All
        lists are assumed to have the same length as *category_names*.
    category_names : list of str
        The category labels, in the same order as the answers.

    Returns
    -------
    fig, ax
        The figure and axes created for the chart.
    """
    questions = list(results)
    counts = np.array([results[question] for question in questions])
    right_edges = counts.cumsum(axis=1)
    n_categories = counts.shape[1]
    palette = plt.get_cmap('RdYlGn')(np.linspace(0.15, 0.85, n_categories))

    fig, ax = plt.subplots(figsize=(9.2, 5))
    ax.invert_yaxis()
    ax.xaxis.set_visible(False)
    ax.set_xlim(0, counts.sum(axis=1).max())

    for col, (name, rgba) in enumerate(zip(category_names, palette)):
        seg_widths = counts[:, col]
        # A segment starts where the cumulative sum of the previous ones ends.
        seg_lefts = right_edges[:, col] - seg_widths
        ax.barh(questions, seg_widths, left=seg_lefts, height=0.5,
                label=name, color=rgba)

        # Choose the annotation colour from the segment colour's RGB product.
        red, green, blue, _alpha = rgba
        label_color = 'darkgrey' if red * green * blue >= 0.5 else 'white'

        seg_centers = seg_lefts + seg_widths / 2
        for row, (center, count) in enumerate(zip(seg_centers, seg_widths)):
            ax.text(center, row, str(int(count)), ha='center', va='center',
                    color=label_color)

    ax.legend(ncol=len(category_names), bbox_to_anchor=(0, 1),
              loc='lower left', fontsize='small')
    return fig, ax


# Build the chart from the sample data defined above, then display it.
survey(results, category_names)
plt.show()



# 修改版本2(纵向)
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


def percentage_bar(df):
    """Draw a vertical 100%-stacked bar chart of *df*.

    One bar is drawn per index label of *df*. Inside each bar, one segment
    per column shows that column's share of the row total, annotated with
    the percentage.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric data. Index labels become the x-axis categories and column
        names become the legend entries.

    Returns
    -------
    fig, ax
        The figure and axes created for the chart.
    """
    labels = df.index.tolist()               # one bar per index label
    results = df.to_dict(orient='list')      # {column name: values per row}
    category_names = list(results.keys())    # one stacked segment per column
    data = np.array(list(results.values()))  # shape: (n_columns, n_rows)

    # The row totals do not change inside the loop, so compute every
    # segment's share of its bar once, up front.
    shares = data / data.sum(axis=0)

    # One colour per column. Pass a different colormap name to get_cmap
    # (e.g. 'hot') to restyle the chart.
    category_colors = plt.get_cmap('RdYlGn')(np.linspace(0.15, 0.85, data.shape[0]))

    fig, ax = plt.subplots(figsize=(12, 9))
    ax.invert_xaxis()            # reverse the left-to-right order of the bars
    ax.yaxis.set_visible(False)  # hide the y axis
    ax.set_ylim(0, 1)            # shares are fractions, so the y axis spans 0..1

    starts = np.zeros(len(labels))  # running top of the stack for each bar
    for heights, colname, color in zip(shares, category_names, category_colors):
        ax.bar(labels, heights, bottom=starts, width=0.5, label=colname,
               color=color, edgecolor='gray')
        ycenters = starts + heights / 2  # vertical centre of each new segment
        starts = starts + heights        # the next segment stacks on top

        # Choose the annotation colour from the segment colour's RGB product.
        r, g, b, _ = color
        text_color = 'white' if r * g * b < 0.5 else 'k'
        for xpos, (ycenter, share) in enumerate(zip(ycenters, heights)):
            ax.text(xpos, ycenter, f'{round(share*100,2)}%', ha='center', va='center',
                    color=text_color, rotation=90)

    # Rotate the tick labels only after plotting. Calling set_xticklabels()
    # on an empty axes (before any ticks are fixed) attaches labels to
    # arbitrary default ticks and is discouraged by Matplotlib.
    ax.tick_params(axis='x', labelrotation=90)

    ax.legend(ncol=len(category_names), bbox_to_anchor=(0, 1),
              loc='lower left', fontsize='large')
    return fig, ax


# Sample data: five answer categories (used as the DataFrame index) and one
# list of values per question (used as the DataFrame columns).
category_names = ['Strongly disagree', 'Disagree',
                  'Neither agree nor disagree', 'Agree', 'Strongly agree']
# NOTE(review): the last value of 'Question 6' is 380 here but 38 in the
# other copies of this sample data in the file - possibly a deliberate
# outlier, possibly a typo; confirm.
results = {
    'Question 1': [10, 15, 17, 32, 26],
    'Question 2': [26, 22, 29, 10, 13],
    'Question 3': [35, 37, 7, 2, 19],
    'Question 4': [32, 11, 9, 15, 33],
    'Question 5': [21, 29, 5, 5, 40],
    'Question 6': [8, 19, 5, 30, 380]
}

# Rows = categories, columns = questions.
df = pd.DataFrame(results, index=category_names)
percentage_bar(df)
# Reset the x tick label rotation to horizontal on the current axes.
plt.xticks(rotation=0)




# 修改版本2(横向)
import numpy as np
import matplotlib.pyplot as plt

# Sample data: five answer categories (used as the DataFrame index) and one
# list of values per question (used as the DataFrame columns).
category_names = ['Strongly disagree', 'Disagree',
                  'Neither agree nor disagree', 'Agree', 'Strongly agree']
results = {
    'Question 1': [20, 20, 20, 20, 20],
    'Question 2': [26, 22, 29, 10, 13],
    'Question 3': [35, 37, 7, 2, 19],
    'Question 4': [32, 11, 9, 15, 33],
    'Question 5': [21, 29, 5, 5, 40],
    'Question 6': [8, 19, 5, 30, 38]
}

# Rows = categories, columns = questions.
df = pd.DataFrame(results, index=category_names)

def survey(df):
    """
    Draw a horizontal 100%-stacked bar chart from a DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric data with one column per bar (e.g. one per question) and
        one row per stacked segment (e.g. one per answer category). Column
        names label the bars; index labels become the legend entries.

    Returns
    -------
    fig, ax
        The figure and axes created for the chart.
    """
    category_names = df.index.tolist()
    results = df.to_dict(orient='list')

    labels = list(results.keys())            # one bar per column of df
    data = np.array(list(results.values()))  # shape: (n_bars, n_categories)

    # Convert each bar's values to fractions of that bar's total. The
    # cumulative sum gives the right edge of every segment.
    row_totals = data.sum(axis=1, keepdims=True)
    data_cum = data.cumsum(axis=1) / row_totals
    data = data / row_totals

    # One colour per category, evenly spaced along the colormap.
    category_colors = plt.get_cmap('RdYlGn')(
        np.linspace(0.15, 0.85, data.shape[1]))

    fig, ax = plt.subplots(figsize=(9.2, 5))
    ax.invert_yaxis()            # first bar at the top
    ax.xaxis.set_visible(False)  # hide the x axis
    ax.set_xlim(0, 1)            # segments are fractions, so x spans 0..1

    for i, (colname, color) in enumerate(zip(category_names, category_colors)):
        widths = data[:, i]
        starts = data_cum[:, i] - widths
        # Draw this category's segment on every bar.
        ax.barh(labels, widths, left=starts, height=0.5,
                label=colname, color=color)
        xcenters = starts + widths / 2
        # Write the percentage at the centre of each segment, in a colour
        # chosen from the segment colour's RGB product.
        r, g, b, _ = color
        text_color = 'white' if r * g * b < 0.5 else 'darkgrey'
        for y, (x, c) in enumerate(zip(xcenters, widths)):
            ax.text(x, y, f'{round(c*100,2)}%', ha='center', va='center',
                    color=text_color)

    ax.legend(ncol=len(category_names), bbox_to_anchor=(0, 1),
              loc='lower left', fontsize='small')

    return fig, ax


# Build the percentage chart from the sample DataFrame, then display it.
survey(df)
plt.show()
  1. 实际应用

import pandas as pd
import numpy as np
import altair as alt
import pymysql
from sqlalchemy import create_engine
import seaborn    
import datetime
import matplotlib.pyplot as plt


def survey_percentage(df, T=False):
    """
    Draw a horizontal 100%-stacked bar chart from a DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric data. After the optional transpose, each column becomes one
        bar and each row becomes one stacked segment; index labels become
        the legend entries.
    T : bool, optional
        When truthy, transpose *df* before plotting.

    Returns
    -------
    fig, ax
        The figure and axes created for the chart.
    """
    if T:
        df = df.T
    category_names = df.index.tolist()
    results = df.to_dict(orient='list')

    labels = list(results.keys())            # one bar per column of df
    data = np.array(list(results.values()))  # shape: (n_bars, n_categories)

    # Convert each bar's values to fractions of that bar's total. The
    # cumulative sum gives the right edge of every segment.
    row_totals = data.sum(axis=1, keepdims=True)
    data_cum = data.cumsum(axis=1) / row_totals
    data = data / row_totals

    # One colour per category, evenly spaced along the colormap.
    category_colors = plt.get_cmap('RdYlGn')(
        np.linspace(0.15, 0.85, data.shape[1]))

    fig, ax = plt.subplots(figsize=(9.2, 5))
    ax.invert_yaxis()            # first bar at the top
    ax.xaxis.set_visible(False)  # hide the x axis
    ax.set_xlim(0, 1)            # segments are fractions, so x spans 0..1

    for i, (colname, color) in enumerate(zip(category_names, category_colors)):
        widths = data[:, i]
        starts = data_cum[:, i] - widths
        # Draw this category's segment on every bar.
        ax.barh(labels, widths, left=starts, height=0.5,
                label=colname, color=color)
        xcenters = starts + widths / 2
        # Write the percentage at the centre of each segment, in a colour
        # chosen from the segment colour's RGB product.
        r, g, b, _ = color
        text_color = 'white' if r * g * b < 0.5 else 'darkgrey'
        for y, (x, c) in enumerate(zip(xcenters, widths)):
            ax.text(x, y, f'{round(c*100,2)}%', ha='center', va='center',
                    color=text_color)

    ax.legend(ncol=len(category_names), bbox_to_anchor=(0, 1),
              loc='lower left', fontsize='small')

    return fig, ax



def survey(df, T=False):
    """
    Draw a horizontal stacked bar chart of raw values from a DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric data. After the optional transpose, each column becomes one
        bar and each row becomes one stacked segment; index labels become
        the legend entries.
    T : bool, optional
        When truthy, transpose *df* before plotting.

    Returns
    -------
    fig, ax
        The figure and axes created for the chart.
    """
    frame = df.T if T else df
    segment_names = frame.index.tolist()
    per_bar = frame.to_dict(orient='list')

    bar_labels = list(per_bar)
    values = np.array([per_bar[label] for label in bar_labels])
    right_edges = values.cumsum(axis=1)
    n_segments = values.shape[1]
    palette = plt.get_cmap('RdYlGn')(np.linspace(0.15, 0.85, n_segments))

    fig, ax = plt.subplots(figsize=(9.2, 5))
    ax.invert_yaxis()
    ax.xaxis.set_visible(False)
    ax.set_xlim(0, values.sum(axis=1).max())

    for col, (name, rgba) in enumerate(zip(segment_names, palette)):
        seg_widths = values[:, col]
        # A segment starts where the cumulative sum of the previous ones ends.
        seg_lefts = right_edges[:, col] - seg_widths
        ax.barh(bar_labels, seg_widths, left=seg_lefts, height=0.5,
                label=name, color=rgba)

        # Choose the annotation colour from the segment colour's RGB product.
        red, green, blue, _alpha = rgba
        label_color = 'darkgrey' if red * green * blue >= 0.5 else 'white'

        seg_centers = seg_lefts + seg_widths / 2
        for row, (center, value) in enumerate(zip(seg_centers, seg_widths)):
            ax.text(center, row, str(int(value)), ha='center', va='center',
                    color=label_color)

    ax.legend(ncol=len(segment_names), bbox_to_anchor=(0, 1),
              loc='lower left', fontsize='small')
    return fig, ax




# Read the report rows from MySQL into a DataFrame.
# NOTE(review): the connection URL looks like a placeholder
# (root:password@ip) - real credentials should come from configuration,
# not from source code; confirm.
engine = create_engine('mysql+pymysql://root:password@ip:3306/db')
sql = ''' select case_name ,result, platform_name, error_msg ,report_create_time from report_reportdetail; '''
df = pd.read_sql_query(sql, engine)

# Keep only the last two weeks of data: index the rows by the parsed
# report_create_time, sort by time, and drop everything before the cutoff.
df = df.set_index(pd.to_datetime(df["report_create_time"])).drop("report_create_time", axis=1)
# Despite its name, `week_ago` is the date 14 days before today.
week_ago = datetime.date.today() - datetime.timedelta(days=14)
df = df.sort_index().truncate(before=week_ago)  


def _occurrence_counts(frame, mask):
    """Return the rows of *frame* selected by *mask*, keeping only the
    'result' count column, renamed to '出现次数'."""
    return frame.loc[mask, :].rename(columns={'result': '出现次数'}).loc[:, ['出现次数']]


# Failure counts grouped by error message, most frequent first.
# Equivalent SQL:
#   select count(result) as i, error_msg from report_reportdetail
#   where result = 'fail' group by error_msg order by i desc;
ErrorMsg = (
    df.loc[df['result'] == 'fail', :]
    .groupby(['error_msg'])
    .count()
    .sort_values(by=['result'], ascending=[False])
)


# Failed cases: all platforms (df0), HWE only (df1), HWV only (df2).
df0 = df.loc[(df['result'] == 'fail'), :].copy()
df1 = df.loc[(df['result'] == 'fail') & (df['platform_name'] == 'HWE'), :].copy()
df2 = df.loc[(df['result'] == 'fail') & (df['platform_name'] == 'HWV'), :].copy()

# The error messages are long, so keep only their first 60 characters.
# Each frame derives the short message from its own rows. df0 used to read
# it from df1, which mis-assigned or dropped messages for rows that are
# not in df1.
df0.loc[:, "error_msg_short"] = df0["error_msg"].str[0:60]
df1.loc[:, "error_msg_short"] = df1["error_msg"].str[0:60]
df2.loc[:, "error_msg_short"] = df2["error_msg"].str[0:60]

# Two-level indexes. For df0 the short error message is the outer level;
# per the original author's note, putting case_name first there scrambles
# the ordering of the result.
df0.set_index(['error_msg_short', 'case_name'], inplace=True)
df1.set_index(['case_name', 'error_msg_short'], inplace=True)
df2.set_index(['case_name', 'error_msg_short'], inplace=True)

# Count the rows for every (level 0, level 1) index combination.
df0 = df0.groupby(level=df0.index.names).count()
df1 = df1.groupby(level=df1.index.names).count()
df2 = df2.groupby(level=df2.index.names).count()

# Split the counts by threshold.
# NOTE(review): the thresholds differ per platform, and the HWV (> 0) and
# HWV_ENV (<= 3) selections overlap - confirm this is intended.
ErrorMsgByCase = _occurrence_counts(df0, df0["result"] > 3).sort_values(
    by=['出现次数'], ascending=[False])
HWE = _occurrence_counts(df1, df1["result"] > 1)
HWV = _occurrence_counts(df2, df2["result"] > 0)

HWE_ENV = _occurrence_counts(df1, df1["result"] <= 1)
HWV_ENV = _occurrence_counts(df2, df2["result"] <= 3)

# Rows whose short error message contains a PR reference ("PR" + 6 digits
# + ":"). The pattern is a raw string so `\d` is not treated as an
# (invalid) string escape.
HWE_PR = df1.iloc[df1.index.get_level_values(1).str.contains(r'PR\d{6}:')]
HWV_PR = df2.iloc[df2.index.get_level_values(1).str.contains(r'PR\d{6}:')]


# Occurrence counts for (error message, case) pairs seen more than 3 times.
# NOTE(review): this rebinds ErrorMsgByCase without the descending sort
# applied where it was first assigned - confirm which version is wanted.
frequent_failures = df0.loc[(df0["result"] > 3), :]
ErrorMsgByCase = frequent_failures.rename(columns={'result': '出现次数'}).loc[:, ['出现次数']]

# Other frames available for inspection: ErrorMsg, ErrorMsgByCase.

HWE
# Pivot the inner index level into columns (missing combinations become 0)
# and shorten the error-message column names for the legend.
short_names = {
    ' DL Tput is below 400Mbps!': 'DL Tput error',
    ' UE 5G attach failed!': 'attach failed',
    ' UL Tput is below 20Mbps!': 'UL Tput error',
}
HWE_plt = HWE.unstack(fill_value=0)['出现次数'].rename(columns=short_names)
HWE_plt.columns  # worth checking: these are the names the rename must match
survey(HWE_plt, T=True)

plt.show()


# df.loc[['index1','index2'],:] 
# HWV

# HWV_PR
# HWE_PR

# HWE_ENV



# data = ErrorMsgByCase.reset_index()
# fg = seaborn.factorplot(y='case_name', x='出现次数',col='error_msg_short', data=data, kind='bar')
# fg.fig.set_size_inches(18,3)
# fg.set_xlabels('')

posted @ 2020-11-15 19:11  该显示昵称已被使用了  阅读(87)  评论(0编辑  收藏  举报