数据科学和人工智能技术笔记 二十、数据可视化

二十、数据可视化

作者:Chris Albon

译者:飞龙

协议:CC BY-NC-SA 4.0

MatPlotLib 中的双向条形图

%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Assemble the example table of test scores: one row per student,
# one column per test.
score_columns = ['first_name', 'pre_score', 'mid_score', 'post_score']
raw_data = dict(
    first_name=['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
    pre_score=[4, 24, 31, 2, 3],
    mid_score=[25, 94, 57, 62, 70],
    post_score=[5, 43, 23, 23, 51],
)
# `columns` pins the left-to-right order of the columns.
df = pd.DataFrame(data=raw_data, columns=score_columns)
# Bare expression so the notebook cell displays the frame.
df
  first_name  pre_score  mid_score  post_score
0      Jason          4         25           5
1      Molly         24         94          43
2       Tina         31         57          23
3       Jake          2         62          23
4        Amy          3         70          51
# Input data: the second and third rows of the frame (Molly and Tina),
# skipping the first column (the names).
# `.ix` has been removed from pandas; `.iloc` performs the same selection
# by position. The cast makes sure the sliced scores are plain integers.
x1 = df.iloc[1, 1:].astype(int)
x2 = df.iloc[2, 1:].astype(int)

# Labels for the bars
bar_labels = ['Pre Score', 'Mid Score', 'Post Score']

# Create the figure
fig = plt.figure(figsize=(8,6))

# One y position per score, labelled with the test name
y_pos = list(np.arange(len(x1)))
plt.yticks(y_pos, bar_labels, fontsize=10)

# Horizontal bars at y_pos for x1 (Molly): center-aligned, 40% opaque,
# dark green, extending to the right of zero
plt.barh(y_pos, x1, align='center', alpha=0.4, color='#263F13')

# Horizontal bars at y_pos for x2 (Tina): same styling in light green.
# The values are negated so these bars extend to the left of zero.
plt.barh(y_pos, -x2, align='center', alpha=0.4, color='#77A61D')

# Annotations and labels
plt.xlabel('Tina\'s Score: Light Green. Molly\'s Score: Dark Green')
t = plt.title('Comparison of Molly and Tina\'s Score')
plt.ylim([-1,len(x1)+0.1])
# Leave a margin of 10 beyond the longest bar on each side
plt.xlim([-max(x2)-10, max(x1)+10])
plt.grid()

plt.show()

png

MatPlotLib 中的条形图

%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Assemble the example table of test scores: one row per student,
# one column per test.
score_columns = ['first_name', 'pre_score', 'mid_score', 'post_score']
raw_data = dict(
    first_name=['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
    pre_score=[4, 24, 31, 2, 3],
    mid_score=[25, 94, 57, 62, 70],
    post_score=[5, 43, 23, 23, 51],
)
# `columns` pins the left-to-right order of the columns.
df = pd.DataFrame(data=raw_data, columns=score_columns)
# Bare expression so the notebook cell displays the frame.
df
  first_name  pre_score  mid_score  post_score
0      Jason          4         25           5
1      Molly         24         94          43
2       Tina         31         57          23
3       Jake          2         62          23
4        Amy          3         70          51
# Mean score of each test, in the order pre / mid / post
mean_values = [df['pre_score'].mean(), df['mid_score'].mean(), df['post_score'].mean()]

# Error-bar length for each test: 25% of that test's own mean.
# Every entry is derived from the matching mean, not from pre_score alone.
variance = [mean * 0.25 for mean in mean_values]

# Labels for the bars
bar_labels = ['Pre Score', 'Mid Score', 'Post Score']

# x position of each bar
x_pos = list(range(len(bar_labels)))

# Bars of the mean values at x_pos: y error bars taken from `variance`,
# center-aligned, yellow, 50% opaque
plt.bar(x_pos, mean_values, yerr=variance, align='center', color='#FFC222', alpha=0.5)

# Add a grid
plt.grid()

# y-axis height: pick the (mean, variance) pair whose bar plus error bar
# reaches highest, then leave 10% headroom above it
max_y = max(zip(mean_values, variance), key=sum)
plt.ylim([0, (max_y[0] + max_y[1]) * 1.1])

# Axis label, tick labels and title
plt.ylabel('Score')
plt.xticks(x_pos, bar_labels)
plt.title('Mean Scores For Each Test')

plt.show()

png

Seaborn 中的调色板

import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

# Build the example frame: ten timestamped rows of deaths for seven regiments.
timestamps = [
    '2014-05-01 18:47:05.069722',
    '2014-05-01 18:47:05.119994',
    '2014-05-02 18:47:05.178768',
    '2014-05-02 18:47:05.230071',
    '2014-05-02 18:47:05.230071',
    '2014-05-02 18:47:05.280592',
    '2014-05-03 18:47:05.332662',
    '2014-05-03 18:47:05.385109',
    '2014-05-04 18:47:05.436523',
    '2014-05-04 18:47:05.486877',
]
data = {
    'date': timestamps,
    'deaths_regiment_1': [34, 43, 14, 15, 15, 14, 31, 25, 62, 41],
    'deaths_regiment_2': [52, 66, 78, 15, 15, 5, 25, 25, 86, 1],
    'deaths_regiment_3': [13, 73, 82, 58, 52, 87, 26, 5, 56, 75],
    'deaths_regiment_4': [44, 75, 26, 15, 15, 14, 54, 25, 24, 72],
    'deaths_regiment_5': [25, 24, 25, 15, 57, 68, 21, 27, 62, 5],
    'deaths_regiment_6': [84, 84, 26, 15, 15, 14, 26, 25, 62, 24],
    'deaths_regiment_7': [46, 57, 26, 15, 15, 14, 26, 25, 62, 41],
}
# 'battle_deaths' has no entry in `data`, so it comes out as an
# all-missing column.
column_order = ['date', 'battle_deaths'] + ['deaths_regiment_%d' % n for n in range(1, 8)]
df = pd.DataFrame(data, columns=column_order)
# Index the rows by timestamp; the 'date' column itself is kept as well.
df = df.set_index(df['date'])

# Each call below asks seaborn for 10 colors from the named palette and
# draws them with palplot. The bare `png` lines between the calls appear to
# be image placeholders left over from the notebook export.
sns.palplot(sns.color_palette("deep", 10))

png

sns.palplot(sns.color_palette("muted", 10))

png

sns.palplot(sns.color_palette("bright", 10))

png

sns.palplot(sns.color_palette("dark", 10))

png

sns.palplot(sns.color_palette("colorblind", 10))

png

sns.palplot(sns.color_palette("Paired", 10))

png

sns.palplot(sns.color_palette("BuGn", 10))

png

sns.palplot(sns.color_palette("GnBu", 10))

png

sns.palplot(sns.color_palette("OrRd", 10))

png

sns.palplot(sns.color_palette("PuBu", 10))

png

sns.palplot(sns.color_palette("YlGn", 10))

png

sns.palplot(sns.color_palette("YlGnBu", 10))

png

sns.palplot(sns.color_palette("YlOrBr", 10))

png

sns.palplot(sns.color_palette("YlOrRd", 10))

png

sns.palplot(sns.color_palette("BrBG", 10))

png

sns.palplot(sns.color_palette("PiYG", 10))

png

sns.palplot(sns.color_palette("PRGn", 10))

png

sns.palplot(sns.color_palette("PuOr", 10))

png

sns.palplot(sns.color_palette("RdBu", 10))

png

sns.palplot(sns.color_palette("RdGy", 10))

png

sns.palplot(sns.color_palette("RdYlBu", 10))

png

sns.palplot(sns.color_palette("RdYlGn", 10))

png

sns.palplot(sns.color_palette("Spectral", 10))

png

# Define a custom six-color palette and make it seaborn's current palette
flatui = [
    "#9b59b6",
    "#3498db",
    "#95a5a6",
    "#e74c3c",
    "#34495e",
    "#2ecc71",
]
sns.set_palette(flatui)
# Called without arguments, color_palette() is used here to fetch the
# palette that was just set, which is then drawn.
current_palette = sns.color_palette()
sns.palplot(current_palette)

png

# Plot the regiments' deaths over the ten time steps in a fixed color.
# `sns.tsplot` is no longer available in current seaborn releases, so the
# seven regiment columns are reshaped to long form (one row per regiment
# per time step) and drawn with `sns.lineplot`, which aggregates the
# repeated observations at each time step into a mean line with a
# confidence band.
# NOTE(review): lineplot's default interval width may differ from the one
# tsplot used; set it explicitly if the band width matters.
regiment_deaths = (df.filter(like='deaths_regiment_')
                     .reset_index(drop=True)
                     .rename_axis('timepoint')
                     .reset_index()
                     .melt(id_vars='timepoint', var_name='regiment', value_name='deaths'))
sns.lineplot(data=regiment_deaths, x='timepoint', y='deaths', color="#34495e")

# <matplotlib.axes._subplots.AxesSubplot at 0x116f5db70> 

png

使用 Seaborn 和 pandas 创建时间序列绘图

import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

# Build the example frame: ten timestamped rows of deaths for seven regiments.
timestamps = [
    '2014-05-01 18:47:05.069722',
    '2014-05-01 18:47:05.119994',
    '2014-05-02 18:47:05.178768',
    '2014-05-02 18:47:05.230071',
    '2014-05-02 18:47:05.230071',
    '2014-05-02 18:47:05.280592',
    '2014-05-03 18:47:05.332662',
    '2014-05-03 18:47:05.385109',
    '2014-05-04 18:47:05.436523',
    '2014-05-04 18:47:05.486877',
]
data = {
    'date': timestamps,
    'deaths_regiment_1': [34, 43, 14, 15, 15, 14, 31, 25, 62, 41],
    'deaths_regiment_2': [52, 66, 78, 15, 15, 5, 25, 25, 86, 1],
    'deaths_regiment_3': [13, 73, 82, 58, 52, 87, 26, 5, 56, 75],
    'deaths_regiment_4': [44, 75, 26, 15, 15, 14, 54, 25, 24, 72],
    'deaths_regiment_5': [25, 24, 25, 15, 57, 68, 21, 27, 62, 5],
    'deaths_regiment_6': [84, 84, 26, 15, 15, 14, 26, 25, 62, 24],
    'deaths_regiment_7': [46, 57, 26, 15, 15, 14, 26, 25, 62, 41],
}
# 'battle_deaths' has no entry in `data`, so it comes out as an
# all-missing column.
column_order = ['date', 'battle_deaths'] + ['deaths_regiment_%d' % n for n in range(1, 8)]
df = pd.DataFrame(data, columns=column_order)
# Index the rows by timestamp; the 'date' column itself is kept as well.
df = df.set_index(df['date'])

# Time-series plot of the regiments' deaths over the ten time steps.
# `sns.tsplot` is no longer available in current seaborn releases, so the
# seven regiment columns are reshaped to long form (one row per regiment
# per time step) and drawn with `sns.lineplot`, which aggregates the
# repeated observations at each time step into a mean line with a
# confidence band.
# NOTE(review): lineplot's default interval width may differ from the one
# tsplot used; set it explicitly if the band width matters.
regiment_deaths = (df.filter(like='deaths_regiment_')
                     .reset_index(drop=True)
                     .rename_axis('timepoint')
                     .reset_index()
                     .melt(id_vars='timepoint', var_name='regiment', value_name='deaths'))
sns.lineplot(data=regiment_deaths, x='timepoint', y='deaths', color="indianred")

# <matplotlib.axes._subplots.AxesSubplot at 0x1140be780> 

png

# Time-series plot with confidence-interval bars but no connecting line.
# `sns.tsplot` is no longer available in current seaborn releases, so the
# seven regiment columns are reshaped to long form (one row per regiment
# per time step) and drawn with `sns.lineplot`.
regiment_deaths = (df.filter(like='deaths_regiment_')
                     .reset_index(drop=True)
                     .rename_axis('timepoint')
                     .reset_index()
                     .melt(id_vars='timepoint', var_name='regiment', value_name='deaths'))
# err_style="bars" draws the interval as a vertical bar at each time step;
# the empty linestyle suppresses the line so only the markers remain.
sns.lineplot(data=regiment_deaths, x='timepoint', y='deaths',
             err_style="bars", marker="o", linestyle="")

# <matplotlib.axes._subplots.AxesSubplot at 0x116400668> 

png

使用 Seaborn 创建散点图

import pandas as pd
%matplotlib inline
import random
import matplotlib.pyplot as plt
import seaborn as sns

# Build a five-row frame in one step: two columns of distinct random
# integers drawn from 1..999, plus a 0/1 flag and a gender label.
df = pd.DataFrame(
    {
        'x': random.sample(range(1, 1000), 5),
        'y': random.sample(range(1, 1000), 5),
        'z': [1, 0, 0, 1, 0],
        'k': ['male', 'male', 'male', 'female', 'female'],
    },
    columns=['x', 'y', 'z', 'k'],
)

# Show the first rows
df.head()
     x    y  z       k
0  466  948  1    male
1  832  481  0    male
2  978  465  0    male
3  510  206  1  female
4  848  357  0  female
# Set the style used for the scatter plot
sns.set_context("notebook", font_scale=1.1)
sns.set_style("ticks")

# Scatter plot of the frame.
# x and y are passed by keyword: recent seaborn releases no longer accept
# them positionally, and keywords also work on older releases.
sns.lmplot(x='x',  # horizontal axis
           y='y',  # vertical axis
           data=df,  # data source
           fit_reg=False,  # do not fit a regression line
           hue="z",  # color the points by column z
           markers="D",  # marker style, set through lmplot's own argument
           scatter_kws={"s": 100})  # marker size

# NOTE(review): the title and axis labels below do not describe the x / y
# columns plotted here; they look carried over from another example and are
# left unchanged.
# Set the title
plt.title('Histogram of IQ')

# Set the x-axis label
plt.xlabel('Time')

# Set the y-axis label
plt.ylabel('Deaths')

# <matplotlib.text.Text at 0x112b7bb70> 

png

MatPlotLib 中的分组条形图

%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Assemble the example table of test scores: one row per student,
# one column per test.
score_columns = ['first_name', 'pre_score', 'mid_score', 'post_score']
raw_data = dict(
    first_name=['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
    pre_score=[4, 24, 31, 2, 3],
    mid_score=[25, 94, 57, 62, 70],
    post_score=[5, 43, 23, 23, 51],
)
# `columns` pins the left-to-right order of the columns.
df = pd.DataFrame(data=raw_data, columns=score_columns)
# Bare expression so the notebook cell displays the frame.
df
  first_name  pre_score  mid_score  post_score
0      Jason          4         25           5
1      Molly         24         94          43
2       Tina         31         57          23
3       Jake          2         62          23
4        Amy          3         70          51
# Group positions (one group per student) and the width of a single bar
pos = list(range(len(df['pre_score'])))
width = 0.25

# Create the figure and axes
fig, ax = plt.subplots(figsize=(10,5))

# (column, color, legend label) for each of the three bars in a group.
# The legend labels name the test rather than a student.
series = [
    ('pre_score', '#EE3224', 'Pre Score'),
    ('mid_score', '#F78F1E', 'Mid Score'),
    ('post_score', '#FFC222', 'Post Score'),
]

# The k-th series is shifted k bar widths to the right of `pos`.
# align='edge' makes `pos` the left edge of each bar, which is what the
# tick and x-limit arithmetic below assumes; matplotlib centers bars on
# their x position by default.
for offset, (column, color, label) in enumerate(series):
    plt.bar([p + width * offset for p in pos],
            df[column],
            width,
            align='edge',
            alpha=0.5,
            color=color,
            label=label)

# Set the y-axis label
ax.set_ylabel('Score')

# Set the title
ax.set_title('Test Subject Scores')

# Put each tick in the middle of its three-bar group
ax.set_xticks([p + 1.5 * width for p in pos])

# Label the ticks with the students' names
ax.set_xticklabels(df['first_name'])

# Axis ranges: one bar width of margin on either side of the groups;
# the y upper bound is the largest per-student total of the three scores
plt.xlim(min(pos)-width, max(pos)+width*4)
plt.ylim([0, max(df['pre_score'] + df['mid_score'] + df['post_score'])] )

# Add the legend (built from the bar labels) and show the plot
plt.legend(loc='upper left')
plt.grid()
plt.show()

png

MatPlotLib 中的直方图

%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math

# Let pandas display up to 1000 rows.
# The full option name is used instead of the abbreviated 'display.max_row',
# which only works through pandas' partial option-name matching.
pd.set_option('display.max_rows', 1000)

# Let pandas display up to 50 columns
pd.set_option('display.max_columns', 50)

# Download the battles dataset (needs network access) and preview it
df = pd.read_csv('https://www.dropbox.com/s/52cb7kcflr8qm2u/5kings_battles_v1.csv?dl=1')
df.head()
nameyearbattle_numberattacker_kingdefender_kingattacker_1attacker_2attacker_3attacker_4defender_1defender_2defender_3defender_4attacker_outcomebattle_typemajor_deathmajor_captureattacker_sizedefender_sizeattacker_commanderdefender_commandersummerlocationregionnote
0Battle of the Golden Tooth2981Joffrey/Tommen BaratheonRobb StarkLannisterNaNNaNNaNTullyNaNNaNNaNwinpitched battle10150004000Jaime LannisterClement Piper, Vance1Golden ToothThe WesterlandsNaN
1Battle at the Mummer’s Ford2982Joffrey/Tommen BaratheonRobb StarkLannisterNaNNaNNaNBaratheonNaNNaNNaNwinambush10NaN120Gregor CleganeBeric Dondarrion1Mummer’s FordThe RiverlandsNaN
2Battle of Riverrun2983Joffrey/Tommen BaratheonRobb StarkLannisterNaNNaNNaNTullyNaNNaNNaNwinpitched battle011500010000Jaime Lannister, Andros BraxEdmure Tully, Tytos Blackwood1RiverrunThe RiverlandsNaN
3Battle of the Green Fork2984Robb StarkJoffrey/Tommen BaratheonStarkNaNNaNNaNLannisterNaNNaNNaNlosspitched battle111800020000Roose Bolton, Wylis Manderly, Medger Cerwyn, H…Tywin Lannister, Gregor Clegane, Kevan Lannist…1Green ForkThe RiverlandsNaN
4Battle of the Whispering Wood2985Robb StarkJoffrey/Tommen BaratheonStarkTullyNaNNaNLannisterNaNNaNNaNwinambush1118756000Robb Stark, Brynden TullyJaime Lannister1Whispering WoodThe RiverlandsNaN
# Attacker and defender army sizes for the same set of battles: only
# battles with fewer than 90,000 attackers are kept (a missing attacker
# size fails the comparison and is dropped too).
small_battles = df['attacker_size'] < 90000
data1 = df['attacker_size'][small_battles]
# Missing defender sizes cannot be binned, so drop them up front
data2 = df['defender_size'][small_battles].dropna()

# Bins of a fixed width of 2000 troops covering the combined range of
# both series, so that neither series has values outside the bins
bin_width = 2000
lowest = min(data1.min(), data2.min())
highest = max(data1.max(), data2.max())
# Stop one width past the maximum so the last edge reaches it
bins = np.arange(lowest, highest + bin_width, bin_width)

# Histogram of attacker sizes
plt.hist(data1,
         bins=bins,
         alpha=0.5,
         color='#EDD834',
         label='Attacker')

# Histogram of defender sizes, on the same bins
plt.hist(data2,
         bins=bins,
         alpha=0.5,
         color='#887E43',
         label='Defender')

# Set the y range of the plot
plt.ylim([0, 10])

# Title, axis labels and legend
plt.title('Histogram of Attacker and Defender Size')
plt.xlabel('Number of troops')
plt.ylabel('Number of battles')
plt.legend(loc='upper right')

plt.show()

png

# Attacker and defender army sizes for the same set of battles: only
# battles with fewer than 90,000 attackers are kept (a missing attacker
# size fails the comparison and is dropped too).
small_battles = df['attacker_size'] < 90000
data1 = df['attacker_size'][small_battles]
# Missing defender sizes cannot be binned, so drop them up front
data2 = df['defender_size'][small_battles].dropna()

# Ten equal-width bins spanning the smallest to the largest value found in
# either series. `data1 + data2` would add the two columns row by row
# rather than pool them, so the extremes are taken per series instead.
lowest = min(data1.min(), data2.min())
highest = max(data1.max(), data2.max())
# 11 edges delimit 10 bins
bins = np.linspace(lowest, highest, 11)

# Histogram of attacker sizes on the bins defined above
plt.hist(data1,
         bins=bins,
         alpha=0.5,
         color='#EDD834',
         label='Attacker')

# Histogram of defender sizes on the same bins
plt.hist(data2,
         bins=bins,
         alpha=0.5,
         color='#887E43',
         label='Defender')

# Set the y range of the plot
plt.ylim([0, 10])

# Title, axis labels and legend
plt.title('Histogram of Attacker and Defender Size')
plt.xlabel('Number of troops')
plt.ylabel('Number of battles')
plt.legend(loc='upper right')

plt.show()

png

从 Pandas 数据帧生成 MatPlotLib 散点图

%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Example table: one row per person with name, gender flag, age and
# two test scores.
raw_data = dict(
    first_name=['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
    last_name=['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze'],
    female=[0, 1, 1, 0, 1],
    age=[42, 52, 36, 24, 73],
    preTestScore=[4, 24, 31, 2, 3],
    postTestScore=[25, 94, 57, 62, 70],
)
# The column list puts 'age' ahead of 'female', unlike the dict above.
column_order = ['first_name', 'last_name', 'age', 'female', 'preTestScore', 'postTestScore']
df = pd.DataFrame(data=raw_data, columns=column_order)
# Bare expression so the notebook cell displays the frame.
df
  first_name last_name  age  female  preTestScore  postTestScore
0      Jason    Miller   42       0             4             25
1      Molly  Jacobson   52       1            24             94
2       Tina       Ali   36       1            31             57
3       Jake    Milner   24       0             2             62
4        Amy     Cooze   73       1             3             70
# Scatter plot of preTestScore against postTestScore;
# the size of each point is driven by the person's age
plt.scatter(x=df['preTestScore'], y=df['postTestScore'], s=df['age'])

# <matplotlib.collections.PathCollection at 0x10ca42b00> 

png

# Scatter plot of preTestScore against postTestScore;
# every point has size 300 and its color follows the `female` flag
plt.scatter(
    x=df['preTestScore'],
    y=df['postTestScore'],
    s=300,
    c=df['female'],
)

# <matplotlib.collections.PathCollection at 0x10cb90a90> 

png

Matplotlib 的简单示例

# 让 Jupyter 加载 matplotlib 
# 并内联创建所有绘图(也就是在页面上)
%matplotlib inline

import matplotlib.pyplot as pyplot

# Plot a line through two values
line_values = [1.6, 2.7]
pyplot.plot(line_values)

# [<matplotlib.lines.Line2D at 0x10c4e7978>] 

png

MatPlotLib 中的饼图

%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt

# Example table: arrests made by each officer in January, February and March.
arrest_columns = ['officer_name', 'jan_arrests', 'feb_arrests', 'march_arrests']
raw_data = dict(
    officer_name=['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
    jan_arrests=[4, 24, 31, 2, 3],
    feb_arrests=[25, 94, 57, 62, 70],
    march_arrests=[5, 43, 23, 23, 51],
)
# `columns` pins the left-to-right order of the columns.
df = pd.DataFrame(data=raw_data, columns=arrest_columns)
# Bare expression so the notebook cell displays the frame.
df
  officer_name  jan_arrests  feb_arrests  march_arrests
0        Jason            4           25              5
1        Molly           24           94             43
2         Tina           31           57             23
3         Jake            2           62             23
4          Amy            3           70             51
# Add a column holding each officer's total arrests over the three months
monthly_columns = ['jan_arrests', 'feb_arrests', 'march_arrests']
df['total_arrests'] = df[monthly_columns].sum(axis=1)
# Bare expression so the notebook cell displays the updated frame.
df
  officer_name  jan_arrests  feb_arrests  march_arrests  total_arrests
0        Jason            4           25              5             34
1        Molly           24           94             43            161
2         Tina           31           57             23            111
3         Jake            2           62             23             87
4          Amy            3           70             51            124
# List of slice colors (from iWantHue)
colors = ["#E13F29", "#D69A80", "#D63B59", "#AE5552", "#CB5C3B", "#EB8076", "#96624E"]

# Styling for the pie, collected in one place
pie_options = dict(
    labels=df['officer_name'],   # label each slice with the officer's name
    shadow=False,                # no shadow
    colors=colors,               # slice colors
    explode=(0, 0, 0, 0, 0.15),  # pull the last slice out of the pie
    startangle=90,               # start at 90 degrees
    autopct='%1.1f%%',           # print each share as a percentage
)

# Draw the pie from the total_arrests column
plt.pie(df['total_arrests'], **pie_options)

# Equal axis scaling keeps the pie a circle
plt.axis('equal')

# Show the plot
plt.tight_layout()
plt.show()

png

MatPlotLib 中的散点图

%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Let pandas display up to 1000 rows.
# The full option name is used instead of the abbreviated 'display.max_row',
# which only works through pandas' partial option-name matching.
pd.set_option('display.max_rows', 1000)

# Let pandas display up to 50 columns
pd.set_option('display.max_columns', 50)

# Download the battles dataset (needs network access) and preview it
df = pd.read_csv('https://raw.githubusercontent.com/chrisalbon/war_of_the_five_kings_dataset/master/5kings_battles_v1.csv')
df.head()
nameyearbattle_numberattacker_kingdefender_kingattacker_1attacker_2attacker_3attacker_4defender_1defender_2defender_3defender_4attacker_outcomebattle_typemajor_deathmajor_captureattacker_sizedefender_sizeattacker_commanderdefender_commandersummerlocationregionnote
0Battle of the Golden Tooth2981Joffrey/Tommen BaratheonRobb StarkLannisterNaNNaNNaNTullyNaNNaNNaNwinpitched battle1.00.015000.04000.0Jaime LannisterClement Piper, Vance1.0Golden ToothThe WesterlandsNaN
1Battle at the Mummer’s Ford2982Joffrey/Tommen BaratheonRobb StarkLannisterNaNNaNNaNBaratheonNaNNaNNaNwinambush1.00.0NaN120.0Gregor CleganeBeric Dondarrion1.0Mummer’s FordThe RiverlandsNaN
2Battle of Riverrun2983Joffrey/Tommen BaratheonRobb StarkLannisterNaNNaNNaNTullyNaNNaNNaNwinpitched battle0.01.015000.010000.0Jaime Lannister, Andros BraxEdmure Tully, Tytos Blackwood1.0RiverrunThe RiverlandsNaN
3Battle of the Green Fork2984Robb StarkJoffrey/Tommen BaratheonStarkNaNNaNNaNLannisterNaNNaNNaNlosspitched battle1.01.018000.020000.0Roose Bolton, Wylis Manderly, Medger Cerwyn, H…Tywin Lannister, Gregor Clegane, Kevan Lannist…1.0Green ForkThe RiverlandsNaN
4Battle of the Whispering Wood2985Robb StarkJoffrey/Tommen BaratheonStarkTullyNaNNaNLannisterNaNNaNNaNwinambush1.01.01875.06000.0Robb Stark, Brynden TullyJaime Lannister1.0Whispering WoodThe RiverlandsNaN
# Create the figure
plt.figure(figsize=(10,8))

# One scatter series per year: (year, marker, color)
year_styles = [
    (298, 'x', 'b'),
    (299, 'o', 'r'),
    (300, '^', 'g'),
]

for year, marker, color in year_styles:
    in_year = df['year'] == year
    # Attacker size on the x axis, defender size on the y axis,
    # restricted to the battles of this year
    plt.scatter(df['attacker_size'][in_year],
                df['defender_size'][in_year],
                marker=marker,
                color=color,
                alpha=0.7,
                s=124,
                label='Year %d' % year)

# Title
plt.title('Battles Of The War Of The Five Kings')

# y label
plt.ylabel('Defender Size')

# x label
plt.xlabel('Attacker Size')

# Legend
plt.legend(loc='upper right')

# Axis limits: the data range padded by 1000 on each side.
# Series.min()/.max() skip missing values; the builtin min()/max() give
# order-dependent results when a column contains NaN, as these do.
plt.xlim([df['attacker_size'].min()-1000, df['attacker_size'].max()+1000])
plt.ylim([df['defender_size'].min()-1000, df['defender_size'].max()+1000])

plt.show()

png

MatPlotLib 中的栈式百分比条形图

%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt

# Assemble the example table of test scores: one row per student,
# one column per test.
score_columns = ['first_name', 'pre_score', 'mid_score', 'post_score']
raw_data = dict(
    first_name=['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
    pre_score=[4, 24, 31, 2, 3],
    mid_score=[25, 94, 57, 62, 70],
    post_score=[5, 43, 23, 23, 51],
)
# `columns` pins the left-to-right order of the columns.
df = pd.DataFrame(data=raw_data, columns=score_columns)
# Bare expression so the notebook cell displays the frame.
df
  first_name  pre_score  mid_score  post_score
0      Jason          4         25           5
1      Molly         24         94          43
2       Tina         31         57          23
3       Jake          2         62          23
4        Amy          3         70          51
# Create a figure with a single subplot
f, ax = plt.subplots(1, figsize=(10,5))

# Bar width of 1, so neighbouring bars touch
bar_width = 1

# Left edge of each bar, one per participant
bar_l = list(range(len(df['pre_score'])))

# x tick positions: the middle of each bar
tick_pos = [i+(bar_width/2) for i in bar_l]

# Total score of each participant
totals = [i+j+k for i,j,k in zip(df['pre_score'], df['mid_score'], df['post_score'])]

# Each score as a percentage of that participant's total
pre_rel = [i / j * 100 for  i,j in zip(df['pre_score'], totals)]
mid_rel = [i / j * 100 for  i,j in zip(df['mid_score'], totals)]
post_rel = [i / j * 100 for  i,j in zip(df['post_score'], totals)]

# Styling shared by the three layers. align='edge' makes `bar_l` the left
# edge of each bar, as its name and `tick_pos` assume; matplotlib centers
# bars on their x position by default, which would put the ticks on the
# bars' right edges.
bar_style = dict(width=bar_width, align='edge', alpha=0.9, edgecolor='white')

# Bottom layer: pre-score share
ax.bar(bar_l, pre_rel, label='Pre Score', color='#019600', **bar_style)

# Middle layer: mid-score share, stacked on top of the pre-score share
ax.bar(bar_l, mid_rel, bottom=pre_rel, label='Mid Score', color='#3C5F5A',
       **bar_style)

# Top layer: post-score share, stacked on top of the other two
ax.bar(bar_l, post_rel, bottom=[i+j for i,j in zip(pre_rel, mid_rel)],
       label='Post Score', color='#219AD8', **bar_style)

# Label the ticks with the participants' first names
plt.xticks(tick_pos, df['first_name'])
ax.set_ylabel("Percentage")
ax.set_xlabel("")

# Axis limits: one bar width of margin around the ticks, and a little
# room below 0% and above 100%
plt.xlim([min(tick_pos)-bar_width, max(tick_pos)+bar_width])
plt.ylim(-10, 110)

# Rotate the tick labels
plt.setp(plt.gca().get_xticklabels(), rotation=45, horizontalalignment='right')

# Show the plot
plt.show()

png

posted @ 2018-12-29 22:09  绝不原创的飞龙  阅读(5)  评论(0编辑  收藏  举报  来源