四、 Python数据可视化库-Matplotlib

本节内容:

  • 折线图绘制
  • 子图操作
  • 条形图与散点图
  • 柱形图与盒图
  • 细节设置

 

 

1、折线图绘制

import pandas as pd
unrate = pd.read_csv('unrate.csv')
print(unrate.head(5))
unrate['DATE'] = pd.to_datetime(unrate['DATE'])#通过.to_datetime这个日期函数对'DATE'进行一个标准格式的显示
print(unrate.head(5))

###
         DATE  VALUE
0  1948-01-01    3.4
1  1948-02-01    3.8
2  1948-03-01    4.0
3  1948-04-01    3.9
4  1948-05-01    3.5
        DATE  VALUE
0 1948-01-01    3.4
1 1948-02-01    3.8
2 1948-03-01    4.0
3 1948-04-01    3.9
4 1948-05-01    3.5
###

 

import pandas as pd
unrate = pd.read_csv('unrate.csv')
unrate['DATE'] = pd.to_datetime(unrate['DATE'])#通过.to_datetime这个日期函数对'DATE'进行一个标准格式的显示
print(unrate.head(12))

import matplotlib.pyplot as plt  #导入绘图库matplotlib.pyplot,起个别名为plt
#%matplotlib inline
#Using the different pyplot functions, we can create, customize, and display a plot. For example, we can use 2 functions to :
plt.plot()  #绘图 
plt.show()  #显示

###
 
         DATE  VALUE
0  1948-01-01    3.4
1  1948-02-01    3.8
2  1948-03-01    4.0
3  1948-04-01    3.9
4  1948-05-01    3.5
5  1948-06-01    3.6
6  1948-07-01    3.6
7  1948-08-01    3.9
8  1948-09-01    3.8
9  1948-10-01    3.7
10 1948-11-01    3.8
11 1948-12-01    4.0
 

###
first_twelve = unrate[0:5]
plt.plot(first_twelve['DATE'], first_twelve['VALUE'])  #第一参数为横坐标的值,第二个参数为纵坐标的值
plt.show()

###
###
#While the y-axis looks fine, the x-axis tick labels are too close together and are unreadable
#We can rotate the x-axis tick labels by 90 degrees so they don't overlap
#We can specify degrees of rotation using a float or integer value.
plt.plot(first_twelve['DATE'], first_twelve['VALUE'])
plt.xticks(rotation=45) #坐标值太长,对横坐标进行45°旋转
#print help(plt.xticks)
plt.show()

###
###
#xlabel(): accepts a string value, which gets set as the x-axis label.接受一个字符串值,该值被设置为x轴标签。
#ylabel(): accepts a string value, which is set as the y-axis label.接受一个字符串值,该值被设置为y轴标签。
#title(): accepts a string value, which is set as the plot title.接受一个字符串值,该值被设置为plot标题。

plt.plot(first_twelve['DATE'], first_twelve['VALUE'])
plt.xticks(rotation=90)
plt.xlabel('Month')
plt.ylabel('Unemployment Rate')
plt.title('Monthly Unemployment Trends, 1948')
plt.show()

###
###

 

2、子图操作

 

#add_subplot(first,second,index) first means number of Row,second means number of Column.
#add_subplot(first,second,index) first表示行数,second表示列数。
import matplotlib.pyplot as plt
fig = plt.figure()  #指定绘图区间(绘图域)
ax1 = fig.add_subplot(2,2,1)  #前两个参数表示2*2的矩阵形式,最后一个参数表示在2*2的矩阵的第一个位置
ax2 = fig.add_subplot(2,2,2)
ax2 = fig.add_subplot(2,2,4)
plt.show()

###
###

 

import numpy as np
fig = plt.figure()
fig = plt.figure(figsize=(10, 3))#对绘图域指定大小,第一个参数是长,第二个参数是宽
ax1 = fig.add_subplot(2,1,1)
ax2 = fig.add_subplot(2,1,2)

ax1.plot(np.random.randint(1,5,5), np.arange(5))#在子图上进行一些随机操作
ax2.plot(np.arange(10)*3, np.arange(10))
plt.show()

###
<matplotlib.figure.Figure at 0x2a51ea8a9e8>
 
###
unrate['MONTH'] = unrate['DATE'].dt.month  #把数据里面的月份取出来
fig = plt.figure(figsize=(6,3))

plt.plot(unrate[0:12]['MONTH'], unrate[0:12]['VALUE'], c='red')
plt.plot(unrate[12:24]['MONTH'], unrate[12:24]['VALUE'], c='blue')  #用c来指定折线的颜色
plt.show()

###
###

 

fig = plt.figure(figsize=(10,6))
colors = ['red', 'blue', 'green', 'orange', 'black']
for i in range(5):
    start_index = i*12
    end_index = (i+1)*12
    subset = unrate[start_index:end_index]#[0:12]-red,[12:24]-blue,[24,36]-green...
    plt.plot(subset['MONTH'], subset['VALUE'], c=colors[i])    
plt.show()

###
###
fig = plt.figure(figsize=(10,6))
colors = ['red', 'blue', 'green', 'orange', 'black']
for i in range(5):
    start_index = i*12
    end_index = (i+1)*12
    subset = unrate[start_index:end_index]
    label = str(1948 + i)  #给折线定标签
    plt.plot(subset['MONTH'], subset['VALUE'], c=colors[i], label=label)
plt.legend(loc='best')  #把标签框放在右边,改变loc的值,会改变标签框的位置
#print(help(plt.legend) )
plt.show()

###
###
fig = plt.figure(figsize=(10,6))
colors = ['red', 'blue', 'green', 'orange', 'black']
for i in range(5):
    start_index = i*12
    end_index = (i+1)*12
    subset = unrate[start_index:end_index]
    label = str(1948 + i)
    plt.plot(subset['MONTH'], subset['VALUE'], c=colors[i], label=label)
plt.legend(loc='upper left')
plt.xlabel('Month, Integer')
plt.ylabel('Unemployment Rate, Percent')
plt.title('Monthly Unemployment Trends, 1948-1952')

plt.show()

###
###

 

3、条形图与散点图

import matplotlib.pyplot as plt
from numpy import arange
#The Axes.bar() method has 2 required parameters, left and height. bar()方法有两个必需的参数,左边和高度。
#We use the left parameter to specify the x coordinates of the left sides of the bar. 我们使用左参数来指定条形图左侧的x坐标。
#We use the height parameter to specify the height of each bar。我们使用高度参数来指定每个栏的高度
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
bar_heights = norm_reviews.ix[0, num_cols].values   #柱的高度,.ix是一种索引的依据
#print(bar_heights)
bar_positions = arange(5) + 0.75  #每个柱离零值的距离
#print(bar_positions)
fig, ax = plt.subplots()
ax.bar(bar_positions, bar_heights, 0.5)  #bar型图是条形,0.5是条形的宽度
plt.show()

###
###

 

#By default, matplotlib sets the x-axis tick labels to the integer values the bars 默认情况下,matplotlib将x轴标记标签设置为条上的整数值
#spanned on the x-axis (from 0 to 6). We only need tick labels on the x-axis where the bars are positioned. 在x轴上(从0到6),我们只需要在横轴上的横轴上标记条就可以了。
#We can use Axes.set_xticks() to change the positions of the ticks to [1, 2, 3, 4, 5]:我们可以使用ax .set_xticks()将蜱的位置改变为[1,2,3,4,5]:

num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
bar_heights = norm_reviews.ix[0, num_cols].values
bar_positions = arange(5) + 0.75
tick_positions = range(1,6)
fig, ax = plt.subplots()

ax.bar(bar_positions, bar_heights, 0.5)
ax.set_xticks(tick_positions)
ax.set_xticklabels(num_cols, rotation=45)  ##x轴下标的名字和放置角度

ax.set_xlabel('Rating Source')
ax.set_ylabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()

###
###

 

import matplotlib.pyplot as plt
from numpy import arange
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']

bar_widths = norm_reviews.ix[0, num_cols].values
bar_positions = arange(5) + 0.75
tick_positions = range(1,6)
fig, ax = plt.subplots()
ax.barh(bar_positions, bar_widths, 0.5)# #barh型图是横条形,0.5是条形的宽度

ax.set_yticks(tick_positions)
ax.set_yticklabels(num_cols)
ax.set_ylabel('Rating Source')
ax.set_xlabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()

###
###

 

#Let's look at a plot that can help us visualize many points.
fig, ax = plt.subplots()
ax.scatter(norm_reviews['Fandango_Ratingvalue'], norm_reviews['RT_user_norm'])  #绘点图
ax.set_xlabel('Fandango')
ax.set_ylabel('Rotten Tomatoes')
plt.show()

###
###
#Switching Axes
fig = plt.figure(figsize=(5,10))
ax1 = fig.add_subplot(2,1,1)
ax2 = fig.add_subplot(2,1,2)
ax1.scatter(norm_reviews['Fandango_Ratingvalue'], norm_reviews['RT_user_norm'])
ax1.set_xlabel('Fandango')
ax1.set_ylabel('Rotten Tomatoes')
ax2.scatter(norm_reviews['RT_user_norm'], norm_reviews['Fandango_Ratingvalue'])
ax2.set_xlabel('Rotten Tomatoes')
ax2.set_ylabel('Fandango')
plt.show()

###
###

 

 

 4、柱形图与盒图

 

 

import pandas as pd
import matplotlib.pyplot as plt
reviews = pd.read_csv('fandango_scores.csv')
cols = ['FILM', 'RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue']
norm_reviews = reviews[cols]
print(norm_reviews[:5])

###
                             FILM  RT_user_norm  Metacritic_user_nom  \
0  Avengers: Age of Ultron (2015)           4.3                 3.55  
1               Cinderella (2015)           4.0                 3.75  
2                  Ant-Man (2015)           4.5                 4.05  
3          Do You Believe? (2015)           4.2                 2.35  
4   Hot Tub Time Machine 2 (2015)           1.4                 1.70  

   IMDB_norm  Fandango_Ratingvalue 
0       3.90                   4.5 
1       3.55                   4.5 
2       3.90                   4.5 
3       2.70                   4.5 
4       2.55                   3.0 
###
fandango_distribution = norm_reviews['Fandango_Ratingvalue'].value_counts()
fandango_distribution = fandango_distribution.sort_index() #对上值进行排序

imdb_distribution = norm_reviews['IMDB_norm'].value_counts()
imdb_distribution = imdb_distribution.sort_index()

print(fandango_distribution)
print(imdb_distribution)

 

fig, ax = plt.subplots()
ax.hist(norm_reviews['Fandango_Ratingvalue'])  #hist中没有指定bins,默认为10条
#ax.hist(norm_reviews['Fandango_Ratingvalue'],bins=20)
#ax.hist(norm_reviews['Fandango_Ratingvalue'], range=(4, 5),bins=20)  #range是空值显示的区间
plt.show()

###
 
###

 

fig = plt.figure(figsize=(5,20))
ax1 = fig.add_subplot(4,1,1)
ax2 = fig.add_subplot(4,1,2)
ax3 = fig.add_subplot(4,1,3)
ax4 = fig.add_subplot(4,1,4)
ax1.hist(norm_reviews['Fandango_Ratingvalue'], bins=20, range=(0, 5))
ax1.set_title('Distribution of Fandango Ratings')
ax1.set_ylim(0, 50)  #.set_ylim用于指定y轴的数值区间

ax2.hist(norm_reviews['RT_user_norm'], 20, range=(0, 5))
ax2.set_title('Distribution of Rotten Tomatoes Ratings')
ax2.set_ylim(0, 50)

ax3.hist(norm_reviews['Metacritic_user_nom'], 20, range=(0, 5))
ax3.set_title('Distribution of Metacritic Ratings')
ax3.set_ylim(0, 50)

ax4.hist(norm_reviews['IMDB_norm'], 20, range=(0, 5))
ax4.set_title('Distribution of IMDB Ratings')
ax4.set_ylim(0, 50)

plt.show()

 

盒图:

 

fig, ax = plt.subplots()
ax.boxplot(norm_reviews['RT_user_norm'])#画盒图
ax.set_xticklabels(['Rotten Tomatoes'])
ax.set_ylim(0, 5)
plt.show()

###
###
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue']
fig, ax = plt.subplots()
ax.boxplot(norm_reviews[num_cols].values)
ax.set_xticklabels(num_cols, rotation=90)
ax.set_ylim(0,5)
plt.show()

###
###

 

5、细节设置

#Color
import pandas as pd
import matplotlib.pyplot as plt

women_degrees = pd.read_csv('percent-bachelors-degrees-women-usa.csv')
major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']


cb_dark_blue = (0/255, 107/255, 164/255)   #加载一些特别的颜色进来(eg:深蓝)
cb_orange = (255/255, 128/255, 14/255)

fig = plt.figure(figsize=(12, 12))

for sp in range(0,4):
    ax = fig.add_subplot(2,2,sp+1)
    # The color for each line is assigned here.
    ax.plot(women_degrees['Year'], women_degrees[major_cats[sp]], c=cb_dark_blue, label='Women')
    ax.plot(women_degrees['Year'], 100-women_degrees[major_cats[sp]], c=cb_orange, label='Men')
    for key,spine in ax.spines.items():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0,100)
    ax.set_title(major_cats[sp])
    ax.tick_params(bottom="off", top="off", left="off", right="off")

plt.legend(loc='upper right')
plt.show()

###

###
#Setting Line Width
cb_dark_blue = (0/255, 107/255, 164/255)
cb_orange = (255/255, 128/255, 14/255)

fig = plt.figure(figsize=(12, 12))

for sp in range(0,4):
    ax = fig.add_subplot(2,2,sp+1)
    # Set the line width when specifying how each line should look.
    ax.plot(women_degrees['Year'], women_degrees[major_cats[sp]], c=cb_dark_blue, label='Women', linewidth=10)#linewidth指定线条宽度
    ax.plot(women_degrees['Year'], 100-women_degrees[major_cats[sp]], c=cb_orange, label='Men', linewidth=10)
    for key,spine in ax.spines.items():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0,100)
    ax.set_title(major_cats[sp])
    ax.tick_params(bottom="off", top="off", left="off", right="off")  #对四条边的齿轮进行隐藏

plt.legend(loc='upper right')
plt.show()

###
###

 

stem_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology', 'Physical Sciences', 'Math and Statistics']
fig = plt.figure(figsize=(18, 3))

for sp in range(0,6):
    ax = fig.add_subplot(1,6,sp+1)
    ax.plot(women_degrees['Year'], women_degrees[stem_cats[sp]], c=cb_dark_blue, label='Women', linewidth=3)
    ax.plot(women_degrees['Year'], 100-women_degrees[stem_cats[sp]], c=cb_orange, label='Men', linewidth=3)
    for key,spine in ax.spines.items():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0,100)
    ax.set_title(stem_cats[sp])
    ax.tick_params(bottom="off", top="off", left="off", right="off")

plt.legend(loc='upper right')
plt.show()

###
 
###

 

fig = plt.figure(figsize=(18, 3))

for sp in range(0,6):
    ax = fig.add_subplot(1,6,sp+1)
    ax.plot(women_degrees['Year'], women_degrees[stem_cats[sp]], c=cb_dark_blue, label='Women', linewidth=3)
    ax.plot(women_degrees['Year'], 100-women_degrees[stem_cats[sp]], c=cb_orange, label='Men', linewidth=3)
    for key,spine in ax.spines.items():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0,100)
    ax.set_title(stem_cats[sp])
    ax.tick_params(bottom="off", top="off", left="off", right="off")
plt.legend(loc='upper right')
plt.show()
fig = plt.figure(figsize=(18, 3))

for sp in range(0,6):
    ax = fig.add_subplot(1,6,sp+1)
    ax.plot(women_degrees['Year'], women_degrees[stem_cats[sp]], c=cb_dark_blue, label='Women', linewidth=3)
    ax.plot(women_degrees['Year'], 100-women_degrees[stem_cats[sp]], c=cb_orange, label='Men', linewidth=3)
    for key,spine in ax.spines.items():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0,100)
    ax.set_title(stem_cats[sp])
    ax.tick_params(bottom="off", top="off", left="off", right="off")
    
    if sp == 0:
        ax.text(2005, 87, 'Men')
        ax.text(2002, 8, 'Women')
    elif sp == 5:
        ax.text(2005, 62, 'Men')
        ax.text(2001, 35, 'Women')
plt.show()

###
 
###

 

posted @ 2018-11-22 17:15  大头swag  阅读(230)  评论(0)    收藏  举报