pandas:数据可视化
普通柱状图
"""Plain bar chart of the 'Number' column for each 'Field'."""
import pandas as pd
import matplotlib.pyplot as plt

file = '/tmp/Students2.xlsx'
student = pd.read_excel(file)

# Sort descending so the tallest bar comes first on the x axis.
student_filter = student.sort_values(by='Number', ascending=False)
print(student_filter)

plt.bar(student_filter.Field, student_filter.Number, color='orange')
# rotation must be a number (or 'vertical' / 'horizontal'); the string '90'
# raises ValueError on recent matplotlib releases.
plt.xticks(student_filter.Field, rotation=90)
plt.xlabel('Field')
plt.ylabel('Number')
plt.title('International student by field', fontsize=16)
# Re-pad the figure so the rotated tick labels are not clipped.
plt.tight_layout()
plt.show()

# Alternative: the DataFrame's built-in plotting API.
# student_filter.plot.bar(x='Field', y='Number', color='orange', title='International student by field')
# plt.show()
分组柱状图
"""Grouped bar chart: the '2016' and '2017' columns side by side per 'Field'."""
import pandas as pd
import matplotlib.pyplot as plt

excel_path = '/tmp/Students3.xlsx'
raw = pd.read_excel(excel_path)

# Rank rows by the '2017' column, largest first.
# NOTE(review): assumes the sheet's headers load as the strings '2016'/'2017',
# not as integers -- confirm against the workbook.
ranked = raw.sort_values(by='2017', ascending=False)
print(ranked)

# Plain-matplotlib attempt kept for reference:
# plt.bar(student_filter.Field,[2017,2016],color=['orange','red'])
# plt.show()

# DataFrame.plot.bar returns the Axes it drew on; style that Axes directly.
ax = ranked.plot.bar(x='Field', y=['2016', '2017'], color=['orange', 'red'])
ax.set_title('International Students by Field', fontsize=16)
ax.set_xlabel('Field', fontweight='bold')
ax.set_ylabel('Number', fontweight='bold')

# Slanted, right-aligned labels; the extra bottom/left margin makes room for them.
ax.set_xticklabels(ranked['Field'], rotation=40, ha='right')
ax.figure.subplots_adjust(left=0.2, bottom=0.42)
plt.show()
叠加柱状图-横向叠加柱状图
"""Stacked bar chart of the 'Oct', 'Nov' and 'Dec' columns per 'Name'.

A horizontal variant is noted in a comment below.
"""
import pandas as pd
import matplotlib.pyplot as plt

workbook = '/tmp/Users.xlsx'
users = pd.read_excel(workbook)

# Row-wise total of the three month columns, used only for ordering the bars.
users['Total'] = users['Oct'] + users['Nov'] + users['Dec']
users = users.sort_values(by='Total', ascending=False)
print(users)

stacked_columns = ['Oct', 'Nov', 'Dec']
users.plot.bar(x='Name', y=stacked_columns, stacked=True)
# Horizontal stacking instead:
# users.plot.barh(x='Name',y=['Oct','Nov','Dec'],stacked=True)

plt.tight_layout()
plt.show()
饼状图
"""Pie chart of the '2017' column, one slice per 'From' value."""
import pandas as pd
import matplotlib.pyplot as plt

source_path = '/tmp/Students.xlsx'

# Use the 'From' column as the index so its values become the slice labels.
students = pd.read_excel(source_path, index_col='From')
print(students)

# Slices follow the row order of the sheet; no sorting is applied here.
# startangle=-270 is equivalent to 90 degrees, so the first slice starts at
# the top, and counterclock=False lays the slices out clockwise.
column_2017 = students['2017']
column_2017.plot.pie(fontsize=8, counterclock=False, startangle=-270)

plt.title('Source of International Students', fontsize=16, fontweight='bold')
plt.ylabel('2017', fontsize=12, fontweight='bold')
plt.show()
曲线图-叠加曲线图
"""Area chart of four category columns indexed by 'Week'.

A plain line-chart variant is noted in a comment below.
"""
import pandas as pd
import matplotlib.pyplot as plt

orders_path = '/tmp/Orders.xlsx'
weeks = pd.read_excel(orders_path, index_col='Week')
print(weeks)

categories = ['Accessories', 'Bikes', 'Clothing', 'Components']

# Line chart variant:
# weeks.plot(y=['Accessories', 'Bikes', 'Clothing', 'Components'])
weeks.plot.area(y=categories)

plt.title('Sales Trends', fontsize=16, fontweight='bold')
# Put a tick on every index value rather than matplotlib's automatic subset.
plt.xticks(weeks.index, fontsize=8)
plt.show()
密度图-离散图-直方图
"""Histogram of 'price', with scatter and density (KDE) variants in comments."""
import pandas as pd
import matplotlib.pyplot as plt

# Raise the column display limit so wide frames print without elision.
pd.options.display.max_columns = 999

file = '/tmp/home_data.xlsx'
homes = pd.read_excel(file)
print(homes.head())

# Scatter plot of living area against price:
# homes.plot.scatter(x='sqft_living', y='price')
# Density (KDE) plot of living area:
# homes.sqft_living.plot.kde()

# Histogram of price.
homes.price.plot.hist(bins=200)
# range() only accepts integers; the column maximum is cast because a float
# price column would otherwise raise TypeError.
plt.xticks(range(0, int(homes.price.max()), 100000), fontsize=8, rotation=90)
# Histogram of living area instead:
# homes.sqft_living.plot.hist(bins=100)
# plt.xticks(range(0, int(homes.sqft_living.max()), 500), fontsize=8, rotation=90)
plt.show()

# Pairwise correlation between the columns:
# print(homes.corr())