2020东京奥运会奖牌榜可视化分析(pyecharts)
数据获取和处理
从网页中获取各国的奖牌数量和排名以及奖牌类型(json格式)。
# Medal table: download the per-country standings as JSON.
url = 'https://app-sc.miguvideo.com/vms-livedata/olympic-medal/total-table/15/110000004609'
# A timeout keeps the script from hanging forever if the endpoint stalls.
data = requests.get(url, timeout=10).json()

# From the JSON payload, extract the rank, Chinese country name, country ID
# and the gold / silver / bronze / total medal counts of every entry.
# The frame is built in a single call: DataFrame.append copied the whole
# frame on every iteration and was removed in pandas 2.0.
df00 = pd.DataFrame(
    [
        [item['rank'], item['countryName'], item['countryId'],
         item['goldMedalNum'], item['silverMedalNum'],
         item['bronzeMedalNum'], item['totalMedalNum']]
        for item in data['body']['allMedalData']
    ],
    columns=['rank', 'C_name', 'countryId', 'goldMedalNum',
             'silverMedalNum', 'bronzeMedalNum', 'totalMedalNum'],
)
df00 = df00.astype({
    'goldMedalNum': int,
    'silverMedalNum': int,
    'bronzeMedalNum': int,
    'totalMedalNum': int,
})

# Weighted medal score: gold counts 1, silver 2/3 and bronze 1/3.
df00['totalMedalNum2'] = (
    df00['goldMedalNum']
    + df00['silverMedalNum'] * 2 / 3
    + df00['bronzeMedalNum'] * 1 / 3
)
# S_level is the weighted score relative to the best score in the table,
# stored as a string with two decimals.
df00['S_level'] = df00['totalMedalNum2'] / np.max(df00['totalMedalNum2'])
df00['S_level'] = df00['S_level'].apply(lambda x: '%.2f' % x)
df00.sort_values('totalMedalNum', ascending=False, inplace=True)
# Lookup table used to obtain each country's English name.
# Each useful line of the file has the form "<Chinese name>:<English name>".
with open('./国家名中英文对照表.txt', 'r', encoding='utf-8') as fp:
    name_list = fp.readlines()

# Build the frame in one call (DataFrame.append was removed in pandas 2.0).
# Lines without a ':' separator (e.g. blank lines) carry no mapping and are
# skipped; splitting at most once keeps exactly two fields per row.
df01 = pd.DataFrame(
    [name.strip().split(':', 1) for name in name_list if ':' in name],
    columns=['C_name', 'E_name'],
)

# Attach the English names to the medal table; countries missing from the
# lookup table keep a NaN E_name because this is a left join.
df02 = pd.merge(df00, df01, how='left', on='C_name')
# Medal details: download the per-medal records as JSON.
url = 'https://app-sc.miguvideo.com/vms-livedata/olympic-medal/detail-total/15/110000004609'
# A timeout keeps the script from hanging forever if the endpoint stalls.
data2 = requests.get(url, timeout=10).json()

# Extracted fields: country name, country ID, sport (bigItemName),
# sub-group (minorItemName), event name (sportsName) and medal type.
# Built in a single call because DataFrame.append was removed in pandas 2.0.
df03 = pd.DataFrame(
    [
        [item['countryName'], item['countryId'],
         item['bigItemName'], item['minorItemName'],
         item['sportsName'], item['medalType']]
        for item in data2['body']['medalTableDetail']
    ],
    columns=['countryName', 'countryId', 'bigItemName',
             'minorItemName', 'sportsName', 'medalType'],
)
# Readable medal labels; values other than 1 / 2 / 3 are left unchanged.
# NOTE(review): this only matches integer codes - confirm the API does not
# return medalType as a string.
df03['medalType2'] = df03['medalType'].replace({1: 'Gold', 2: 'Silver', 3: 'Bronze'})
数据可视化
绘制奖牌数量世界地图
def wmap_plot(datas):
    """Build a world map shaded by total medal count.

    Args:
        datas: Table providing an ``E_name`` column (used as the map region
            name) and a ``totalMedalNum`` column (used as the region value).

    Returns:
        The configured pyecharts ``Map`` chart.
    """
    totals = datas['totalMedalNum']
    w_map = Map()
    w_map.add('奖牌数', [list(z) for z in zip(datas['E_name'], totals)],
              'world', is_map_symbol_show=False)
    w_map.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    w_map.set_global_opts(
        title_opts=opts.TitleOpts(title='2020年东京奥运会奖牌总数分布图'),
        # np.max() returns a NumPy scalar, while pyecharts expects native
        # Python numbers when it serialises the options; convert explicitly
        # so the colour scale really tops out at the largest medal count.
        visualmap_opts=opts.VisualMapOpts(max_=int(np.max(totals))),
        legend_opts=opts.LegendOpts(is_show=False),
    )
    return w_map
绘制各国奖牌统计柱状图(前20名)
def bar_plot(datas, n=20):
    """Build a stacked medal bar chart overlaid with the S_level line.

    Args:
        datas: Table providing the ``C_name``, ``goldMedalNum``,
            ``silverMedalNum``, ``bronzeMedalNum`` and ``S_level`` columns.
            Rows are taken in their existing order.
        n: Number of leading rows to plot (default 20).

    Returns:
        The bar chart with the line chart overlapped on a second y-axis.
    """
    def first_n(column):
        # First n values of a column as a plain Python list.
        return datas[column][:n].tolist()

    countries = first_n('C_name')

    bar = Bar()
    bar.add_xaxis(countries)
    # Series names fixed: they were misspelled "...Metal" in the legend.
    bar.add_yaxis('GoldMedal', first_n('goldMedalNum'), stack='stack1')
    bar.add_yaxis('SilverMedal', first_n('silverMedalNum'), stack='stack1')
    bar.add_yaxis('BronzeMedal', first_n('bronzeMedalNum'), stack='stack1')
    bar.set_series_opts(label_opts=opts.LabelOpts(position='inside', font_size=8))
    bar.set_global_opts(
        title_opts=opts.TitleOpts(title='2020年东京奥运会奖牌榜'),
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=45)),
    )
    # Second y-axis (index 1) carries the S_level line.
    bar.extend_axis(yaxis=opts.AxisOpts(name='S_level', type_='value'))

    line = Line()
    line.add_xaxis(countries)
    line.add_yaxis(
        'S_level',
        yaxis_index=1,
        y_axis=first_n('S_level'),
        label_opts=opts.LabelOpts(position='top'),
    )
    return bar.overlap(line)
绘制前10名的奖牌类型占比分析图
def pie_plot(datas, country_name, countryId):
    """Build a sunburst chart of one country's medals by sport and type.

    The inner ring has one segment per ``bigItemName``; each segment has
    Gold / Silver / Bronze children sized by the number of medals.

    Args:
        datas: Medal detail table providing the ``countryId``,
            ``bigItemName``, ``medalType`` and ``medalType2`` columns.
        country_name: Name used for the series and in the chart title.
        countryId: Value matched against ``datas['countryId']``.

    Returns:
        The configured pyecharts ``Sunburst`` chart.
    """
    medal_types = ['Gold', 'Silver', 'Bronze']

    country_rows = datas[datas['countryId'] == countryId]
    counts = country_rows.groupby(['bigItemName', 'medalType2']).count()['medalType']
    # unstack() only creates columns for medal types the country actually
    # won; reindex so all three always exist (missing ones count as 0)
    # instead of raising KeyError below. Each segment's value is therefore
    # the sum of its three children.
    counts = counts.unstack(fill_value=0).reindex(columns=medal_types, fill_value=0)

    dict_datas = []
    for sport, row in counts.iterrows():
        # int(): pyecharts expects native Python numbers, not NumPy scalars.
        children = [
            opts.SunburstItem(name=medal, value=int(row[medal]))
            for medal in medal_types
        ]
        dict_datas.append(
            opts.SunburstItem(name=sport, value=int(row.sum()), children=children)
        )

    sunburst = (
        Sunburst(init_opts=opts.InitOpts(width="1000px", height="600px"))
        .add(series_name=country_name, data_pair=dict_datas, radius=['20%', "80%"])
        .set_global_opts(title_opts=opts.TitleOpts(title="{}获奖项目比例分析".format(country_name)))
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}", font_size=10))
    )
    return sunburst
def tpie(data01, data02, n=10):
    """Build a timeline with one sunburst chart per leading country.

    Args:
        data01: Medal table providing ``C_name`` and ``countryId``; its
            first ``n`` rows (in existing order) are used.
        data02: Medal detail table passed on to ``pie_plot``.
        n: Number of leading countries to include (default 10, the
            previously hard-coded value).

    Returns:
        The pyecharts ``Timeline`` holding one chart per country.
    """
    t = Timeline()
    leaders = data01[:n]
    for name, country_id in zip(leaders['C_name'], leaders['countryId']):
        chart = pie_plot(datas=data02, country_name=name, countryId=country_id)
        # The country name doubles as the timeline tick label.
        t.add(chart, "{}".format(name))
    return t
绘制图形标头Title
def title_plot():
    """Build the page banner: a data-less ``Pie`` chart showing only a title.

    Returns:
        The ``Pie`` chart (chart_id 1) with a centred, 36-pt black title.
    """
    title = Pie(init_opts=opts.InitOpts(chart_id=1))
    banner_style = opts.TextStyleOpts(font_size=36, color='#000000')
    banner = opts.TitleOpts(
        title="2020东京奥运会奖牌榜分析",
        title_textstyle_opts=banner_style,
        pos_left='center',
        pos_top='middle',
    )
    title.set_global_opts(title_opts=banner)
    return title
页面布局Page
def page():
    """Assemble all charts into one draggable-layout pyecharts ``Page``.

    Reads the module-level ``df02`` and ``df03`` tables. Charts are added in
    this order: title banner, world map, bar/line chart, sunburst timeline.

    Returns:
        The populated ``Page`` object (not yet rendered).
    """
    layout = Page(layout=Page.DraggablePageLayout, page_title="2020东京奥运会奖牌榜")
    banner = title_plot()
    world_map = wmap_plot(datas=df02)
    ranking = bar_plot(datas=df02)
    breakdown = tpie(data01=df02, data02=df03)
    layout.add(banner, world_map, ranking, breakdown)
    return layout
# page.render('2020东京奥运会奖牌榜-test.html')
# page.save_resize_html(source='2020东京奥运会奖牌榜-test.html',
# cfg_file='chart_config2.json',
# dest='2020东京奥运会奖牌榜.html'
# )