【项目02】王者荣耀英雄搜索数据结构
【项目02】王者荣耀英雄搜索数据结构
1,目标
1,从 https://pvp.qq.com/web201605/js/herolist.json 获取英雄描述文件herolist.json
2,假定英雄类型标识如下
hero_type =["全部","战士","法师","坦克","刺客","射手","辅助"]
3,从 https://pvp.qq.com/web201605/herolist.shtml 获取英雄网页列表,并且提取英雄名称及头像链接
4,从网页提取英雄名称和头像链接信息
5,合并两个信息并构建英雄详细信息数据列表,例如
[['嫦娥', '法师|战士|坦克', '寒月公主|露花倒影', 'game.gtimg.cn/images/yxzj/img201606/heroimg/515/515.jpg'], ['上官婉儿', '法师|全部|刺客', '惊鸿之笔|修竹墨客', 'game.gtimg.cn/images/yxzj/img201606/heroimg/513/513.jpg']]
6,构建易于搜索的数据结构,并设计函数,实现搜索函数
def lookup(index,keyword):
    pass
2,代码实现
import json
from bs4 import BeautifulSoup as bs
import requests
0.1 Database
# Download the official hero description file (herolist.json).
# The timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops an HTTP error page from being cached as data.
r = requests.get('https://pvp.qq.com/web201605/js/herolist.json', timeout=10)
r.raise_for_status()

# Fail early, before anything is written to disk, if the payload is not JSON.
json.loads(r.text)

# Save the raw response text to a local file.
with open("all_hero.json", 'wt', encoding="utf-8") as fd:
    fd.write(r.text)

# Restore: hero_list is the parsed content of the saved file.
with open('all_hero.json', encoding="utf-8") as json_data:
    hero_list = json.load(json_data)
def search_for_hero_info(name=None):
    """Return the first record in ``hero_list`` whose "cname" equals *name*.

    Records that have no "cname" key are skipped. Returns None when no
    record matches.
    """
    matching_records = (
        record
        for record in hero_list
        if "cname" in record and record["cname"] == name
    )
    return next(matching_records, None)
# Display names of the hero categories. build_hero_type() uses the numeric
# "hero_type", "new_type" and "hero_type2" fields of a hero record as
# indexes into this list.
hero_type =["全部","战士","法师","坦克","刺客","射手","辅助"]
from selenium import webdriver

# Open the hero list page in Chrome through Selenium and capture its HTML.
browser = webdriver.Chrome('./chromedriver')
try:
    browser.get("https://pvp.qq.com/web201605/herolist.shtml")
    html = browser.page_source
finally:
    # Always shut the browser down, even when loading the page fails,
    # so no orphaned browser/driver process is left behind.
    browser.quit()

# Save the HTML to a local file.
with open("hero_web.html", 'w', encoding="utf-8") as fd:
    fd.write(html)

# Restore the saved HTML from the local file.
hero_html = None
with open("hero_web.html", 'r', encoding="utf-8") as fd:
    hero_html = fd.read()
def build_hero_type(hero):
    """Return the category names of *hero* joined with "|".

    The "hero_type", "new_type" and "hero_type2" fields are looked up in that
    order; each one present in *hero* is used as an index into the
    module-level ``hero_type`` list. Missing fields are skipped.
    """
    labels = [
        hero_type[hero[field]]
        for field in ("hero_type", "new_type", "hero_type2")
        if field in hero
    ]
    return '|'.join(labels)
# Each merged entry is [hero_name, hero_type, hero_skin, hero_url]
def merge_hero_info(hero_html, hero_json):
    """Merge scraped web entries with the JSON hero records.

    Args:
        hero_html: iterable of [hero_name, avatar_url] pairs taken from the
            hero list web page.
        hero_json: iterable of hero record dicts; a record is matched to a
            web entry through its "cname" field.

    Returns:
        A list of [hero_name, hero_type, hero_skin, hero_url] lists, in the
        order of *hero_html*. Web entries with no matching JSON record are
        skipped instead of raising.
    """
    # Index the records by name once; setdefault keeps the first record
    # for a name, matching a first-match linear search.
    records_by_name = {}
    for record in hero_json:
        if "cname" in record:
            records_by_name.setdefault(record["cname"], record)

    all_heros = []
    for hero in hero_html:
        name, url = hero[0], hero[1]
        hero_detail = records_by_name.get(name)
        if hero_detail is None:
            # No JSON description for this web entry: nothing to merge.
            continue
        # A record without "skin_name" yields an empty skin string.
        skin = hero_detail.get("skin_name", "").strip(" '")
        all_heros.append([name, build_hero_type(hero_detail), skin, url])
    return all_heros
# Parse the captured page and locate the <ul class="herolist"> element.
hero_soup = bs(html, "lxml")
hero_html_list = hero_soup.find("ul", class_="herolist")

# Every <li> inside that list is one hero entry.
all_hero_list = hero_html_list.find_all("li")

# Per entry: [text of the <li>, <img> src with leading/trailing "/" removed].
gen_heros = [
    [item.text, item.img["src"].strip("/")]
    for item in all_hero_list
]

# Combine the scraped entries with the JSON records.
combined_heros = merge_hero_info(gen_heros, hero_list)
## build up index
add_to_index
```
index is
[
[<keyword>,[<hero_detail>,...]],
[<keyword>,[<hero_detail>,...]]...
]
keyword: 字符串(英雄名称、英雄类型或皮肤名称)
hero_detail: 英雄详细信息列表 [hero_name, hero_type, hero_skin, hero_url]
```
[
[unit,factor],[]
]
[
[keyword, [
['百里玄策', '刺客|全部', '嚣狂之镰|威尼斯狂欢', 'http://game.gtimg.cn/images/yxzj/img201606/heroimg/195/195.jpg'],
['百里玄策', '刺客|全部', '嚣狂之镰|威尼斯狂欢', 'http://game.gtimg.cn/images/yxzj/img201606/heroimg/195/195.jpg'],
['百里玄策', '刺客|全部', '嚣狂之镰|威尼斯狂欢', 'http://game.gtimg.cn/images/yxzj/img201606/heroimg/195/195.jpg']]],
]
## use list for storage
['百里玄策', '刺客|全部', '嚣狂之镰|威尼斯狂欢', 'http://game.gtimg.cn/images/yxzj/img201606/heroimg/195/195.jpg']
0.2 build up index
add_to_index
index is [
[<keyword>,[<hero_detail>,...]], [<keyword>,[<hero_detail>,...]]...
]
keyword: 字符串(英雄名称、英雄类型或皮肤名称)
hero_detail: 英雄详细信息列表 [hero_name, hero_type, hero_skin, hero_url]
[ [unit,factor],[] ]
[
[keyword, [ ['百里玄策', '刺客|全部', '嚣狂之镰|威尼斯狂欢', 'http://game.gtimg.cn/images/yxzj/img201606/heroimg/195/195.jpg'], ['百里玄策', '刺客|全部', '嚣狂之镰|威尼斯狂欢', 'http://game.gtimg.cn/images/yxzj/img201606/heroimg/195/195.jpg'], ['百里玄策', '刺客|全部', '嚣狂之镰|威尼斯狂欢', 'http://game.gtimg.cn/images/yxzj/img201606/heroimg/195/195.jpg']]],
]
3 use list for storage
['百里玄策', '刺客|全部', '嚣狂之镰|威尼斯狂欢', 'http://game.gtimg.cn/images/yxzj/img201606/heroimg/195/195.jpg']
def get_keywords_array(hero):
    """Build the list of search keywords for one hero entry.

    *hero* is laid out as [hero_name, hero_type, hero_skin, hero_url].
    The result holds the name (when non-empty) followed by the "|"-separated
    parts of the type string and then of the skin string; empty fields
    contribute nothing. The URL is not used.
    """
    name, type_field, skin_field = hero[0], hero[1], hero[2]
    result = [name] if name else []
    for joined in (type_field, skin_field):
        if joined:
            result.extend(joined.split('|'))
    return result
def add_to_index(index, keyword, info):
    """Record *info* under *keyword* in the search index, in place.

    *index* is a list of [keyword, [info, ...]] entries. When an entry for
    *keyword* already exists, *info* is appended to its list; otherwise a
    new entry is added at the end of *index*.
    """
    for item in index:
        if item[0] != keyword:
            continue
        item[1].append(info)
        break
    else:
        # No existing entry for this keyword: start a new one.
        index.append([keyword, [info]])
def build_up_index(index_array):
    """Fill *index_array* with every hero in the module-level ``combined_heros``.

    Each hero entry is registered, via add_to_index(), under every keyword
    that get_keywords_array() produces for it. *index_array* is modified in
    place.
    """
    for record in combined_heros:
        for word in get_keywords_array(record):
            add_to_index(index_array, word, record)
# Look up hero information by keyword.
def lookup(index, keyword):
    """Return the hero entries stored under *keyword* in *index*.

    Args:
        index: list of [keyword, [hero_detail, ...]] entries.
        keyword: the exact keyword string to search for.

    Returns:
        The list of hero details of the first entry whose keyword equals
        *keyword*, or an empty list when there is no such entry (including
        when *index* itself is empty).
    """
    for entry in index:
        if entry[0] == keyword:
            return entry[1]
    # Not found: report "no matches" rather than leaking the loop variable,
    # which is the last keyword in the index and is unbound on an empty index.
    return []
# Start from an empty index and fill it in place with build_up_index().
search_index = []
build_up_index(search_index)

# Sample query for the keyword "苏烈"; the result is not stored.
lookup(search_index, "苏烈")