def _get_new_data(self, page_url, soup, final_str_name):
con_datas = []
res_data = {}
# url
res_data['url'] = page_url
res_data['name'] = 'hibernate'
contents = soup.find_all(attrs = {'class':'dl-horizontal'})
#print(contents)
for content in contents:
name = content.find('dt')
str_name = name.get_text()
str_name = str_name.strip()
conts = content.find_all('dd')
str_cont = conts[2].get_text() + '\n'+ conts[3].get_text() + ' : '
str_url = conts[3].find('a')
str_cont = str_cont + str_url['href']
con_data = {}
con_data['name'] = str_name
con_data['cont'] = str_cont
con_datas.append(con_data)
# print("111111")
# print(con_data['name'])
# print("333333")
# print(con_data['cont'])
# print("222222")
return res_data, con_datas
posted on 2017-07-11 14:44  朽木の半夏  阅读(155)  评论(0)  编辑  收藏  举报