# Scrape the names and links of Python built-in functions from the Runoob tutorial site (runoob.com).
import os
from urllib.parse import urljoin

import requests
from lxml import etree
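'''
1. Scrape the names and links of Python's built-in functions from the Runoob
   tutorial site and save them to a file.
2. Scrape the descriptive text from each linked page.
   Status: step 2 failed (not working yet).
'''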
# Step 1: download the index page of Python built-in functions, extract every
# function name together with its link, and save them to a text file as
# "<name>\t<absolute url>" lines.
url = "https://www.runoob.com/python/python-built-in-functions.html"
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops us from parsing/saving an HTTP error page.
response = requests.get(url=url, headers=headers, timeout=10)
response.raise_for_status()
page_1 = response.text

tree = etree.HTML(page_1)
# Link captions (text nodes) and link targets of every table cell anchor.
a_list = tree.xpath('//div[@id="content"]/table/tbody/tr/td/a/text()')
a_herf = tree.xpath('//div[@id="content"]/table/tbody/tr/td/a/@href')  # attributes are selected with "@"
print(a_list)
print(a_herf)

# NOTE: the follow-up step (scraping each linked page; see the commented-out
# code further down in this file) returned empty lists and is not run here.
# The context manager guarantees the file is closed even if a write fails.
with open('python内置函数.txt', 'w', encoding='utf-8') as fp:
    # zip() pairs each caption with its href and stops at the shorter list,
    # so a length mismatch between the two queries cannot raise IndexError.
    for name, url_li in zip(a_list, a_herf):
        # Resolve the href against the page URL: handles both "/python/x.html"
        # and "x.html" style links without producing "//" or a wrong path.
        url1 = urljoin(url, url_li)
        fp.write(name + '\t' + url1 + '\n')
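# NOTE: abandoned attempt at step 2, kept for reference. As written it yields
# empty lists: it parses page_1 instead of the newly fetched page_2.text, and
# both XPath expressions end in `/text` (a child element named "text") rather
# than the `text()` node test.
# page_2 = requests.get(url=url1,headers=headers)
# tree1 = etree.HTML(page_1)
# p_list = tree1.xpath('//div[@class="article-intro"]/p/text')
# p_shuomin = tree1.xpath('//div[@id="content"]/blockquote/text') # this one is the descriptive note
# print(p_list)
# print('*'*40)
# print(p_shuomin)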
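# Keep striving: don't be afraid, don't over-plan, don't feel lost. But you must keep walking down the road; even if you may seem to be standing still, keep walking.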