Python_爬虫_基础
1. urllib 和 XPath 的区别与联系
# Demo: download one job-detail page with urllib, then extract text from it
# twice -- first with lxml XPath queries, then with BeautifulSoup.
from urllib import request

from bs4 import BeautifulSoup
from lxml import etree

url = "https://www.zhipin.com/job_detail/1418671405.html?ka=search_list_1"


def fetch_html(page_url, timeout=10):
    """Return the body of *page_url* decoded as UTF-8.

    The response is used as a context manager so the connection is closed
    even if reading or decoding fails. *timeout* is in seconds and keeps a
    stalled server from blocking the script indefinitely.
    """
    with request.urlopen(page_url, timeout=timeout) as response:
        return response.read().decode("utf8")


def first_or_default(matches, default=""):
    """Return the first item of *matches*, or *default* when it is empty.

    An XPath query that matches nothing yields an empty list, so indexing
    it with ``[0]`` directly would raise IndexError.
    """
    return matches[0] if matches else default


# Download once; both parsers below work on the same decoded HTML string.
req = fetch_html(url)

# ---------------------------------------------------------------------------
# Approach 1: lxml + XPath
# ---------------------------------------------------------------------------
sel = etree.HTML(req)  # parse the HTML text into an element tree
job_desc = first_or_default(
    sel.xpath(r'//*[@id="main"]/div[3]/div/div[2]/div[3]/div[1]/div/text()')
)
company_desc = first_or_default(
    sel.xpath(r'//*[@id="main"]/div[3]/div/div[2]/div[3]/div[2]/div/text()')
)
print(job_desc)
print(company_desc)

# ---------------------------------------------------------------------------
# Approach 2: BeautifulSoup
# ---------------------------------------------------------------------------
soup = BeautifulSoup(req, 'html.parser')
# BeautifulSoup is queried by passing filter conditions to find()/find_all();
# XPath expressions cannot be used here. The calls below are left without
# filters as a placeholder -- add tag names / attributes to narrow the result.
root = soup.find()
# find() returns None when nothing matches, so guard before chaining.
job_desc = root.find_all() if root is not None else []
print(job_desc)