# 36kr technology channel: scrape the asynchronously loaded article list.
import random
import time

from lxml import etree
from selenium import webdriver
from selenium.common.exceptions import (
    ElementClickInterceptedException,
    ElementNotInteractableException,
    NoSuchElementException,
)
# Open the 36kr technology channel in Chrome, then repeatedly scroll / click
# "load more" and print the title, summary, source and publish time of every
# article found in the page snapshot taken at the top of each pass.
driver = webdriver.Chrome()
url = 'https://36kr.com/information/technology/'
try:
    driver.get(url)
    for page in range(1, 11):
        # Snapshot the DOM *before* this pass scrolls; the XPath queries below
        # run against this snapshot, not against the live page.
        # NOTE(review): if the site appends articles to the same document,
        # each pass will print the earlier articles again — confirm whether
        # that is intended.
        html = driver.page_source
        tree = etree.HTML(html)
        # Randomised pause between actions.
        time.sleep(random.randint(3, 5))
        print(f'********************第{page}页******************')
        driver.execute_script('window.scrollBy(0,2200)')
        try:
            # find_element("xpath", ...) is accepted by both Selenium 3 and 4;
            # the find_element_by_* helpers no longer exist in Selenium 4.
            driver.find_element(
                'xpath', '//div[@class="kr-loading-more-button show"]'
            ).click()
            print('点击查看更多')
        except (
            NoSuchElementException,
            ElementNotInteractableException,
            ElementClickInterceptedException,
        ):
            # Best-effort: the button may be absent or not clickable on this
            # pass; keep going, but do not hide unrelated errors.
            pass
        time.sleep(random.randint(3, 5))
        name = tree.xpath('//div[@class="article-item-info clearfloat"]/p/a//text()')
        detail = tree.xpath('//div[@class="article-item-info clearfloat"]/a//text()')
        froms = tree.xpath('//div[@class="kr-flow-bar"]/a//text()')
        times = tree.xpath('//span[@class="kr-flow-bar-time"]//text()')
        # zip stops at the shortest list, so a missing field on some article
        # can no longer raise IndexError part-way through the page.
        for title, summary, source, published in zip(name, detail, froms, times):
            print(f'标题:{title}\n内容:{summary}\n来源:{source}\n发布时间:{published}\n')
finally:
    # Always shut the browser and its driver process down, even on error.
    driver.quit()