python爬虫小练习

import re
import requests
from lxml import etree
from bs4 import BeautifulSoup

def novel():
    url = 'https://www.bqkan8.com/50_50096/18520412.html'
    response = requests.get(url)
    html = etree.HTML(response.text)
    title = re.findall("<h1>(.*?)</h1>", response.text)[0]
    print('标题：', title)
    content = html.xpath('//div[@id="content"]/text()')
    res = ''
    for i in content[:-2]:
        res += i.strip().replace('&1t;/p>', '')
    print('正文：', res)

def house():
    url = 'https://beijing.qfang.com/newhouse'
    r = requests.get(url)
    soup = BeautifulSoup(r.text, 'lxml')
    first_house_title = soup.select('.house-name')[0].text
    print(first_house_title)

if __name__ == '__main__':
    novel()
    house()

import requests
from lxml import etree
url = 'https://www.shanghairanking.cn/rankings/bcur/2021'
r = requests.get(url)
html = etree.HTML(r.content.decode())
tr_list = html.xpath('//*[@class="rk-table"]/tbody/tr')

for tr in tr_list:
    item = {}
    item['rank'] = tr.xpath('.//td[1]/div/text()')[0].strip()  # 排名
    item['name_cn'] = tr.xpath('.//*[@class="name-cn"]/text()')[0]  # 中文学校名称
    item['name_en'] = tr.xpath('.//*[@class="name-en"]/text()')[0]  # 英文学校名称
    item['tags'] = tr.xpath('.//*[@class="tags"]/text()')[0]  # 学校标签
    item['province'] = tr.xpath('.//td[3]/text()')[0].strip()  # 省份
    item['category'] = tr.xpath('.//td[4]/text()')[0].strip()  # 类别
    item['total_score'] = tr.xpath('.//td[5]/text()')[0].strip()  # 总分
    print(item)

posted @ 2022-04-08 15:38 不会飞的鲨鱼阅读(69) 评论(0) 编辑收藏举报

会员力量，点亮园子希望

刷新页面返回顶部

不会飞的鲨鱼

有的人一句话甚至一个眼神都能吸引你，而有的人努力一辈子也得不到你的心。

python爬虫小练习

公告