BeautifulSoup

import requests
from bs4 import BeautifulSoup

def getHTMLText(url):
    """Fetch *url* and return the first 1000 characters of the response text.

    The request is sent with a browser-like ``user-agent`` header and a
    30-second timeout. The request headers actually sent are printed,
    followed by a separator line.

    Args:
        url: Address of the page to download.

    Returns:
        The first 1000 characters of the decoded body, or the string
        '产生异常' if the request fails or the server answers with an
        error status.
    """
    try:
        kv = {'user-agent': 'Mozilla/5.0'}
        r = requests.get(url, timeout=30, headers=kv)
        r.raise_for_status()    # raises HTTPError for 4xx/5xx responses
        # Use the encoding guessed from the body rather than the one
        # declared in the response headers.
        r.encoding = r.apparent_encoding
        print(r.request.headers)
        print('---------------')
        return r.text[:1000]
    except requests.RequestException:
        # Only network/HTTP failures are reported as the sentinel string;
        # KeyboardInterrupt and programming errors propagate normally.
        return '产生异常'


if __name__ == '__main__':
    # Demo: download a page, parse it and inspect the first <a> tag.
    url = 'http://www.baidu.com'
    demo = getHTMLText(url)

    soup = BeautifulSoup(demo, 'html.parser')
    print(soup.prettify())

    print(soup.title)

    # soup.a is None when the parsed text contains no <a> tag, which
    # happens when getHTMLText returns its error string or when the
    # truncated text stops before the first link.
    first_link = soup.a
    if first_link is None:
        print('no <a> tag found')
    else:
        print(first_link.name)
        print(first_link.parent.name)
        print(first_link.attrs)  # attributes of the tag
posted on 2020-03-15 18:39  HolaWorld  阅读(39)  评论(0)  编辑  收藏  举报

导航