Python爬取2345电影目录

参考:https://blog.csdn.net/qq_21933615/article/details/81171951
废话不多说直接上代码

from urllib import request
from bs4 import BeautifulSoup

url = 'http://kan.2345.com/vip/list/-----.html'  # this page is served as gb2312, so getHtml decodes it as gb2312


# url = 'https://movie.douban.com/cinema/nowplaying/shanghai/'  # utf-8


def getHtml(url, encoding="gb2312"):
    """Fetch *url* and return the response body decoded as text.

    Args:
        url: Address of the page to download.
        encoding: Codec used to decode the response bytes. Defaults to
            "gb2312"; pass e.g. "utf-8" for pages served in another
            encoding.

    Returns:
        The decoded page source when the response status is 200.
        Otherwise a notice is printed and None is returned.

    Raises:
        urllib.error.URLError: If the request itself fails (urlopen
            raises this, including HTTPError for error statuses).
        UnicodeDecodeError: If the body is not valid in *encoding*.
    """
    # A browser-like User-Agent is sent with the request instead of
    # urllib's default one.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
    }
    req = request.Request(url, headers=headers)

    # The context manager closes the connection even when reading or
    # decoding raises, so the socket is never leaked.
    with request.urlopen(req) as response:
        if response.getcode() != 200:
            print('返回头不是200')
            return None
        return response.read().decode(encoding)


def analysisData(url):
    """Download the listing page at *url* and print every movie title.

    The titles are taken from the ``em.emTit`` element of each ``li``
    inside the first ``ul.v_picTxt`` of the first ``div.v_picConBox``.

    Args:
        url: Address of the listing page; it is fetched with getHtml.

    Returns:
        None. Titles are written to standard output, one per line. If
        the page could not be fetched or the expected list is absent,
        nothing is listed.
    """
    html = getHtml(url)
    if html is None:
        # getHtml prints its own notice when it returns None; there is
        # nothing to parse.
        return

    soup = BeautifulSoup(html, 'html.parser')

    # find() returns None when nothing matches, so check each level
    # before descending instead of failing with AttributeError.
    container = soup.find('div', attrs={'class': 'v_picConBox'})
    movie_list = None
    if container is not None:
        movie_list = container.find('ul', attrs={'class': 'v_picTxt'})
    if movie_list is None:
        print('未找到电影列表')
        return

    # Only the title is printed, so the image URL and score are not
    # looked up; a list item missing either can no longer abort the loop.
    for item in movie_list.find_all('li'):
        title_tag = item.find('em', attrs={'class': 'emTit'})
        if title_tag is None:
            continue
        print(title_tag.getText())


# Script entry point: fetch the listing page at `url` and print the movie
# titles. There is no __main__ guard, so this also runs on import.
analysisData(url)

其实跟 PHP 的写法差不多,接下来要好好看看“美味汤”(BeautifulSoup)的语法。

posted @ 2019-07-11 17:27  蜗牛使劲冲  阅读(9)  评论(0)  编辑  收藏  举报  来源