Python 爬取 2345 影视电影目录
参考:https://blog.csdn.net/qq_21933615/article/details/81171951
废话不多说,直接上代码:
from urllib import request
from bs4 import BeautifulSoup
url = 'http://kan.2345.com/vip/list/-----.html' # This page is served as gb2312, so its body must be decoded with a GB-family codec rather than utf-8
# url = 'https://movie.douban.com/cinema/nowplaying/shanghai/' # utf-8
def getHtml(url, encoding="gb18030", timeout=10):
    """Download *url* and return its body decoded to text.

    Args:
        url: Address of the page to fetch.
        encoding: Codec used to decode the response body. The default,
            ``gb18030``, is a superset of gb2312/gbk, so pages that declare
            gb2312 but contain characters outside it still decode. Pass
            ``"utf-8"`` for UTF-8 pages.
        timeout: Seconds to wait on the connection before giving up.

    Returns:
        The decoded HTML string when the status code is 200; otherwise
        ``None`` (after printing a notice).

    Raises:
        urllib.error.URLError: Propagated from ``urlopen``; this includes
            ``HTTPError``, which urllib raises for HTTP error statuses.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
    }
    req = request.Request(url, headers=headers)
    # The context manager closes the response (and its socket) on every path.
    with request.urlopen(req, timeout=timeout) as response:
        if response.getcode() != 200:
            print('返回头不是200')
            return None
        # errors="replace" keeps a single malformed byte from aborting the
        # whole page; it shows up as U+FFFD in the result instead.
        return response.read().decode(encoding, errors="replace")
def analysisData(url):
    """Scrape the movie list at *url*, print each title, and return the rows.

    The page is fetched with ``getHtml`` and parsed with BeautifulSoup. Movie
    entries are the ``<li>`` items of ``ul.v_picTxt`` inside
    ``div.v_picConBox``.

    Args:
        url: Address of the listing page.

    Returns:
        A list of dicts with keys ``'title'``, ``'score'`` and ``'img'``, one
        per entry that has a title. ``'score'`` and ``'img'`` are ``None``
        when the corresponding element is absent. The list is empty when the
        page could not be fetched or the expected containers are missing.
    """
    html = getHtml(url)
    if html is None:
        # getHtml already reported the failure; there is nothing to parse.
        return []

    soup = BeautifulSoup(html, 'html.parser')
    # find() returns None when nothing matches, so each level is checked
    # before descending to avoid an AttributeError on a changed layout.
    container = soup.find('div', attrs={'class': 'v_picConBox'})
    if container is None:
        return []
    listing = container.find('ul', attrs={'class': 'v_picTxt'})
    if listing is None:
        return []

    movies = []
    for item in listing.find_all('li'):
        title_tag = item.find('em', attrs={'class': 'emTit'})
        if title_tag is None:
            # Not a movie entry (e.g. a filler <li>); skip it.
            continue
        score_tag = item.find('em', attrs={'class': 'emScore'})
        img_tag = item.find('img')

        title = title_tag.get_text()
        movies.append({
            'title': title,
            'score': score_tag.get_text() if score_tag is not None else None,
            # The real image URL is read from the data-src attribute.
            'img': img_tag.get('data-src') if img_tag is not None else None,
        })
        print(title)
    return movies
# Run the scraper against the configured page when this file is executed.
analysisData(url)
其实思路跟 PHP 差不多,另外就是“美味汤”(BeautifulSoup)的语法要好好看看。