爬取迷你mp4各个电影信息

网站:www.minimp4.com

# coding=utf-8
import requests
from lxml import etree
class Minimpe_moves(object):
    """Scraper that collects movie titles from www.minimp4.com listing pages.

    Each title found is printed and appended to ``movies.txt`` in the current
    working directory.
    """

    def Getmovies(self, page, timeout=10):
        """Fetch one listing page and record the title of every movie linked on it.

        Args:
            page: Value substituted into the ``page`` query parameter of the
                listing URL.
            timeout: Seconds passed to ``requests.get`` for every request, so
                that an unresponsive server cannot stall the crawl forever.

        Raises:
            requests.exceptions.RequestException: Propagated unchanged from
                ``requests.get`` (connection errors, timeouts, ...).
        """
        url = 'http://www.minimp4.com/movie/?page={}'.format(page)
        html = requests.get(url, timeout=timeout)
        htmml = etree.HTML(html.text)  # parse the listing page
        # Links to the individual movie pages.
        href = htmml.xpath('//div[@class="meta"]/h1/a/@href')

        for url_moves in href:
            html1 = requests.get(url_moves, timeout=timeout)
            htmml1 = etree.HTML(html1.text)
            # Extract the movie title; xpath() returns a (possibly empty) list.
            movie_name = htmml1.xpath('//div[@class="movie-meta"]/h1/text()')
            # The cast can be extracted the same way, e.g.:
            #   htmml1.xpath('//div[@class="movie-meta"]/p[@id="casts"]/a/text()')

            print(movie_name)
            Minimpe_moves.saveMovies(movie_name)

    @staticmethod
    def saveMovies(data):
        """Append the first item of *data* as one line to ``movies.txt``.

        Args:
            data: List of strings as returned by an XPath ``text()`` query.
                Only the first element is written. An empty list is ignored
                instead of raising ``IndexError``, so a page whose title
                could not be extracted does not abort the crawl.
        """
        if not data:
            return
        with open('movies.txt', 'a', encoding='utf-8') as f:
            f.write(data[0] + '\n')




# Entry-point guard: the code below runs only when this file is executed
# directly, not when it is imported from another module.
if __name__ == "__main__":
    minimp4 = Minimpe_moves()

    # Crawl listing pages 1 through 10. range(1, 11) is used because a bare
    # range(11) would also request a non-existent page 0.
    for n in range(1, 11):
        minimp4.Getmovies(n)

上图是各个电影的主演。基本上每部电影的各项信息都能爬取下来，只需修改对应的 XPath 即可。

 

 

 

posted @ 2018-08-09 00:01  龙~白  阅读(528)  评论(0)  编辑  收藏  举报