Scrapy practice: crawling 电影天堂 (Movie Heaven)
movie.py
import scrapy
from movieProject.items import MovieprojectItem


class MovieSpider(scrapy.Spider):
    name = 'movie'
    allowed_domains = ['www.ygdy8.net']
    start_urls = ['https://www.ygdy8.net/html/gndy/china/index.html']

    def parse(self, response):
        print("电影天堂")
        # Each entry on the list page: the second <a> inside the title cell
        movieList = response.xpath('//table//tr[2]/td[2]/b/a[2]')
        for item in movieList:
            movieName = item.xpath('./text()').extract_first()
            movieUrl = 'https://www.ygdy8.net' + item.xpath('./@href').extract_first()
            print(movieName, movieUrl)
            # Use meta to pass movieName on to the second_parse callback
            yield scrapy.Request(url=movieUrl, callback=self.second_parse, meta={'movieName': movieName})

    def second_parse(self, response):
        print("二次解析之前")
        # We are now on the detail page; grab the image address (the poster src) from the Zoom block
        secondUrl = response.xpath('//div[@id="Zoom"]//img/@src').extract_first()
        print("第二次访问", secondUrl)
        movieName = response.meta['movieName']
        movie = MovieprojectItem(movieName=movieName, movieUrl=secondUrl)
        yield movie
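The spider is normally launched with scrapy crawl movie from the project root. If launching it from a script is more convenient, the following is a minimal sketch, assuming the standard Scrapy layout (this spider saved as movieProject/spiders/movie.py); the run.py filename is just an example.

# run.py - sketch for launching the spider from a script,
# equivalent to running `scrapy crawl movie` from the project root.
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

from movieProject.spiders.movie import MovieSpider

process = CrawlerProcess(get_project_settings())  # load the project's settings.py
process.crawl(MovieSpider)
process.start()  # blocks until the crawl finishes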
items.py
import scrapy


class MovieprojectItem(scrapy.Item):
    # define the fields for your item here like:
    # name = scrapy.Field()
    movieName = scrapy.Field()
    movieUrl = scrapy.Field()
pipelines.py
class MovieprojectPipeline:
    def open_spider(self, spider):
        # Open the output file once when the spider starts
        self.fp = open('movie.json', 'w', encoding='utf-8')

    def process_item(self, item, spider):
        self.fp.write(str(item))
        return item

    def close_spider(self, spider):
        self.fp.close()
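For the pipeline to be called at all, it has to be registered in settings.py. A minimal sketch, assuming the project package is named movieProject as in the import above; the priority 300 is the conventional default value, and ROBOTSTXT_OBEY = False is an assumption made because the target site may disallow crawlers via robots.txt.

# settings.py (sketch; assumes the project package is movieProject)
BOT_NAME = 'movieProject'

SPIDER_MODULES = ['movieProject.spiders']
NEWSPIDER_MODULE = 'movieProject.spiders'

# Register the pipeline so process_item() is actually invoked (300 = priority)
ITEM_PIPELINES = {
    'movieProject.pipelines.MovieprojectPipeline': 300,
}

# Assumption: skip robots.txt so the list and detail pages can be fetched
ROBOTSTXT_OBEY = False

Note that self.fp.write(str(item)) writes the Python representation of each item, not valid JSON, despite the movie.json filename; if a real JSON Lines file is wanted, writing json.dumps(dict(item), ensure_ascii=False) per item is one option.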