python Scrapy google trends
# -*- coding: utf-8 -*-
import json
from urllib import parse

import scrapy


class GoogleTrendsSpider(scrapy.Spider):
    """Download the Google Trends "TIMESERIES" CSV for one keyword.

    The crawl is a two-step chain:

    1. ``start_requests`` asks the explore endpoint for the widget list.
    2. ``parse_token`` picks the ``TIMESERIES`` widget and requests its CSV,
       which ``parse_row`` then prints.

    ``keyword`` and ``coin_id`` are plain attributes, so they can be
    overridden per run with Scrapy spider arguments
    (``scrapy crawl google_trends -a keyword=bitcoin -a coin_id=1``).
    """

    name = 'google_trends'
    allowed_domains = ['google.com']
    # Endpoint that returns the widget descriptors (each carries a token).
    GENERAL_URL = 'https://trends.google.com/trends/api/explore?{}'
    # Endpoint that returns the CSV for a widget, given its token.
    INTEREST_OVER_TIME_URL = 'https://trends.google.com/trends/api/widgetdata/multiline/csv?{}'
    # Enable the project's random user-agent and proxy downloader middlewares.
    custom_settings = {
        'DOWNLOADER_MIDDLEWARES': {
            'blockchain.middlewares.RandomUserAgent': 390,
            'blockchain.middlewares.RandomProxy': 544,
        },
        # 'COOKIES_ENABLED': False
        'DOWNLOAD_DELAY': 1,
    }

    # Search term sent to Google Trends; override with ``-a keyword=...``.
    keyword = '关键字'
    # Identifier carried through ``meta['item']`` to the callbacks;
    # override with ``-a coin_id=...``.
    coin_id = None

    def start_requests(self):
        """Yield the explore request whose response contains the widget tokens.

        The query parameters mirror the ones the Trends web page sends when
        its CSV download is used:
        ``'keyword': <search term>, 'time': 'now 7-d', 'geo': ''``.

        Yields:
            scrapy.Request: explore request handled by ``parse_token``. Its
            ``meta['item']`` carries ``coin_id`` so later callbacks can tell
            which record the data belongs to.
        """
        req = {
            'comparisonItem': [
                {'keyword': self.keyword, 'time': 'now 7-d', 'geo': ''},
            ],
            'category': 0,
        }
        token_payload = {
            'hl': 'en-US',
            'tz': '-480',
            # json.dumps escapes non-ASCII characters, so the value stays
            # plain ASCII before it is URL-encoded.
            'req': json.dumps(req),
            'property': '',
        }
        url = self.GENERAL_URL.format(parse.urlencode(token_payload))
        # Yield the request directly: the previous version appended to an
        # undefined ``reqs`` list and read an undefined ``row``, so it raised
        # NameError before any request was scheduled.
        yield scrapy.Request(
            url=url,
            callback=self.parse_token,
            meta={'item': {'coin_id': self.coin_id}},
        )

    def parse_token(self, response):
        """Extract the TIMESERIES widget token and request its CSV.

        Args:
            response: response of the explore request.

        Yields:
            scrapy.Request: one CSV request per ``TIMESERIES`` widget found,
            handled by ``parse_row`` and carrying the same ``meta['item']``.
        """
        # The first four characters are dropped before JSON parsing —
        # presumably a non-JSON guard prefix added by the API; confirm
        # against a live response.
        payload = json.loads(response.body.decode('utf-8')[4:])
        for widget in payload['widgets']:
            if widget['id'] != 'TIMESERIES':
                continue
            params = {
                'tz': '-480',
                'req': json.dumps(widget['request']),
                'token': widget['token'],
            }
            url = self.INTEREST_OVER_TIME_URL.format(parse.urlencode(params))
            yield scrapy.Request(
                url=url,
                callback=self.parse_row,
                meta={'item': response.meta['item']},
            )

    def parse_row(self, response):
        """Decode the CSV response as UTF-8 and print it.

        Args:
            response: response of the CSV request issued by ``parse_token``.
        """
        bodytext = response.body.decode('utf-8')
        print(bodytext)
抄自:pytrends
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步