Twitter API 的使用：获取所关注用户的时间线
# -*- coding: utf-8 -*-
"""Collect the last week of tweets from every account the user follows."""
import datetime
import random
import time
import traceback

import pandas as pd
import tweepy
from tweepy import OAuthHandler

from dbs.db import *


class Twitter_Spider():
    """Crawl recent tweets of followed accounts and pass them to ``SaveData``.

    Results are stored column-wise: every ``*_list`` attribute is one column,
    and index ``i`` across all of the lists describes the same tweet.
    """

    def __init__(self):
        # URL template filled with (screen_name, tweet id).
        self.main_tw_url = "https://twitter.com/{}/status/{}"
        # One list per output column.
        self.china_time_list = []
        self.twitter_id_list = []
        self.twitter_url_list = []
        self.twitter_text_list = []
        self.update_time_list = []
        self.user_id_list = []
        self.user_name_list = []
        self.crate_time_list = []
        # Column name -> column list; this is what gets saved.
        self.twitter_dicts = {}
        # Followed account id -> screen_name.
        self.userdicts = {}
        # Set by getItem() once a tweet older than the cutoff is seen.
        self.stopflag = False

    def getapi(self):
        """Build and return an authenticated ``tweepy.API`` client."""
        # NOTE(review): credentials are hard-coded; move them to configuration
        # or environment variables before using this outside of a demo.
        consumer_key = 'IAaj345Xf673kzT2'
        consumer_secret = 'ee9WEQ235555We0gP4peRbOPeeHGX1'
        access_token = '9767625356VEnq7s9ZXOHEI'
        access_secret = 'lyqj2122333o9G4fHta'
        auth = OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_secret)
        return tweepy.API(auth)

    def start(self):
        """Crawl every followed account, then save all collected tweets."""
        try:
            api = self.getapi()
        except tweepy.TweepError:
            print('Error! Failed to get access token.')
            # Nothing below can work without an API client.
            return

        nowdate = datetime.datetime.now()
        beforeweek = nowdate - datetime.timedelta(days=7)  # one week ago

        # Map each followed account's id to its screen_name.
        # (To fetch only the ids, page through api.friends_ids instead.)
        for friends_page in tweepy.Cursor(api.friends).pages():
            for friend in friends_page:
                info = friend._json
                self.userdicts.setdefault(
                    info.get("id"), info.get("screen_name"))

        for user_id in self.userdicts:
            self._crawl_user(api, user_id, beforeweek)

        self._save()

    def _crawl_user(self, api, user_id, beforeweek):
        """Record one account's tweets page by page until the cutoff is hit."""
        self.stopflag = False
        timeline_pages = tweepy.Cursor(api.user_timeline, id=user_id).pages()
        for page_no, statuses in enumerate(timeline_pages):
            print("start page:{}".format(page_no))
            for status in statuses:
                if self.getItem(status._json, beforeweek, user_id):
                    break
            # Pause a few seconds after each page request to pace API calls.
            time.sleep(random.randint(2, 6))
            if self.stopflag:
                # Stop now rather than fetching another page just to drop it.
                break

    def _save(self):
        """Assemble the column dict and pass it to ``SaveData`` in one call."""
        # The column lists accumulate rows for all users, so saving once at
        # the end writes each collected tweet exactly one time.
        self.twitter_dicts.clear()
        self.twitter_dicts["user_id"] = self.user_id_list
        self.twitter_dicts["user_name"] = self.user_name_list
        self.twitter_dicts["china_time"] = self.china_time_list
        self.twitter_dicts["tw_time"] = self.crate_time_list
        self.twitter_dicts["tw_id"] = self.twitter_id_list
        self.twitter_dicts["tw_text"] = self.twitter_text_list
        self.twitter_dicts["tw_url"] = self.twitter_url_list
        self.twitter_dicts["updatetime"] = self.update_time_list
        try:
            SaveData().save_object_data(self.twitter_dicts)
        except Exception:
            # format_exc() takes a frame limit, not a message, so the label
            # is printed separately.
            print("insert db error")
            print(traceback.format_exc())

    def getItem(self, jsonstr, beforeweek, user_id):
        """Record one tweet unless it is older than the cutoff.

        Args:
            jsonstr: Tweet payload as a dict; ``created_at``, ``id`` and
                ``text`` are read from it.
            beforeweek: ``datetime`` cutoff; older tweets are not recorded.
            user_id: Key into ``self.userdicts`` for the tweet's author.

        Returns:
            ``True`` (after setting ``self.stopflag``) when the tweet is older
            than the cutoff and was skipped; otherwise the current value of
            ``self.stopflag`` after the tweet has been appended to the lists.
        """
        create_time = jsonstr.get("created_at")
        # Falls back to "" when the timestamp is missing or malformed; the
        # tweet is still recorded in that case.
        china_time = ""
        try:
            # NOTE(review): the format only accepts a literal "+0000" offset,
            # so this is a naive UTC datetime despite the name, while start()
            # derives the cutoff from local datetime.now(). Confirm whether a
            # conversion to China time was intended.
            china_time = datetime.datetime.strptime(
                create_time, "%a %b %d %H:%M:%S +0000 %Y")
        except (TypeError, ValueError):
            print(traceback.format_exc())
        else:
            if beforeweek > china_time:
                self.stopflag = True
                return self.stopflag

        screen_name = self.userdicts.get(user_id)
        twitter_id = jsonstr.get("id")
        twitter_text = jsonstr.get("text")
        print(twitter_text)

        self.china_time_list.append(china_time)
        self.crate_time_list.append(create_time)
        self.user_id_list.append(user_id)
        self.user_name_list.append(screen_name)
        self.twitter_id_list.append(twitter_id)
        self.twitter_url_list.append(
            self.main_tw_url.format(screen_name, twitter_id))
        self.twitter_text_list.append(twitter_text)
        self.update_time_list.append(datetime.datetime.now())
        return self.stopflag


if __name__ == "__main__":
    Twitter_Spider().start()
本文作者:一起来学python
本文链接:https://www.cnblogs.com/c-x-a/p/8623816.html
版权声明:本作品采用知识共享署名-非商业性使用-禁止演绎 2.5 中国大陆许可协议进行许可。
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
2017-03-22 php 日期格式转换万能公式