import cookielib
import time
import urllib
import urllib2

# Endpoint that receives the login form.
LOGIN_URL = 'http://www.renren.com/PLogin.do'
# Profile page requested once the session cookie has been obtained.
PROFILE_URL = 'http://www.renren.com/profile.do?id=279124056'


def visit(user, password, profile_url=PROFILE_URL):
    """Log in to renren.com and request a profile page with that session.

    The credentials are POSTed to LOGIN_URL through an opener backed by a
    fresh cookie jar, the first 500 bytes of the login response are printed,
    the jar is checked for a cookie named "id", and finally ``profile_url``
    is requested through the same opener so the session cookies are sent.

    Args:
        user: Value sent as the ``email`` form field.
        password: Value sent as the ``password`` form field.
        profile_url: URL requested after a successful login. Defaults to
            PROFILE_URL, the address the original script hard-coded.

    Raises:
        ValueError: If no cookie named "id" is present after the login
            request.
    """
    # A private jar/opener pair keeps this session's cookies separate from
    # urllib2's global default opener.
    cookiejar = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))

    form = urllib.urlencode({
        'email': user,
        'password': password,
        'domain': 'renren.com',
    })

    # The form is posted once, through the cookie-aware opener.  An earlier
    # revision also posted it through urllib2.urlopen() and discarded the
    # result; that request could not populate ``cookiejar`` and was dropped.
    response = opener.open(urllib2.Request(LOGIN_URL, form))
    try:
        page = response.read(500)
    finally:
        # Close explicitly: this function is called from an endless loop.
        response.close()
    print(page)

    # Per the original author's note, the "id" cookie is the one carrying
    # the session identifier, so its absence is treated as a failed login.
    if not any(cookie.name == 'id' for cookie in cookiejar):
        # NOTE(review): the message embeds the plaintext password; it is
        # kept unchanged here, but consider removing it from the text.
        raise ValueError(
            "Login failed with login=%s, password=%s" % (user, password))
    print("We are logged in !")

    # Same opener, so the cookies stored in ``cookiejar`` are sent along.
    response = opener.open(profile_url)
    try:
        response.read(200000)
    finally:
        response.close()


# Visit the profile forever, pausing 16 minutes after every visit.
# NOTE(review): the original indentation was lost; the final print is assumed
# to belong to the loop body, since after an endless loop it could never run.
while True:
    visit('', '')
    time.sleep(16 * 60)
    visit('', '')
    time.sleep(16 * 60)
    print("ok")
from sgmllib import SGMLParser
import cookielib
import sys
import urllib
import urllib2


class spider(SGMLParser):
    """SGML parser that logs in to renren.com and collects name/text pairs.

    Parsing state (all driven by the tag handlers below):

    * ``h3``           -- True between ``<h3>`` and ``</h3>``.
    * ``h3_is_ready``  -- True once an ``</h3>`` has been seen, until the
                          enclosing content ``<div>`` is closed.
    * ``div``          -- True while inside a ``<div class="content">`` that
                          was opened after an ``<h3>``.
    * ``h3_and_div``   -- set and cleared together with ``div``; gates the
                          collection of text inside the content div.
    * ``a``            -- True while inside an ``<a>`` that opened within an
                          ``<h3>`` or a content div.
    * ``depth``        -- number of nested ``<div>`` elements currently open
                          inside the content div.
    * ``names``        -- text accumulated from ``<a>`` elements inside
                          ``<h3>``; used as the key into ``dic``.
    * ``dic``          -- maps ``names`` to the list of text fragments
                          recorded for it.

    NOTE(review): the original file had lost its indentation; the nesting of
    the conditionals in ``start_div``, ``end_div`` and ``handle_data`` is the
    most plausible reading and should be confirmed against real pages.
    """

    def __init__(self, email, password):
        """Store the credentials and install a cookie-aware global opener.

        Args:
            email: Value sent as the ``email`` login form field.
            password: Value sent as the ``password`` login form field.
        """
        SGMLParser.__init__(self)
        self.h3 = False
        self.h3_is_ready = False
        self.div = False
        self.h3_and_div = False
        self.a = False
        self.depth = 0
        self.names = ""
        self.dic = {}
        self.email = email
        self.password = password
        self.domain = 'renren.com'

        # Installing the opener globally makes every later urllib2.urlopen()
        # call (including login()) share this cookie jar.  Each new spider
        # replaces the previously installed opener.
        cookie_jar = cookielib.CookieJar()
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))
        urllib2.install_opener(opener)

    def login(self):
        """POST the credentials to the login URL; keep the body in ``file``.

        The whole response body is stored in ``self.file`` so that it can
        later be passed to ``feed()``.
        """
        url = 'http://www.renren.com/PLogin.do'
        form = urllib.urlencode({
            'email': self.email,
            'password': self.password,
            'domain': self.domain,
        })
        response = urllib2.urlopen(urllib2.Request(url, form))
        try:
            self.file = response.read()
        finally:
            response.close()

    def start_h3(self, attrs):
        """Mark that the parser is inside an ``<h3>`` element."""
        self.h3 = True

    def end_h3(self):
        """Leave the ``<h3>`` and allow a following content div to be used."""
        self.h3 = False
        self.h3_is_ready = True

    def start_a(self, attrs):
        """Track ``<a>`` elements, but only inside an h3 or a content div."""
        if self.h3 or self.div:
            self.a = True

    def end_a(self):
        """Mark that the parser has left the ``<a>`` element."""
        self.a = False

    def start_div(self, attrs):
        """Enter a content div, or count a div nested inside one.

        Divs are ignored entirely until an ``</h3>`` has been seen.
        """
        if not self.h3_is_ready:
            return
        if self.div:
            # Already inside the content div: this one is nested.
            self.depth += 1
        if any(key == 'class' and value == 'content' for key, value in attrs):
            self.div = True
            self.h3_and_div = True  # the h3 and this div belong together

    def end_div(self):
        """Close a nested div, or reset the state when the content div ends."""
        if self.depth == 0:
            self.div = False
            self.h3_and_div = False
            self.h3_is_ready = False
            self.names = ""
        if self.div:
            self.depth -= 1

    def handle_data(self, text):
        """Record text as part of the name or as a fragment for that name."""
        # Text of a link inside the h3 is part of the name.
        if self.h3 and self.a:
            self.names += text
        # Non-empty h3 text outside a link is recorded under the current name.
        if self.h3 and not self.a and text:
            self.dic.setdefault(self.names, []).append(text)
            return
        # Text inside the content div is recorded under the current name too.
        if self.h3_and_div:
            self.dic.setdefault(self.names, []).append(text)

    def show(self):
        """Print every collected name followed by its joined text.

        Spaces are stripped from both parts, which are then decoded as UTF-8
        and re-encoded for output.
        """
        # getfilesystemencoding() may return None on Python 2, which would
        # make encode() fail; fall back to the interpreter default then.
        encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
        for name, fragments in self.dic.items():
            shown_name = name.replace(' ', '')
            shown_text = ''.join(fragments).replace(' ', '')
            print("%s %s" % (
                shown_name.decode('utf-8').encode(encoding),
                shown_text.decode('utf-8').encode(encoding),
            ))

    def visit(self, want_url):
        """Placeholder; currently does nothing.

        The original draft (left commented out) opened ``want_url`` with the
        cookie-aware opener, wrote the page to "a.html", and compared the
        final URL with ``want_url`` to tell a successful visit from a failed
        login.
        """
        pass


PROFILE_URL = "http://www.renren.com/profile.do?id=279124056"

# Log in and "visit" the profile three times, each time with a fresh spider
# (and therefore a fresh cookie jar).  The credentials are blank here.
# feed() is never called, so ``dic`` stays empty and show() prints nothing;
# it is still invoked on the first pass only, as in the original script.
for attempt in range(3):
    renrenspider = spider("", "")
    renrenspider.login()
    # renrenspider.feed(renrenspider.file)
    if attempt == 0:
        renrenspider.show()
    renrenspider.visit(PROFILE_URL)
import cookielib
import time
import urllib
import urllib2

# Endpoint that receives the login form.
LOGIN_URL = 'http://www.renren.com/PLogin.do'
# Profile page requested once the session cookie has been obtained.
PROFILE_URL = 'http://www.renren.com/profile.do?id=279124056'


def visit(user, password, profile_url=PROFILE_URL):
    """Log in to renren.com and request a profile page with that session.

    The credentials are POSTed to LOGIN_URL through an opener backed by a
    fresh cookie jar, the jar is checked for a cookie named "id", and then
    ``profile_url`` is requested through the same opener so the session
    cookies are sent.

    Args:
        user: Value sent as the ``email`` form field.
        password: Value sent as the ``password`` form field.
        profile_url: URL requested after a successful login. Defaults to
            PROFILE_URL, the address the original script hard-coded.

    Raises:
        ValueError: If no cookie named "id" is present after the login
            request.
    """
    # A private jar/opener pair keeps this session's cookies separate from
    # urllib2's global default opener.
    cookiejar = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))

    form = urllib.urlencode({
        'email': user,
        'password': password,
        'domain': 'renren.com',
    })

    # The form is posted once, through the cookie-aware opener.  An earlier
    # revision also posted it through urllib2.urlopen() and discarded the
    # result; that request could not populate ``cookiejar`` and was dropped.
    response = opener.open(urllib2.Request(LOGIN_URL, form))
    try:
        # The first 500 bytes are read, as before, but no longer printed.
        response.read(500)
    finally:
        # Close explicitly: this function is called from an endless loop.
        response.close()

    # Per the original author's note, the "id" cookie is the one carrying
    # the session identifier, so its absence is treated as a failed login.
    if not any(cookie.name == 'id' for cookie in cookiejar):
        # NOTE(review): the message embeds the plaintext password; it is
        # kept unchanged here, but consider removing it from the text.
        raise ValueError(
            "Login failed with login=%s, password=%s" % (user, password))
    print("We are logged in !")

    # Same opener, so the cookies stored in ``cookiejar`` are sent along.
    response = opener.open(profile_url)
    try:
        response.read(200000)
    finally:
        response.close()


# NOTE(review): ``count`` is incremented on every pass but never read in the
# visible code; kept in case something outside this view relies on it.
count = 0
# Visit the profile forever, logging a timestamp and pausing 181 seconds
# between visits.
while True:
    count += 5
    visit('vista.blueprint@gmail.com', '')
    print('vista ' + time.strftime('%H:%M:%S', time.localtime()))
    time.sleep(181)