导出网页邮箱里的联系人-python
网上一位高手写的,转载过来。代码里为 Gmail、126、网易(163)、搜狐、Hotmail、新浪、雅虎、MSN 都定义了对应的类,经测试正常;不过从下面的代码看,新浪、雅虎、MSN 目前还只是空类,Hotmail 也只是把返回的页面打印出来。使用前需要先安装 BeautifulSoup 库(代码按 Python 2 编写)。
#!/usr/bin/env python
#coding=utf-8
"""Export the address book of several web-mail accounts.

Each provider is a ``MailContact`` subclass offering ``login()`` and
``get_contacts()``; the latter fills ``self.contacts`` with
``(name, address)`` tuples.  ``get_mailcontact()`` is the one-call entry
point, and ``get_csvcontact()`` / ``get_imcontact()`` extract addresses from
exported CSV data.

The script was written for Python 2 with BeautifulSoup 3.  The imports fall
back to the Python 3 module names so the module can at least be imported and
its pure helpers exercised there.

NOTE(review): hosts, form fields and cookie values below were hard-coded by
the original author from captured sessions; presumably they are stale and
need re-checking against the live services.
"""

import csv
import os
import re
import urllib

try:  # Python 2 module names (what this script was written against)
    import cookielib
    import httplib
    import urllib2
    urlencode = urllib.urlencode
except ImportError:  # the same modules under their Python 3 names
    import http.client as httplib
    import http.cookiejar as cookielib
    import urllib.request as urllib2
    from urllib.parse import urlencode

try:
    from BeautifulSoup import BeautifulSoup
except ImportError:
    # Reported by _parse_html() only when a page actually has to be parsed.
    BeautifulSoup = None

try:
    import simplejson
except ImportError:
    import json as simplejson  # standard-library equivalent

GDATA_URL = '/accounts/ClientLogin'

# The local part may contain dots, plus signs and hyphens, not just \w.
_EMAIL_RE = re.compile(r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+')


class MailContactError(Exception):
    """Raised when a login is rejected or a provider request fails."""


def _as_text(data):
    """Return *data* as text; bytes (Python 3 response bodies) are decoded."""
    if isinstance(data, bytes) and not isinstance(data, str):
        return data.decode('utf-8', 'replace')
    return data


def _http_request(connection_class, host, method, url, body=None,
                  headers=None, failure=None):
    """Send one HTTP(S) request and return ``(response, body)``.

    The connection is always closed, also when an error occurs.  When
    *failure* is given, any status other than 200 raises ``MailContactError``
    whose message starts with that text; when it is None the status is not
    checked.
    """
    conn = connection_class(host)
    try:
        conn.request(method, url, body, headers or {})
        response = conn.getresponse()
        if failure is not None and response.status != 200:
            raise MailContactError("%s HTTP Code: %s, %s" % (
                failure, response.status, response.reason))
        return response, response.read()
    finally:
        conn.close()


def _cookie_value(set_cookie, name):
    """Return the value of cookie *name* in a raw Set-Cookie header, or None."""
    prefix = name + '='
    for token in (set_cookie or '').split():
        if token.startswith(prefix):
            return token[len(prefix):].split(';', 1)[0]
    return None


def _parse_html(page):
    """Parse *page* with BeautifulSoup, failing clearly if it is missing."""
    if BeautifulSoup is None:
        raise MailContactError(
            "BeautifulSoup is required to parse the contact page.")
    return BeautifulSoup(page)


class MailContact(object):
    """Common interface of all providers.

    Subclasses override ``login()`` and ``get_contacts()``.  The defaults do
    nothing, so an unimplemented provider simply ends up with an empty
    ``contacts`` list.
    """

    mail_type = ''

    def __init__(self, username, password):
        self.username = username
        self.password = password
        self.contacts = []  # populated by get_contacts()

    def login(self):
        pass

    def get_contacts(self):
        pass

    def get_contact_page(self):
        pass


class GMailContact(MailContact):
    """
    A class to retrieve a user's contacts from their Google Account.

    Dependencies:
    -------------
    * BeautifulSoup.
    * That's it. :-)

    Usage:
    ------
    >>> g = GMailContact('email@example.org', 'password')
    >>> g.login()
    (200, 'OK')
    >>> g.get_contacts()
    >>> g.contacts
    [(u'Persons Name', 'name@person.com'), ...]
    """

    mail_type = "@gmail.com"

    def __init__(self, username='test@gmail.com', password='test',
                 service='cp'):
        MailContact.__init__(self, username, password)
        # Only a bare user name gets the domain appended; a full address
        # (including the default and hosted domains) is used as given.
        if '@' not in username:
            self.username = username + self.mail_type
        # Allow both Google Domain and Gmail accounts
        self.account_type = 'HOSTED_OR_GOOGLE'
        self.service = service  # Defaults to cp (contacts)
        # NOTE(review): value reconstructed from a truncated listing - confirm.
        self.source = 'google-data-import'
        self.code = ''  # auth token, set by login()

    def login(self):
        """
        Login to Google. No arguments.

        Stores the auth token in ``self.code`` and returns
        ``(status, reason)``.  Raises ``MailContactError`` when the status is
        not 200.
        """
        data = urlencode({
            'accountType': self.account_type,
            'Email': self.username,
            'Passwd': self.password,
            'service': self.service,
            'source': self.source,
        })
        headers = {
            'Content-type': 'application/x-www-form-urlencoded',
            'Accept': 'text/plain',
        }
        response, body = _http_request(
            httplib.HTTPSConnection, 'google.com', 'POST', GDATA_URL,
            data, headers, failure="Couldn't log in.")
        # The token is whatever follows the last '=' of the response body.
        self.code = _as_text(body).split("=")[-1].strip()
        return response.status, response.reason

    def _request(self, max_results=200):
        """
        Base function for requesting the contacts.

        We'll allow other methods eventually.  Returns the raw feed; raises
        ``MailContactError`` when the status is not 200.
        """
        url = '/m8/feeds/contacts/%s/base/?max-results=%d' % (
            self.username, max_results)
        headers = {'Authorization': 'GoogleLogin auth=%s' % self.code}
        return _http_request(
            httplib.HTTPConnection, 'www.google.com', 'GET', url,
            headers=headers, failure="Couldn't log in.")[1]

    def get_contacts(self, max_results=200):
        """
        Parses the contacts (using BeautifulSoup) from self._request, and
        then populates self.contacts.
        """
        soup = _parse_html(self._request(max_results))
        self.contacts = []
        for entry in soup.findAll('title'):
            # Keep only parents holding exactly one title and one gd:email.
            pair = entry.parent.findAll(['gd:email', 'title'])
            if len(pair) == 2:
                self.contacts.append((pair[0].string, pair[1].get('address')))


class _CoremailContact(MailContact):
    """Address-book download shared by the 126.com and 163.com classes.

    ``login()`` of the subclass must set ``contact_host``, ``contact_url``
    and ``contact_headers``.
    """

    contact_host = None
    contact_url = None
    contact_headers = None

    def get_contact_page(self):
        """Return the raw address-book page; ``login()`` must come first."""
        if not self.contact_headers:
            raise MailContactError(
                "login() must succeed before contacts can be fetched.")
        return _http_request(
            httplib.HTTPConnection, self.contact_host, 'GET',
            self.contact_url, headers=self.contact_headers,
            failure="Couldn't get contact page.")[1]

    def get_contacts(self):
        """Fill ``self.contacts`` from the ``<xmp>`` tags whose id starts with 't'."""
        soup = _parse_html(self.get_contact_page())
        self.contacts = []  # start fresh so repeated calls do not duplicate
        for xmp in soup.findAll('xmp'):
            if (xmp.get('id') or '').startswith('t'):
                # NOTE(review): ``xmp.space`` is kept from the original
                # listing; confirm it matches the page markup.
                self.contacts.append((xmp.contents[0], xmp.space.string))


class M126Contact(_CoremailContact):
    """Contacts of a 126.com mailbox; *username* is given without the domain."""

    mail_type = "@126.com"
    # Kept separate from self.contact_url so login() can run more than once.
    CONTACT_URL_TEMPLATE = (
        '/coremail/fcg/ldvcapp?funcid=prtsearchres&sid=%(sid)s'
        '&listnum=200&tempname=address%%2faddress.htm')
    COOKIE_TEMPLATE = '; '.join((
        'MAIL126_SSN=%(user)s',
        'NETEASE_SSN=%(user)s',
        'nts_mail_user=%(user)s',
        'logType=df',
        'ntes_mail_firstpage=normal',
        'Coremail=%(coremail)s',
        'mail_host=g2a14.mail.126.com',
        'mail_sid=%(sid)s',
        'mail_uid=%(user)s@126.com',
        'mail_style=dm3',
        'oulink_h=520',
        'ntes_mail_noremember=true',
    ))

    def __init__(self, username, password):
        MailContact.__init__(self, username, password)
        self.login_host = 'entry.mail.126.com'
        self.login_url = ('/cgi/login?redirTempName=https.htm&hid=10010102'
                          '&lightweight=1&verifycookie=1&language=0&style=-1')
        self.login_data = urlencode({
            'domain': '126.com',
            'language': 0,
            'bCookie': '',
            'user': self.username,
            'pass': self.password,
            'style': -1,
            'remUser': '',
            'secure': '',
            # NOTE(review): already percent-encoded, so urlencode() encodes
            # it a second time; kept as in the original.
            'enter.x': '%B5%C7+%C2%BC',
        })
        self.login_headers = {
            'Content-type': 'application/x-www-form-urlencoded',
            'Accept': 'text/xml,text/plain',
            'Referer': 'http://www.126.com/',
        }
        self.contact_host = 'g2a10.mail.126.com'
        self.contact_url = self.CONTACT_URL_TEMPLATE

    def login(self):
        """Log in and derive the session id from the ``Coremail`` cookie.

        Raises ``MailContactError`` when the request fails or no
        ``Coremail`` cookie is returned (wrong user name or password).
        """
        response, _ = _http_request(
            httplib.HTTPSConnection, self.login_host, 'POST', self.login_url,
            self.login_data, self.login_headers, failure="Couldn't log in.")
        # e.g. "Coremail=aaYgsaQsvSmKa%MBgzxnddkKzjPJUTbMddRUIgVwfeiBUd; path=/; domain=.126.com"
        coremail = _cookie_value(response.getheader('Set-Cookie'), 'Coremail')
        if not coremail:
            # The password is deliberately left out of the message.
            raise MailContactError(
                "Email user %s%s or password not correct!"
                % (self.username, self.mail_type))
        sid = coremail[coremail.find('%') + 1:]  # sid follows the '%'
        self.contact_url = self.CONTACT_URL_TEMPLATE % {'sid': sid}
        self.contact_headers = {
            'Cookie': self.COOKIE_TEMPLATE % {
                'user': self.username, 'coremail': coremail, 'sid': sid},
        }


class M163Contact(_CoremailContact):
    """Contacts of a 163.com mailbox; *username* is given without the domain."""

    mail_type = "@163.com"
    COOKIE_TEMPLATE = '; '.join((
        'MAIL163_SSN=%(user)s',
        'vjlast=1212911118',
        'vjuids=-99d7a91f6.1156a6ea3cd.0.9e6d0e6f029e78',
        '_ntes_nuid=7118c6a1c9d16ee59a045a2e66186af8',
        'NTES_adMenuNum=3',
        '_ntes_nnid=7118c6a1c9d16ee59a045a2e66186af8,0|www|urs|163mail|news'
        '|ent|sports|digi|lady|tech|stock|travel|music|2008|',
        'NTES_UFC=9110001100010000000000000000000000100000000000000002331026300000',
        'logType=-1',
        'nts_mail_user=%(user)s:-1:1',
        'Province=010',
        '_ntes_nvst=1212911122953,|www|urs|',
        'Coremail=%(coremail)s',
        'wmsvr_domain=g1a109.mail.163.com',
        'ntes_mail_truename=',
        'ntes_mail_province=',
        'ntes_mail_sex=',
        'mail_style=js3',
        'mail_host=g1a109.mail.163.com',
        'mail_sid=%(sid)s',
        'USERTRACK=58.31.69.214.1212911333143304',
        'ntes_mail_firstpage=normal',
        'NTES_SESS=%(ntes_sess)s',
        'NETEASE_SSN=%(user)s',
        'NETEASE_ADV=%(ntes_adv)s',
    ))

    def __init__(self, username, password):
        MailContact.__init__(self, username, password)
        self.login_host = 'reg.163.com'
        self.login_url = ('/logins.jsp?type=1&url=http://fm163.163.com'
                          '/coremail/fcg/ntesdoor2?lightweight=1'
                          '&verifycookie=1&language=-1&style=-1')
        self.login_data = urlencode({
            'verifycookie': 1,
            'style': -1,
            'product': 'mail163',
            'username': self.username,
            'password': self.password,
            'selType': -1,
            'remUser': '',
            'secure': 'on',
        })
        self.login_headers = {
            'Content-type': 'application/x-www-form-urlencoded',
            'Accept': 'text/xml,text/plain',
            'Referer': 'http://mail.163.com/',
        }
        self.contact_host = 'g2a10.mail.163.com'

    def login(self):
        """Log in with two requests and prepare the address-book request.

        The passport host returns the ``NTES_SESS`` / ``NETEASE_ADV``
        cookies; they are replayed to ``fm163.163.com``, whose first cookie
        carries the session id.  Returns True; raises ``MailContactError``
        when a step fails.
        """
        response, _ = _http_request(
            httplib.HTTPSConnection, self.login_host, 'POST', self.login_url,
            self.login_data, self.login_headers, failure="Couldn't log in.")
        # Expected headers look like:
        #   Set-Cookie: NTES_SESS=ohAW...; domain=.163.com; path=/
        #   Set-Cookie: NETEASE_SSN=<user>; domain=.163.com; path=/; expires=...
        #   Set-Cookie: NETEASE_ADV=11&24&1212921746999; domain=.163.com; ...
        sc1 = response.getheader('Set-Cookie')
        ntes_sess = _cookie_value(sc1, 'NTES_SESS')
        ntes_adv = _cookie_value(sc1, 'NETEASE_ADV')
        if not ntes_sess or not ntes_adv:
            # Wrong user name or password; the password is deliberately
            # left out of the message.
            raise MailContactError(
                "Email user %s%s or password not correct!"
                % (self.username, self.mail_type))

        # The original hard-coded its author's account name here.
        url = ('/coremail/fcg/ntesdoor2?lightweight=1&verifycookie=1'
               '&language=-1&style=-1&'
               + urlencode({'username': self.username}))
        # No status check: the original did not check this response either.
        response, _ = _http_request(
            httplib.HTTPConnection, 'fm163.163.com', 'GET', url,
            headers={'cookie': sc1})
        sc2 = response.getheader('Set-Cookie')
        if not sc2:
            raise MailContactError(
                "Couldn't log in. No session cookie returned.")
        coremail = sc2.partition('=')[2].split(';', 1)[0]  # first cookie value
        sid = coremail[coremail.find('%') + 1:]  # sid follows the '%'
        self.contact_url = ('/coremail/fcg/ldvcapp?funcid=prtsearchres&sid='
                            + sid
                            + '&listnum=200&tempname=address%2faddress.htm')
        self.contact_headers = {
            'Cookie': self.COOKIE_TEMPLATE % {
                'user': self.username, 'coremail': coremail, 'sid': sid,
                'ntes_sess': ntes_sess, 'ntes_adv': ntes_adv},
        }
        return True


class SohuContact(MailContact):
    """Contacts of a sohu.com mailbox; *username* is given without the domain."""

    mail_type = "@sohu.com"

    def __init__(self, username, password):
        MailContact.__init__(self, username, password)
        self.login_host = 'passport.sohu.com'
        self.login_url = 'http://passport.sohu.com/login.jsp'
        self.login_data = urlencode({
            'loginid': self.username + self.mail_type,
            'passwd': self.password,
            'sg': '5175b065623bb194e85903f5e8c43386',
            'eru': 'http://login.mail.sohu.com/login.php',
            'ru': 'http://login.mail.sohu.com/login_comm.php',
            'appid': 1000,
            'fl': '1',
            'ct': 1126084880,
            'vr': '1|1',
        })
        self.login_headers = {
            'User-agent': 'Opera/9.23',
            'Content-type': 'application/x-www-form-urlencoded',
            'Accept': 'text/xml,text/plain',
        }
        # Per-instance cookie-aware opener, instead of replacing the
        # process-wide urllib2 opener for every caller.
        self._opener = urllib2.build_opener(
            urllib2.HTTPCookieProcessor(cookielib.CookieJar()))
        self.contact_host = 'www50.mail.sohu.com'
        self.contact_url = '/webapp/contact'

    def login(self):
        """Post the login form and derive the contact URL from the final URL."""
        data = self.login_data
        if not isinstance(data, bytes):  # Python 3 needs a bytes POST body
            data = data.encode('ascii')
        request = urllib2.Request(self.login_url, data, self.login_headers)
        conn = self._opener.open(request)
        try:
            landing = conn.geturl()
        finally:
            conn.close()
        self.contact_url = os.path.dirname(landing) + '/contact'

    def get_contacts(self):
        """Fill ``self.contacts`` from the JSON on the first response line."""
        conn = self._opener.open(urllib2.Request(self.contact_url))
        try:
            lines = conn.readlines()
        finally:
            conn.close()
        if not lines:
            raise MailContactError(
                "Couldn't get contact page. Empty response.")
        info = simplejson.loads(_as_text(lines[0]))
        self.contacts = [(item['name'], item['email'])
                         for item in info['listString']]


class HotmailContact(MailContact):
    """Hotmail login sketch.

    Unfinished in the original: both methods only print the page they
    receive and ``contacts`` stays empty.
    """

    mail_type = "@hotmail.com"
    PASSWORD_PAD = 'IfYouAreReadingThisYouHaveTooMuchFreeTime'
    # Captured session cookies, reproduced from the original listing.
    LOGIN_COOKIE = '; '.join((
        'CkTst=G1213457870062',
        'MobileProf=2AV3mTOwJEE8smIfIyq69wbCn08y6UX7910BtLhqTto2MYrNSBW5hhl'
        'EuGlMJdMwwGq1WcxtENCAI1JSyTNfrS23ArFLxDjBNk!xtbIj0iglbu8DQVg9TnSTP'
        'tHj975deR',
        'MUID=C2DC0F9324AA47DCB05CE14B989D89C2',
        'ANON=A=E81AEA51F927860B07BBA712FFFFFFFF&E=69f&W=2',
        's_lastvisit=1213455335875',
        'MH=MSFT',
        'wlidperf=throughput=2087.201125175809&latency=1.422',
        'MSPRequ=lt=1213455763&co=1&id=2',
        'MSPOK=uuid-d75c4c53-1b6e-433c-af95-c3c0175a48cd',
        'CkTst=G1213455761093',
        'MSPPre=fenyon@hotmail.com',
        'MSPCID=0f45e10de2ad38c9',
        'NAP=V=1.7&E=6b4&C=bKkGf4IbC96JLFhsoKyccKm1Kf7jjhX5I3C1ofjvyMoY3iI9'
        'j0b6gg&W=2',
        'MSPSoftVis=@:@',
        'BrowserSense=Win=1&Downlevel=0&WinIEOnly=0&Firefox=1'
        '&FirefoxVersion=2.0',
        'mktstate=U=&E=en-us',
        'mkt1=norm=en-us',
        's_cc=true',
        's_sq=%5B%5BB%5D%5D',
        'MSPP3RD=3688532421',
    ))

    def __init__(self, username, password):
        MailContact.__init__(self, username, password)
        self.login_host = 'login.live.com'
        self.login_url = '/ppsecure/post.srf?id=2'
        # The pad is the constant shortened by the password length.  The
        # original slice [0:-len(password)] returned '' for an empty
        # password instead of the whole pad.
        keep = max(len(self.PASSWORD_PAD) - len(self.password), 0)
        self.login_data = urlencode({
            'login': self.username + self.mail_type,
            'passwd': self.password,
            'PPSX': 'Pass',
            'LoginOption': 2,
            'PwdPad': self.PASSWORD_PAD[:keep],
            'PPFT': ('B1S2dWnsGTFLpX9h8fxfE*ym5OABStpt0fjo%21YICXQOy1b%21xP4d'
                     'Rx8F1h1w6tR8ZyLP4h3TYGS8gSZGku3j7CxQ4poqr'),
        })
        self.login_headers = {
            'Content-type': 'application/x-www-form-urlencoded',
            'Accept': 'text/xml,text/plain',
            'Cookie': self.LOGIN_COOKIE,
            'Referer': ('https://login.live.com/ppsecure/post.srf'
                        '?id=2&bk=1213455763'),
        }
        self.contact_host = 'by120w.bay120.mail.live.com'
        self.contact_url = '/mail/GetContacts.aspx'

    def getInputValue(self, name, content):
        pass

    def login(self):
        """Request the sign-in page, post the credentials, print the reply.

        Login flow described at
        http://blog.jiexoo.com/2008/05/21/%e7%94%a8httpclient%e8%8e%b7%e5%8f%96hotmail%e8%81%94%e7%b3%bb%e4%ba%ba%e5%88%97%e8%a1%a8/
        """
        # The reply to this first request was never used by the original
        # either.  The path now has the leading '/' it was missing.
        _http_request(httplib.HTTPSConnection, self.login_host,
                      'GET', '/login.srf?id=2')
        _, page = _http_request(
            httplib.HTTPSConnection, self.login_host, 'POST', self.login_url,
            self.login_data, self.login_headers, failure="Couldn't log in.")
        print(_as_text(page))

    def get_contacts(self):
        """Fetch the contact export page and print it (no parsing yet)."""
        _, page = _http_request(
            httplib.HTTPConnection, self.contact_host, 'GET',
            self.contact_url, failure="Couldn't get contact page.")
        print(_as_text(page))


class SinaContact(MailContact):
    pass


class YahooContact(MailContact):
    pass


class MsnContact(MailContact):
    pass


# Provider classes by mail domain, as accepted by get_mailcontact().
_PROVIDERS = {
    "126.com": M126Contact,
    "163.com": M163Contact,
    "sohu.com": SohuContact,
    "hotmail.com": HotmailContact,
    "sina.com": SinaContact,
    "gmail.com": GMailContact,
}


def get_mailcontact(user, password, mailtype):
    """Return the ``(name, address)`` contacts of one web-mail account.

    *mailtype* is the mail domain, e.g. ``"163.com"``.  This is a best-effort
    call: an unknown domain, a rejected login or a failed request all yield
    an empty list rather than an exception.
    """
    provider = _PROVIDERS.get(mailtype)
    if provider is None:
        return []
    g = provider(user, password)
    try:
        g.login()
        g.get_contacts()
        return g.contacts
    except Exception:
        return []


def get_csvcontact(iter):
    """Extract ``(name, address)`` pairs from CSV lines.

    *iter* is any iterable of CSV lines.  Cells are scanned left to right in
    each row: the first cell holding an e-mail address ends the row, and the
    closest preceding non-blank cell is taken as the name.

    NOTE(review): the name carries over from earlier rows when a row starts
    with the address, exactly as in the original.
    """
    contact, name = [], None
    for row in csv.reader(iter):
        for cell in row:
            if not cell.strip():
                continue
            match = _EMAIL_RE.search(cell)
            if match:
                contact.append((name, match.group(0)))
                break
            name = cell
    return contact


def get_imcontact(iter):
    """Return one regex match object per CSV cell that holds an address.

    NOTE(review): the original appended the match objects themselves rather
    than the matched text; that is kept because callers may rely on it.
    Use ``m.group(0)`` to get the address.
    """
    contact = []
    for row in csv.reader(iter):
        for cell in row:
            match = _EMAIL_RE.search(cell)
            if match:
                contact.append(match)
    return contact


if __name__ == '__main__':
    httplib.HTTPSConnection.debuglevel = 1
    httplib.HTTPConnection.debuglevel = 1
    g = GMailContact('***', '***')
    g.login()
    g.get_contacts()
    print(g.contacts)
    g = M163Contact('***', '***')
    g.login()
    g.get_contacts()
    print(g.contacts)
posted on 2009-07-15 17:07 Eric Xiang 阅读(614) 评论(0) 编辑 收藏 举报