爬虫——表单登录
1. POST 提交表单
# -*- coding:utf-8 -*-
import urllib, urllib2
import cookielib
import lxml.html
from lxml import etree
def getcookie():
    """Build a URL opener that stores and resends cookies.

    A fresh, empty ``cookielib.CookieJar`` is created on every call and
    attached to the opener through ``urllib2.HTTPCookieProcessor``, so all
    requests made through the returned opener share that jar.

    Returns:
        The opener produced by ``urllib2.build_opener``.
    """
    jar = cookielib.CookieJar()
    cookie_handler = urllib2.HTTPCookieProcessor(jar)
    return urllib2.build_opener(cookie_handler)
def get_data(html):
    """Collect the named ``<input>`` fields found inside forms of a page.

    Args:
        html: HTML source of the page to scan.

    Returns:
        A dict mapping each input's ``name`` attribute to its ``value``
        attribute. Inputs without a (non-empty) ``name`` are skipped, and an
        input without a ``value`` attribute maps to an empty string. When
        several inputs share a name, the last one in document order wins.
    """
    tree = lxml.html.fromstring(html)
    data = {}
    for field in tree.cssselect('form input'):
        name = field.get('name')
        if name:
            # Default to '' rather than None: the result is meant to be
            # URL-encoded, and a None value would be sent as the text "None".
            data[name] = field.get('value', '')
    return data
def main():
    """Log in to Douban by POSTing the login form and print the final URL.

    The login page is fetched first so that the input fields already present
    in its form (collected by ``get_data``) are sent back together with the
    credentials. Both requests go through the same cookie-aware opener.
    """
    url = 'https://www.douban.com/accounts/login?source=main'
    # Create the opener before the first request so that any cookies set by
    # the login page are sent back with the form submission.
    opener = getcookie()
    html = opener.open(url).read()
    data = get_data(html)
    data['login'] = 'login'
    # NOTE(review): these look like placeholder credentials -- replace them
    # with real ones before running.
    data['form_email'] = '1111111111@qq.com'
    data['form_password'] = 'admin'
    # Supplying a request body makes urllib2 send a POST instead of a GET.
    request = urllib2.Request(url, urllib.urlencode(data))
    response = opener.open(request)
    # The URL after redirects shows where the site sent us after the login
    # attempt.
    print(response.geturl())
# Run the login only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
2. 高级模块 mechanize
# -*- coding:utf-8 -*-
import mechanize
import sys
# Reload the sys module, then set the interpreter-wide default string
# encoding to UTF-8.
# NOTE(review): this relies on the Python 2 builtin reload() and on
# sys.setdefaultencoding(); presumably the reload is what makes
# setdefaultencoding callable here -- confirm. It changes implicit
# str/unicode conversions for the whole process, so keep it at the very top.
reload(sys)
sys.setdefaultencoding('utf-8')
def main():
    """Log in to Douban through a mechanize browser and print the final URL.

    Opens the login page, fills the e-mail and password fields of the first
    form on the page, submits it, and prints the URL of the response.
    """
    url = 'https://www.douban.com/accounts/login?source=main'
    br = mechanize.Browser()
    br.open(url)
    # Tip: iterate over br.forms() and print each form to inspect what the
    # page offers.
    # NOTE(review): assumes the first form on the page is the login form --
    # confirm against the live page.
    br.select_form(nr=0)
    # NOTE(review): these look like placeholder credentials -- replace them
    # with real ones before running.
    br['form_email'] = '1111111111@qq.com'
    br['form_password'] = 'admin'
    response = br.submit()
    # The URL after redirects shows where the site sent us after the login
    # attempt.
    print(response.geturl())
# Run the login only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()