博客园首页登录,获取HTML文档
import re
import cookielib
import urllib
import urllib2
def printDelimiter(width=80, char='-'):
    """Print a horizontal separator line to stdout.

    Args:
        width: Number of times ``char`` is repeated; defaults to 80.
        char: String that is repeated to build the line; defaults to '-'.
    """
    # A single parenthesised argument is valid both for the Python 2 print
    # statement and for the Python 3 print function.
    print(char * width)
# Script flow: install a cookie-aware opener, fetch the home page to collect
# cookies, then POST the login form and print the HTML that comes back.
printDelimiter()
print('[preparation] using cookiejar & HTTPCookieProcessor to antomatically handle cookies')
cj = cookielib.CookieJar()  # jar that collects cookies from responses
# Opener whose HTTPCookieProcessor reads from / writes to the jar above.
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
urllib2.install_opener(opener)  # make urllib2.urlopen() use this opener

printDelimiter()
print('[step1] to get cookie')
mainUrl = 'http://www.cnblogs.com/'  # Cnblogs home page URL
resp = urllib2.urlopen(mainUrl)  # response body is not read; the request fills the jar
for index, cookie in enumerate(cj):  # list every cookie received so far
    print('[ %s ] %s' % (index, cookie))

printDelimiter()
print('[setp2] emulate login Cnblogs')
# URL that receives the user name and password.
loginUrl = 'http://passport.cnblogs.com/login.aspx?ReturnUrl=http%3a%2f%2fwww.cnblogs.com%2f'
# Form fields of the POST body. urllib.urlencode() below percent-encodes every
# key and value, so the values here must be the *decoded* ones. The two values
# copied from a captured request are already percent-encoded and are therefore
# decoded first; otherwise they would be encoded twice.
postDict = {
    '__EVENTTARGET': '',  # field name must not contain a trailing '='
    '__EVENTARGUMENT': '',
    # NOTE(review): hard-coded value taken from a captured request; presumably
    # it has to match what the login page currently serves - confirm.
    '__VIEWSTATE': urllib.unquote_plus('%2FwEPDwULLTE1MzYzODg2NzZkGAEFHl9fQ29udHJvbHNSZXF1aXJlUG9zdEJhY2tLZXlfXxYBBQtjaGtSZW1lbWJlcm1QYDyKKI9af4b67Mzq2xFaL9Bt'),
    'tbUserName': 'XXXXXXXXXX',
    'tbPassword': 'XXXXXXXXXX',
    # Caption of the login button, kept in its ASCII wire form and decoded here.
    'btnLogin': urllib.unquote_plus('%E7%99%BB++%E5%BD%95'),
    # Separate form field; it was previously glued onto the btnLogin value.
    'txtReturnUrl': 'http://www.cnblogs.com/',
}
postData = urllib.urlencode(postDict)  # encode the form as the POST body
req = urllib2.Request(loginUrl, postData)  # supplying data makes this a POST
req.add_header('Content-Type', 'application/x-www-form-urlencoded')
resq = urllib2.urlopen(req)  # send the login request
resqHtml = resq.read()  # raw HTML of the response
print(resqHtml)
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><title>
错误提示 - 博客园用户中心
</title><link href="http://common.cnblogs.com/css/reset.css" type="text/css" rel="Stylesheet" /><link rel="stylesheet" type="text/css" href="/css/screen_base.css?id=20140305" />
<link rel="stylesheet" type="text/css" href="/css/passport.css" />
</head>
<body>
<form name="aspnetForm" method="post" action="error.aspx?aspxerrorpath=%2flogin.aspx" id="aspnetForm">
<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="/wEPDwUJNDMyNDU0NjAzD2QWAmYPZBYCAgMPZBYCZg9kFgICAQ8WAh4EVGV4dAUM55So5oi35Lit5b+DZGTySzz2XfoMc0P93mYlvEKj6lgIDg==" />
<div id="hd">
<div id="hd_nav">«<a class="gray" href="http://www.cnblogs.com" title="程序员的网上家园">博客园</a>
</div>
<div id="login_area">
<script type="text/javascript" src="/scriptCurrentUserInfo.aspx"></script>
</div>
</div>
<div id="wrap">
<div id="header">
<div class="header_div">
<div class="logo">
<a href="http://www.cnblogs.com"><img src="http://static.cnblogs.com/images/logo.gif" alt="logo" width="180" height="68"/></a>
</div>
<div class="banner">
</div>
</div>
<div class="mainmenu">
<ul id="navlist_main">
<li> </li>
<li><a href="/login.aspx">登 录</a></li>
<li><a href="/register.aspx" class="current">用户中心</a></li>
<li><a href="/BlogApply.aspx">申请博客</a></li>
<li><a href="/GetUsername.aspx">找回用户名</a></li>
<li><a href="/GetMyPassword.aspx">重置密码</a></li>
</ul>
</div>
</div>
<div id="sideleft">
<h3 class="topic_title">错误提示</h3>
<div class="main">
抱歉!发生了错误!请与管理员<a href="http://www.cnblogs.com/ContactUs.aspx">联系</a>。
</div>
<div class="return_block">
<a href="http://www.cnblogs.com" class="lnk_btn">返回网站首页</a>
</div>
</div>
<div id="sideright">
</div>
<div class="clear"></div>
<div id="footer">
<a href="http://www.cnblogs.com/AboutUS.aspx">关于博客园</a><a href="http://www.cnblogs.com/ContactUs.aspx">联系我们</a><a href="http://www.cnblogs.com/ad.aspx">广告服务</a><span>2004-2014</span><a href="http://www.cnblogs.com">博客园</a>版权所有 保留所有权利
</div>
</div>
</form>
</body>
</html>