博客园首页登录,获取HTML文档

import re
import cookielib
import urllib
import urllib2

def printDelimiter():
    """Print a horizontal rule of 80 dashes to separate output sections."""
    rule = '-' * 80
    print(rule)

printDelimiter()
# Preparation: install a global opener backed by a CookieJar so that every
# urllib2.urlopen() call below stores and resends cookies automatically.
print('[preparation] using cookiejar & HTTPCookieProcessor to antomatically handle cookies')
cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
urllib2.install_opener(opener)

printDelimiter()
# Step 1: request the home page so that any cookies it sets land in `cj`.
print('[step1] to get cookie')
mainUrl = 'http://www.cnblogs.com/'
resp = urllib2.urlopen(mainUrl)
resp.close()  # the body is never read; release the connection right away
for index, cookie in enumerate(cj):
    print('[ %s ] %s' % (index, cookie))

printDelimiter()
# Step 2: POST the login form.
print('[setp2] emulate login Cnblogs')
loginUrl = 'http://passport.cnblogs.com/login.aspx?ReturnUrl=http%3a%2f%2fwww.cnblogs.com%2f'
# urllib.urlencode() percent-encodes every key and value itself, so the dict
# must hold *decoded* values.  The literals below that are already
# percent-encoded (they look like they were copied from a captured request --
# TODO confirm) are run through urllib.unquote_plus() first; otherwise they
# would be encoded twice ('%2F' -> '%252F').
postDict = {
    '__EVENTTARGET': '',      # field name must not carry a trailing '='
    '__EVENTARGUMENT': '',
    # NOTE(review): hard-coded view state; it may need to be read from a fresh
    # copy of the login page instead -- verify against the server.
    '__VIEWSTATE': urllib.unquote_plus(
        '%2FwEPDwULLTE1MzYzODg2NzZkGAEFHl9fQ29udHJvbHNSZXF1aXJlUG9zdEJhY2tLZXlfXxYBBQtjaGtSZW1lbWJlcm1QYDyKKI9af4b67Mzq2xFaL9Bt'),
    'tbUserName': 'XXXXXXXXXX',
    'tbPassword': 'XXXXXXXXXX',
    # The login button caption (UTF-8 bytes) and the return URL are two
    # separate form fields; they must not be fused into a single value.
    'btnLogin': urllib.unquote_plus('%E7%99%BB++%E5%BD%95'),
    'txtReturnUrl': 'http://www.cnblogs.com/',
}
postData = urllib.urlencode(postDict)
req = urllib2.Request(loginUrl, postData)  # supplying data makes this a POST
req.add_header('Content-Type', 'application/x-www-form-urlencoded')
resq = urllib2.urlopen(req)
try:
    resqHtml = resq.read()
finally:
    resq.close()  # always release the connection, even if read() fails
print(resqHtml)

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">

<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><title>

 错误提示 - 博客园用户中心

</title><link href="http://common.cnblogs.com/css/reset.css" type="text/css" rel="Stylesheet" /><link rel="stylesheet" type="text/css" href="/css/screen_base.css?id=20140305" />

<link rel="stylesheet" type="text/css" href="/css/passport.css" />

</head>

<body>

    <form name="aspnetForm" method="post" action="error.aspx?aspxerrorpath=%2flogin.aspx" id="aspnetForm">

<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="/wEPDwUJNDMyNDU0NjAzD2QWAmYPZBYCAgMPZBYCZg9kFgICAQ8WAh4EVGV4dAUM55So5oi35Lit5b+DZGTySzz2XfoMc0P93mYlvEKj6lgIDg==" />

 

    <div id="hd">

        <div id="hd_nav">&laquo;<a class="gray" href="http://www.cnblogs.com" title="程序员的网上家园">博客园</a>          

        </div>

        <div id="login_area"> 

        <script type="text/javascript" src="/scriptCurrentUserInfo.aspx"></script>     

        </div>              

    </div>

    <div id="wrap">

      <div id="header">

         

 

<div class="header_div">

 <div class="logo">

  <a href="http://www.cnblogs.com"><img src="http://static.cnblogs.com/images/logo.gif" alt="logo" width="180" height="68"/></a>

     </div>

 <div class="banner">

 </div>

</div>

<div class="mainmenu">

 <ul id="navlist_main">

  <li>&nbsp;&nbsp;&nbsp;&nbsp;</li> 

  <li><a href="/login.aspx">登 录</a></li> 

  <li><a href="/register.aspx" class="current">用户中心</a></li>       

  <li><a href="/BlogApply.aspx">申请博客</a></li> 

  <li><a href="/GetUsername.aspx">找回用户名</a></li>

  <li><a href="/GetMyPassword.aspx">重置密码</a></li>

 </ul>

</div>

 

 

 

        </div>

       <div id="sideleft">

       

<h3 class="topic_title">错误提示</h3>

<div class="main">

抱歉!发生了错误!请与管理员<a href="http://www.cnblogs.com/ContactUs.aspx">联系</a>。

</div>

<div class="return_block">

<a href="http://www.cnblogs.com" class="lnk_btn">返回网站首页</a>

</div>

 

        </div>

        <div id="sideright">

      

 

        </div>

        <div class="clear"></div>

        <div id="footer">

        <a href="http://www.cnblogs.com/AboutUS.aspx">关于博客园</a><a href="http://www.cnblogs.com/ContactUs.aspx">联系我们</a><a href="http://www.cnblogs.com/ad.aspx">广告服务</a><span>2004-2014</span><a href="http://www.cnblogs.com">博客园</a>版权所有 保留所有权利

        </div>

    </div>

    </form>

</body>

</html>



    

posted @ 2014-03-26 06:33  junezhang  阅读(693)  评论(0)  编辑  收藏  举报