换个方式爬取人人网

from urllib import request

from urllib import parse,request

# Log in from Python with a username and password, keeping the session cookies.
from http import cookiejar

# In-memory jar that collects the cookies received through the opener.
cookie = cookiejar.CookieJar()

# One handler per concern: cookie bookkeeping, plain HTTP, and HTTPS.
cookie_handler = request.HTTPCookieProcessor(cookiejar=cookie)
http_handler = request.HTTPHandler()
https_handler = request.HTTPSHandler()

# Shared opener through which the script issues its requests.
_handlers = (http_handler, https_handler, cookie_handler)
opener = request.build_opener(*_handlers)

# Log in
def login(email="18811176939", password="123457"):
    """Submit the login form to renren.com and print the response body.

    The request is sent through the module-level ``opener``, which is built
    with an ``HTTPCookieProcessor``; cookies set by the server are therefore
    kept in the shared cookie jar for later requests made with that opener.

    Args:
        email: Value sent as the ``email`` form field.
        password: Value sent as the ``password`` form field.

    NOTE: the defaults reproduce the credentials that were hard-coded in the
    original script; pass explicit values rather than relying on them.
    """
    # Address of the login endpoint
    login_url = "http://www.renren.com/PLogin.do"

    # URL-encode the form fields; supplying ``data`` makes this a POST request.
    form = parse.urlencode({
        "email": email,
        "password": password,
    })

    req = request.Request(login_url, data=form.encode("utf-8"))

    # Use the response as a context manager so the connection is always
    # closed, even if reading or decoding raises.
    with opener.open(req) as response:
        html = response.read().decode("utf-8")

    print(html)


# Home page
def getHomePage(base_url="http://www.renren.com/964508169/profile"):
    """Fetch a page through the shared ``opener`` and print its HTML.

    Args:
        base_url: URL to request. Defaults to the profile URL that the
            original script hard-coded.
    """
    # Use the response as a context manager so the connection is always
    # closed, even if reading or decoding raises.
    with opener.open(base_url) as response:
        # Read the raw bytes and decode them as UTF-8 text.
        html = response.read().decode("utf-8")

    print(html)

# Script entry point
if __name__ == "__main__":
    # NOTE(review): the login call below is commented out, so getHomePage()
    # runs without a prior login in this process — confirm this is intended.
    #login()

    getHomePage() # Fetch the personal profile page

posted @ 2018-03-10 21:25 Bob__Zhang 阅读(165) 评论(0) 编辑收藏举报

刷新页面返回顶部

白桦林

换个方式爬取人人网

公告