# 爬取开心网 (scrape kaixin001.com)
# Register a kaixin001.com account, log in with it, and scrape the home page.
import getpass
import ssl
from http import cookiejar
from urllib import request, parse

# NOTE(review): this replaces urllib's default HTTPS context factory with the
# unverified one, so server certificates are NOT checked for any HTTPS request
# made through urllib in this process. Kept because the script was written to
# rely on it; remove it if the target site presents a valid certificate.
# It must stay above the handler construction below so the handlers pick up
# the unverified context.
ssl._create_default_https_context = ssl._create_unverified_context

# request.urlopen() does not keep cookies between calls, so a dedicated opener
# with a cookie processor is used for every request instead.
# (1) Define the request manager.
http_handler = request.HTTPHandler()
https_handler = request.HTTPSHandler()
cookie = cookiejar.CookieJar()
cookie_handler = request.HTTPCookieProcessor(cookie)
# Build the opener; cookies set by any response are stored in `cookie` and
# sent back on later requests made through this same opener.
opener = request.build_opener(http_handler, https_handler, cookie_handler)


def _fetch_text(req):
    """Send *req* through the shared opener and return the body as UTF-8 text.

    The response is closed as soon as the body has been read.

    Raises:
        urllib.error.URLError: if the request fails.
        UnicodeDecodeError: if the body is not valid UTF-8.
    """
    with opener.open(req) as response:
        return response.read().decode('utf-8')


# (2) Log in.
def login():
    """Prompt for credentials, POST them to the login endpoint, print the reply.

    Any cookies the login response sets are captured by the module-level
    cookie jar, so later requests through `opener` carry them.
    """
    login_url = 'https://security.kaixin001.com/login/login_post.php'
    loginemail = input('请输入用户名:')
    # getpass keeps the password from being echoed to the terminal.
    password = getpass.getpass("请输入密码:")
    form = parse.urlencode({
        'loginemail': loginemail,
        'password': password,
    })
    # Supplying `data` makes urllib issue a POST.
    req = request.Request(url=login_url, data=form.encode('utf-8'), headers={})
    print(_fetch_text(req))


def homepage():
    """Fetch the profile home page with the cookies from login() and print it.

    This is a plain GET: the credentials are not asked for again and are not
    sent to this plain-HTTP URL. Authentication relies on whatever cookies
    login() left in the shared cookie jar.
    NOTE(review): assumes the site accepts a cookie-authenticated GET for this
    page - confirm against the live site.
    """
    base_url = 'http://www.kaixin001.com/home/?_profileuid=181673967&t=90'
    req = request.Request(url=base_url, headers={})
    print(_fetch_text(req))


if __name__ == '__main__':
    login()
    homepage()