selenium登录csdn,urllib抓取数据
selenium登录csdn,requests会话抓取数据:
# Log in to CSDN through a Selenium-driven Chrome window, then copy the
# browser's cookies into a requests session and fetch a logged-in page.
# The slider captcha has to be solved by hand while the script sleeps.
import selenium
import requests
import selenium.webdriver
import selenium.webdriver.common.keys
import time

# find_element(By.<strategy>, value) replaces the find_element_by_* helpers,
# which were removed in Selenium 4.
from selenium.webdriver.common.by import By

driver = selenium.webdriver.Chrome()
try:
    driver.get("https://passport.csdn.net/login?code=public")

    # Switch the login form to the account/password tab.
    login = driver.find_element(By.LINK_TEXT, "账号密码登录")
    login.click()
    time.sleep(5)

    username = driver.find_element(By.ID, "all")
    username.send_keys("用户名")
    time.sleep(3)

    password = driver.find_element(By.ID, "password-number")
    password.send_keys("密码")
    time.sleep(5)

    logins = driver.find_element(
        By.XPATH,
        '//*[@id="app"]/div/div/div[1]/div[2]/div[5]/div/div[6]/div/button',
    )
    time.sleep(10)  # slide the captcha manually during this pause
    logins.click()
    print(driver.page_source)
    time.sleep(15)  # wait for the cookies to load

    print("开始会话")
    # A session keeps its cookies across requests, like a browser does from
    # opening a page until it is closed.
    req = requests.session()
    cookies = driver.get_cookies()  # every cookie the browser holds
    for cookie in cookies:
        req.cookies.set(cookie['name'], cookie["value"])
    req.headers.clear()  # drop the session's default headers
    newpage = req.get("http://my.csdn.net/")
    print("会话完成")
    print(newpage.text)  # the fetched page
    time.sleep(10)
finally:
    # Shut the browser down even when a step above raised.
    driver.quit()
urllib保存cookie:
#coding:utf-8
# Fetch http://my.csdn.net/ with urllib while sending a hard-coded cookie
# header, print the raw response body and save it to csdn.txt.
import selenium
import selenium.webdriver
import time
import lxml
import lxml.etree
import requests
import urllib.request

# NOTE: a disabled (triple-quoted) Python 2 snippet that logged in with
# Selenium and printed driver.get_cookies() used to sit here. It was dead
# code with plaintext credentials and has been removed.

print("开始会话")
headers = {
    # "Host": "my.csdn.net",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "DNT": "1",
    "Referer": "http://www.csdn.net/",
    # "Accept-Encoding": "gzip, deflate",
    # (presumably left disabled because the body is read and saved as-is,
    # without any decompression step)
    "Accept-Language": "zh-CN,zh;q=0.8",
    # One cookie pair per literal; adjacent literals concatenate into the
    # same single header value the original string held.
    "cookie": (
        "uuid_tt_dd=-1734079490838081701_20171010; "
        "bdshare_firstime=1507966544895; "
        "UserName=yinghuming; "
        "UserInfo=LZTCl6p9mr%2BUgX1cEEgqwIand1mBReKkuogvIYHivh6MdgAq8c4Y4%2Fmx1uhFT%2FmWFuTu%2BCna36D%2BZ7ssW7xuzAjlIwc7Vgjd7Y7zTDJqy%2FakzOPFEGR52GRrp8sf0i9NK7p2hdvM39vRq5Y7NLJObQ%3D%3D; "
        "UserNick=%E8%8B%B1%E9%9B%84%E6%97%A0%E6%95%8C2017; "
        "AU=821; "
        "UD=%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80%E5%A4%A9%E4%B8%8B%E8%90%A5%E9%94%80; "
        "UN=yincheng0571; "
        "UE=\"yincheng01@163.com\"; "
        "BT=1508039179648; "
        "access-token=8260e0b7-a35c-419d-b4af-1f02d51c677d; "
        "Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1507965242,1507969974,1508038063,1508039035; "
        "Hm_lpvt_6bcd52f51e9b3dce32bec4a3997715ac=1508039041; "
        "dc_tos=oxuidd; "
        "dc_session_id=1508039034960_0.6956040327941211"
    ),
}
request = urllib.request.Request("http://my.csdn.net/", headers=headers)
# Context managers close the HTTP response and the output file even if
# reading or writing fails.
with urllib.request.urlopen(request) as response:
    newpagetext = response.read()
with open("csdn.txt", "wb") as file:
    file.write(newpagetext)
print(newpagetext)
print("会话完成")
# Both pauses are kept from the original; no browser is open in this script,
# so they only delay the exit.
time.sleep(10)
time.sleep(10)
#driver.close()
selenium+urllib 模拟登录 抓取数据:
# Log in to CSDN through a Selenium-driven Chrome window, turn the browser's
# cookies into a Cookie header, and fetch a logged-in page with urllib,
# saving the raw response body to csdn.txt.
# The slider captcha has to be solved by hand while the script sleeps.
import selenium
import selenium.webdriver
import selenium.webdriver.common.keys
import urllib.request
import time

# find_element(By.<strategy>, value) replaces the find_element_by_* helpers,
# which were removed in Selenium 4.
from selenium.webdriver.common.by import By

driver = selenium.webdriver.Chrome()
try:
    driver.get("https://passport.csdn.net/login?code=public")

    # Switch the login form to the account/password tab.
    login = driver.find_element(By.LINK_TEXT, "账号密码登录")
    login.click()
    time.sleep(5)

    username = driver.find_element(By.ID, "all")
    username.send_keys("用户名")
    time.sleep(3)

    password = driver.find_element(By.ID, "password-number")
    password.send_keys("密码")
    time.sleep(5)

    logins = driver.find_element(
        By.XPATH,
        '//*[@id="app"]/div/div/div[1]/div[2]/div[5]/div/div[6]/div/button',
    )
    time.sleep(10)  # slide the captcha manually during this pause
    logins.click()
    print(driver.page_source)
    time.sleep(15)  # wait for the cookies to load

    cookies = driver.get_cookies()  # every cookie the browser holds
    print(cookies)
    for cookie in cookies:
        # The login-relevant data is normally in the name and value fields.
        print(cookie['name'], cookie["value"])
    # Build the header as "name=value; name=value" (RFC 6265 cookie-string)
    # instead of appending with += and leaving a trailing ";".
    cookiestr = "; ".join(
        str(cookie["name"]) + "=" + str(cookie["value"]) for cookie in cookies
    )
    print("------------------------")

    print("开始会话")
    headers = {
        "Host": "my.csdn.net",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "DNT": "1",
        "Referer": "http://www.csdn.net/",
        # "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "cookie": cookiestr,
    }
    request = urllib.request.Request("http://my.csdn.net/", headers=headers)
    # Context managers close the HTTP response and the output file even if
    # reading or writing fails.
    with urllib.request.urlopen(request) as response:
        newpagetext = response.read()
    with open("csdn.txt", "wb") as file:
        file.write(newpagetext)
    print(newpagetext)
    print("会话完成")
    time.sleep(10)
finally:
    # Shut the browser down even when a step above raised.
    driver.quit()
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
· 理解Rust引用及其生命周期标识(上)
· 浏览器原生「磁吸」效果!Anchor Positioning 锚点定位神器解析
· DeepSeek 开源周回顾「GitHub 热点速览」
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· AI与.NET技术实操系列(二):开始使用ML.NET
· 单线程的Redis速度为什么快?