
GitHub auto-login

Final implementation code

# coding=utf-8
# Version:python3.6.0
# Tools:Pycharm 2017.3.2
# author ="wlx"
__date__ = '2018/6/14 10:37'
import requests
from bs4 import BeautifulSoup

# Step 1: fetch the login page to get the initial (unauthorized) cookies
ret = requests.get(url="https://github.com/login")
ret_cookie_dict = ret.cookies.get_dict()

# Step 2: parse the CSRF token (authenticity_token) out of the login form
s1 = BeautifulSoup(ret.text, 'html.parser')
token = s1.find(name='input', attrs={'name': 'authenticity_token'}).get('value')

# Step 3: post the credentials, the token and the initial cookies to /session
r = requests.post(
    url='https://github.com/session',
    data={
        'commit': 'Sign in',
        'utf8': '',
        'authenticity_token': token,
        'login': '792665319@qq.com',
        'password': '_97e68fde946b'
    },
    headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36'
    },
    cookies=ret_cookie_dict
)

# Step 4: parse the signed-in page and print the username
s2 = BeautifulSoup(r.text, 'html.parser')
name = s2.find(name='strong', attrs={'class': 'css-truncate-target'}).string
print('name:', name)
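The same flow can also be written with requests.Session(), which carries cookies across requests automatically, so the initial cookies do not have to be passed around by hand. A minimal sketch, not part of the original code; the credentials below are placeholders:

import requests
from bs4 import BeautifulSoup

session = requests.Session()  # a Session keeps cookies across requests automatically
login_page = session.get(url="https://github.com/login")
soup = BeautifulSoup(login_page.text, 'html.parser')
token = soup.find(name='input', attrs={'name': 'authenticity_token'}).get('value')

resp = session.post(
    url='https://github.com/session',
    data={
        'commit': 'Sign in',
        'utf8': '',
        'authenticity_token': token,
        'login': 'your-email@example.com',   # placeholder credentials
        'password': 'your-password'          # placeholder credentials
    },
    headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36'
    }
)
print(resp.status_code)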

What I learned

  1. A simple crawler: scraping Autohome (autohome.com.cn) with requests and BeautifulSoup

# coding=utf-8
# Version:python3.6.0
# Tools:Pycharm 2017.3.2
# author ="wlx"
__date__ = '2018/6/12 21:10'
import requests
from bs4 import BeautifulSoup

ret = requests.get(url="https://www.autohome.com.cn/news/")
# print(ret.content)  # response body as raw bytes
# print(ret.apparent_encoding)  # detect which encoding the document uses
# ret.encoding = "gbk"
ret.encoding = ret.apparent_encoding
# print(ret.text)

soup = BeautifulSoup(ret.text, 'html.parser')  # 'lxml' also works as a parser
# print(type(soup))  # the text is now an object: <class 'bs4.BeautifulSoup'>
div = soup.find(name='div', id='auto-channel-lazyload-article')  # find() returns the first match; only tag objects have find()
# The id attribute can be passed directly as id=...; the class attribute cannot be passed as class=...
# because class is a Python keyword; use class_='name' or attrs={'id': 'id1', 'class': 'wei'} instead
# (see the short sketch after this block)
# print(div)
li_list = div.find_all(name='li')  # find_all() returns a list of all matches; a list cannot be searched further, only tag objects can
# print(li_list)
for i in li_list:
    h3 = i.find(name='h3')
    if not h3:
        continue
    print(h3.text)
    p = i.find(name='p')
    print(p.text)
    a = i.find('a')  # same as name='a'; name is the first positional parameter
    # print(a.attrs)  # all attributes of the <a> tag
    # for key in a.attrs:
    #     print(a.attrs[key])
    print(a.get('href'))  # get one specific attribute of the tag
    img = i.find(name='img')
    # print(img.get('src'))  # this only gives the image URL; a second request is needed to download it
    src = img.get('src')
    file_name = src.rsplit('__', maxsplit=1)[1]

    ret_img = requests.get(url="https:" + src)
    with open(file_name, 'wb') as f:
        f.write(ret_img.content)
    # print(ret_img.content)
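The comment above about class_ versus attrs can be illustrated on a small, hypothetical HTML snippet (not taken from Autohome). A minimal sketch:

from bs4 import BeautifulSoup

html = '<div><h3 class="article-title">hello</h3></div>'  # hypothetical snippet, not from Autohome
soup = BeautifulSoup(html, 'html.parser')
# class is a Python keyword, so BeautifulSoup accepts either of these forms:
print(soup.find(name='h3', class_='article-title').text)            # class_ keyword
print(soup.find(name='h3', attrs={'class': 'article-title'}).text)  # attrs dict, same result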

  2. Chouti (dig.chouti.com) login

# coding=utf-8
# Version:python3.6.0
# Tools:Pycharm 2017.3.2
# author ="wlx"
__date__ = '2018/6/13 10:59'
import requests
# How a browser logs in: first it visits the home page, which returns an unauthorized cookie; it then sends a
# POST request carrying the username, password and that unauthorized cookie. After the login succeeds, the
# server authorizes the cookie issued on the first visit, so that original cookie becomes usable.
# 1. Send a GET request to the site. It has an anti-crawler firewall, so include a User-Agent header
#    to make the request look like it comes from a browser.
ret = requests.get(
    url="https://dig.chouti.com/",
    headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36'
    }
)
ret_cookie_dict = ret.cookies.get_dict()


# 2. Send a POST request to log in, carrying the cookies from the first visit so they get authorized
response = requests.post(
    url='https://dig.chouti.com/login',
    data={
        'phone': '8618846453138',
        'password': 'we18846453138',
        'oneMonth': '1'
    },
    headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36'
    },
    cookies=ret_cookie_dict  # log in with the cookies from the initial visit so they become authorized
)
# cookie_dict = response.cookies.get_dict()  # the cookies returned by this second request are not needed here

# 3. Vote on a post using the now-authorized cookies from the first visit
r1 = requests.post(
    url='https://dig.chouti.com/link/vote?linksId=20217671',
    headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36'
    },
    cookies=ret_cookie_dict  # note the parameter is cookies (plural); these are the authorized cookies from step 1
)
print(r1.text)

  3. The requests module

# params: pass parameters in the URL
import requests
'''
Request headers: http://www.oldboyedu.com    headers={} ...
Request body: data= sends a form body (converted internally to name=alex&age=18);
              json= sends the body as a JSON string, '{"name": "alex", "age": 18}'
Rule of thumb: if the browser shows "Form Data", send a data= body; if it shows "payload", send a json= body
(a runnable check against httpbin.org appears at the end of this section)
'''
requests.request(method='get', url='http://127.0.0.1:8000/test/')
requests.request(method='post', url='http://127.0.0.1:8000/test/')
requests.get(url='x')  # equivalent to requests.request(method='get', url='x')
requests.post(url='x')  # equivalent to requests.request(method='post', url='x')
requests.get(url='http://www.oldboyedu.com', params={"nid": 1, 'name': 'x'}, headers={}, cookies={})  # both data= and json= can carry a request body; see the note above for the difference
# json={"name": "alex", "age": 18} is equivalent to data=json.dumps({"name": "alex", "age": 18}); json= just does the json.dumps step for you
requests.post(url='http://www.oldboyedu.com', params={"nid": 1, 'name': 'x'}, data={"name": "alex", "age": 18}, headers={}, cookies={})
# params puts the arguments into the URL: the URL above becomes http://www.oldboyedu.com?nid=1&name=x
'''
Module
    requests
        method:
        url:
        params:
        data:
        json:
        headers:
        cookies:
        proxies: if your IP gets banned, use proxies. Don't hard-code a single proxy: buy ten or so and pick one
            at random for each request. Some proxies also require authentication:
            auth = HTTPProxyAuth('username', 'mypassword')
            r = requests.get("http://www.google.com", proxies=proxyDict, auth=auth)

        files: upload files
        auth: basic authentication
        timeout: response timeout, i.e. how long to wait before giving up
        allow_redirects: True
        stream: download large files in chunks
            ret = requests.get('http://127.0.0.2:8000/test/', stream=True)
            for i in ret.iter_content():
                print(i)
            from contextlib import closing
            with closing(requests.get('http://httpbin.org/get', stream=True)) as r:
                # process the response here
                for i in r.iter_content():
                    print(i)
        cert: client certificate
        verify: whether to verify the SSL certificate
    Reference: https://www.cnblogs.com/wupeiqi/articles/6283017.html
'''
'''
def request(method, url, **kwargs):
    """Constructs and sends a :class:`Request <Request>`.

    :param method: method for the new :class:`Request` object.
    :param url: URL for the new :class:`Request` object.
    :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`.
    :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
    :param json: (optional) json data to send in the body of the :class:`Request`.
    :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`.
    :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`.
    :param files: (optional) Dictionary of ``'name': file-like-objects`` (or ``{'name': file-tuple}``) for multipart encoding upload.
        ``file-tuple`` can be a 2-tuple ``('filename', fileobj)``, 3-tuple ``('filename', fileobj, 'content_type')``
        or a 4-tuple ``('filename', fileobj, 'content_type', custom_headers)``, where ``'content-type'`` is a string
        defining the content type of the given file and ``custom_headers`` a dict-like object containing additional headers
        to add for the file.
    :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth.
    :param timeout: (optional) How long to wait for the server to send data
        before giving up, as a float, or a :ref:`(connect timeout, read
        timeout) <timeouts>` tuple.
    :type timeout: float or tuple
    :param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed.
    :type allow_redirects: bool
    :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy.
    :param verify: (optional) whether the SSL cert will be verified. A CA_BUNDLE path can also be provided. Defaults to ``True``.
    :param stream: (optional) if ``False``, the response content will be immediately downloaded.
    :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response

    Usage::

      >>> import requests
      >>> req = requests.request('GET', 'http://httpbin.org/get')
      <Response [200]>
  Reference: https://www.cnblogs.com/wupeiqi/articles/6283017.html
'''
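The note above about data= versus json= can be checked against httpbin.org (already used in the notes). A minimal sketch, assuming httpbin.org is reachable:

import requests

# data= sends name=alex&age=18 with Content-Type: application/x-www-form-urlencoded
r1 = requests.post('http://httpbin.org/post', data={'name': 'alex', 'age': 18})
print(r1.json()['form'])  # {'name': 'alex', 'age': '18'}

# json= serializes the dict to '{"name": "alex", "age": 18}' and sets Content-Type: application/json,
# roughly equivalent to data=json.dumps({'name': 'alex', 'age': 18}) plus the header
r2 = requests.post('http://httpbin.org/post', json={'name': 'alex', 'age': 18})
print(r2.json()['json'])  # {'name': 'alex', 'age': 18}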

 
