请求库-requests使用

# -*- coding: utf-8 -*-
import requests
from urllib.parse import urlencode

# python模仿百度搜索引擎
# keyword = input(">>:").strip()
# res = urlencode({"wd": keyword}, encoding="utf-8")
# url = "https://www.baidu.com/s?" + res
# print(url)
# response = requests.get(url,
#                         params={
#                             "wd":keyword,     #搜索关键词
#                             "pn":20           #类似于页码
#                         },
#                         headers={
#                             # 假如没有带user-agent则获取不到页面信息
#                             "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"
#                         },
#                         )
# with open("tt.html", "w", encoding="utf-8") as f:
#     f.write(response.text)


# GitHub access without typing a password:
# send the cookie captured from an earlier visit along with the request, so
# the protected page can be fetched without going through the login form.
# NOTE: a session cookie is a credential -- never publish a real one.

response = requests.get(
    url="https://github.com/settings/emails",
    headers={
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36",
        "Cookie": "_octo=GH1.1.892890929.1501206039; logged_ixxxxs; dotcom_user=522338473; _ga=GA1.2.11058xxxxx501206039; user_session=xxxxxxxxxOXK_aYSafJ4eG2nSniFW8Cs; __Host-user_session_same_site=eeNPhtNVe46wW48GpyotWT_pbOXK_aYSafJ4eG2nSniFW8Cs; tz=Asia%2FShanghai; _gat=1; _gh_sess=eyJxxxx9uX2lkIjoiZmVlOGZlZmRiYjJxxxxxDM1MWExYzg1ZWMzYzQiLCJsYXN0X3JlYWRfZnJvbV9yZXBsaWNhcyI6MTUxNjAyMzIzMDQ2OCwiY29udGV4dCI6Ii8iLCJyZXR1cm5fdG8iOiIvc2V0dGluZ3MvZW1haWxzIn0%3D--45e20d559f7c0df0ee8959a1796dcdd3780f71bb",
    },
    # Return the first response as-is instead of following a redirect.
    allow_redirects=False,
    # requests applies no timeout by default; without one a stalled server
    # would block this call forever.
    timeout=10,
)
# True only when the account e-mail address appears in the returned page.
print("522338473@qq.com" in response.text)

有些网站的页面抓取到本地后，会因为编码问题出现乱码，用chardet可以很好地解决：
直接将chardet探测到的编码赋值给response.encoding，之后再输出response.text
时就不会出现乱码了。
import chardet
import requests

# requests applies no timeout by default; set one so a stalled server cannot
# block the script forever.
response = requests.get("https://www.cnblogs.com", timeout=10)

# Sniff the character encoding from the raw body bytes once and reuse the
# result, instead of running the detector twice over the same content.
detected = chardet.detect(response.content)
print(detected)

# Assign the detected encoding before reading .text so the body is decoded
# with it.
response.encoding = detected["encoding"]
print(response.text)

除此之外还有一种基于流的模式
import requests

# stream=True defers downloading the body until it is explicitly read.
# The with-block closes the response afterwards, so the connection is
# released even though the body is only partially consumed.
with requests.get("https://www.baidu.com", stream=True, timeout=10) as response:
    # Call read() with a byte count; printing response.raw.read without the
    # parentheses would only show the bound method object, not any data.
    print(response.raw.read(10))
设置stream=True标志位，使得响应以字节流的方式进行读取，response.raw.read函数可以指定读取的字节数

posted @ 2018-01-15 22:01 一石数字欠我15w!!! 阅读(605) 评论(0) 编辑收藏举报

刷新页面返回顶部

一石数字欠我15W❗

画饼四年不涨薪没事。但是他贪污我10000垫付款不行❗

请求库-request使用

公告