requests¶
GET requests¶
In [ ]:
import requests
response=requests.get('http://www.baidu.com')#get请求
In [ ]:
response.status_code #响应状态码
In [ ]:
response.url#请求的网址
In [ ]:
response.headers#响应头
In [ ]:
response.cookies#cookies
requests.utils.dict_from_cookiejar(response.cookies)
In [ ]:
response.content#字节流形式
In [ ]:
response.text#输出正文
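A minimal sketch tying the attributes above together (httpbin.org is used here as a neutral test host):
In [ ]:
import requests

response = requests.get('http://httpbin.org/get')
if response.status_code == 200:                      # request succeeded
    response.encoding = response.apparent_encoding   # guard against mojibake
    print(response.url)                              # final URL after redirects
    print(response.headers['Content-Type'])          # one response header
    print(response.text[:200])                       # first 200 characters of the body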
GET requests with parameters¶
In [ ]:
# Two ways to pass query parameters
# Method 1: write the parameters directly into the URL
import requests
response = requests.get('http://httpbin.org/get?name=Jim&age=22')  # parameters inline in the URL
print(response.text)
In [ ]:
response.url
In [ ]:
# Method 2: put the parameters in a dict and pass it via the params argument
data = {'name': 'tom', 'age': 20}
response = requests.get('http://httpbin.org/get', params=data)
print(response.text)
In [ ]:
# Search Baidu for one keyword
data = {'wd': 'python'}
response = requests.get('http://www.baidu.com/s?', params=data)
print(response.text)
In [ ]:
# Search Baidu for several keywords
words = ['python', 'java', 'c', 'matlab']
for word in words:
    data = {'wd': word}
    response = requests.get('http://www.baidu.com/s?', params=data)
    print(response.url)
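requests percent-encodes the params dict itself; to preview the query string it will produce without sending anything, urllib's urlencode applies the same encoding:
In [ ]:
from urllib.parse import urlencode

data = {'wd': 'python'}
# the same percent-encoding that requests applies to params=
print('http://www.baidu.com/s?' + urlencode(data))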
POST requests¶
In [ ]:
# POST request with the post method
import requests
data = {'name': 'jim', 'age': '22'}
response = requests.post('http://httpbin.org/post', data=data)
print(response.text)
In [ ]:
response.url
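Form data is one body format; requests can also send a JSON body directly through the json= keyword, which serializes the dict and sets the Content-Type header. A minimal sketch against httpbin:
In [ ]:
import requests

payload = {'name': 'jim', 'age': 22}
# json= serializes the dict and sets Content-Type: application/json
response = requests.post('http://httpbin.org/post', json=payload)
print(response.json()['json'])  # httpbin echoes the parsed JSON body back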
Extracting and sorting Lianjia listing areas and prices¶
In [ ]:
import requests
import re
url='https://bj.lianjia.com/zufang/'
header={'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
response = requests.get(url, headers=header)
html=response.text
In [ ]:
response.status_code
In [ ]:
html
In [ ]:
nameregex = re.compile(r'alt="(.*?)"\n')
name = nameregex.findall(html)  # all community names
arearegex = re.compile(r'([0-9.]+)㎡\n')
area = arearegex.findall(html)  # floor areas
priceregex = re.compile(r'<em>([0-9.]+)</em> 元/月')
price = priceregex.findall(html)  # prices
In [ ]:
len(name)
In [ ]:
len(area)
In [ ]:
len(price)
In [ ]:
import pandas as pd
datalist = []
for i in range(len(name)):
    datalist.append([name[i], float(area[i]), float(price[i])])
df = pd.DataFrame(datalist, columns=['name', 'area', 'price']).set_index('name')
In [ ]:
df
In [ ]:
df.info()
In [ ]:
df.sort_values('area')  # sort by area
In [ ]:
df.sort_values('price')  # sort by price
In [ ]:
# Complete code
import requests
import re
import pandas as pd
url = 'https://sz.lianjia.com/zufang/'
header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
response = requests.get(url, headers=header)
html = response.text
nameregex = re.compile(r'alt="(.*?)"\n')
name = nameregex.findall(html)  # all community names
arearegex = re.compile(r'([0-9.]+)㎡\n')
area = arearegex.findall(html)  # floor areas
priceregex = re.compile(r'<em>([0-9.]+)</em> 元/月')
price = priceregex.findall(html)  # prices
datalist = []
for i in range(len(name)):
    datalist.append([name[i], float(area[i]), float(price[i])])
df = pd.DataFrame(datalist, columns=['name', 'area', 'price']).set_index('name')
df.sort_values('area')   # sort by area
df.sort_values('price')  # sort by price
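The three findall lists are assumed to line up one-to-one; if the page layout changes they can end up with different lengths, and range(len(name)) indexing would then raise an IndexError. A zip-based sketch avoids that (the sample lists here are dummies standing in for the findall results):
In [ ]:
import pandas as pd

# dummy lists standing in for the findall results above
name, area, price = ['A', 'B'], ['50.5', '60'], ['3000', '4500']

# zip stops at the shortest list, so a layout change that drops one field
# cannot raise an IndexError the way range(len(name)) indexing can
datalist = [[n, float(a), float(p)] for n, a, p in zip(name, area, price)]
df = pd.DataFrame(datalist, columns=['name', 'area', 'price']).set_index('name')
print(df)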
Crawling multiple Lianjia pages¶
In [ ]:
import requests
import re
import time
import random
import pandas as pd
header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
data = pd.DataFrame()
for i in range(1, 11):  # crawl 10 pages
    print('Crawling page %d' % i)
    baseurl = 'https://bj.lianjia.com/zufang/pg'
    url = baseurl + str(i) + '/#contentList'
    response = requests.get(url, headers=header)
    html = response.text
    nameregex = re.compile(r'alt="(.*?)"\n')
    name = nameregex.findall(html)  # all community names
    arearegex = re.compile(r'([0-9.]+)㎡\n')
    area = arearegex.findall(html)  # floor areas
    priceregex = re.compile(r'<em>([0-9.]+)</em> 元/月')
    price = priceregex.findall(html)  # prices
    datalist = []
    for j in range(len(name)):  # j, so the page counter i is not shadowed
        datalist.append([name[j], float(area[j]), float(price[j])])
    df = pd.DataFrame(datalist)
    data = pd.concat([data, df])
    time.sleep(random.randint(6, 7))  # pause between pages to be polite
print('Done crawling')
data.columns = ['name', 'area', 'price']
data.set_index('name').sort_values('area')
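To keep the combined table between sessions it can be written out; a one-line sketch, assuming the data frame from the cell above (the file name lianjia_rentals.csv is arbitrary):
In [ ]:
# utf-8-sig adds a BOM so Excel shows the Chinese community names correctly
data.to_csv('lianjia_rentals.csv', encoding='utf-8-sig')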
In [ ]:
time.time()
Youdao Translate: POST request¶
In [ ]:
import requests
url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'
# alternative endpoint: http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule
formdata = {'i': '中国',
            'from': 'AUTO',
            'to': 'AUTO',
            'smartresult': 'dict',
            'client': 'fanyideskweb',
            'salt': '1541165120374',
            'sign': '5bbeca852044319291d932d4bfe92564',
            'doctype': 'json',
            'version': '2.1',
            'keyfrom': 'fanyi.web',
            'action': 'FY_BY_REALTIME',
            'typoResult': 'false'
            }
header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
response = requests.post(url, data=formdata, headers=header)
print(response.json()['translateResult'][0][0]['tgt'])
In [ ]:
response.json()['translateResult'][0][0]['tgt']
Interactive version¶
In [ ]:
# Interactive version: translate whatever the user types
content = input('Enter text to translate: ')
import requests
url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&smartresult=ugc&sessionFrom=null'
formdata = {'i': content,
            'from': 'AUTO',
            'to': 'AUTO',
            'smartresult': 'dict',
            'client': 'fanyideskweb',
            'salt': '1541165120374',
            'sign': '5bbeca852044319291d932d4bfe92564',
            'doctype': 'json',
            'version': '2.1',
            'keyfrom': 'fanyi.web',
            'action': 'FY_BY_REALTIME',
            'typoResult': 'false'
            }
header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
response = requests.post(url, data=formdata, headers=header)
print(response.json()['translateResult'][0][0]['tgt'])
In [ ]:
# Translate several strings in a loop
import requests
import time
url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&smartresult=ugc&sessionFrom=null'
contents = ['中国', '美国', '英国', '法国']
for content in contents:
    formdata = {'i': content,
                'from': 'AUTO',
                'to': 'AUTO',
                'smartresult': 'dict',
                'client': 'fanyideskweb',
                'salt': '1541165120374',
                'sign': '5bbeca852044319291d932d4bfe92564',
                'doctype': 'json',
                'version': '2.1',
                'keyfrom': 'fanyi.web',
                'action': 'FY_BY_REALTIME',
                'typoResult': 'false'
                }
    header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
    response = requests.post(url, data=formdata, headers=header)
    print(response.json()['translateResult'][0][0]['tgt'])
    time.sleep(5)  # pause between requests
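The POST is repeated verbatim for every word, so it is natural to factor it into a function. A sketch under one assumption: a reduced payload (only i/from/to/doctype) has historically been enough for this endpoint, but that is not guaranteed.
In [ ]:
import requests

URL = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'
HEADER = {'User-Agent': 'Mozilla/5.0'}

def translate(text):
    """POST one string to the Youdao endpoint and return the translation."""
    formdata = {'i': text, 'from': 'AUTO', 'to': 'AUTO', 'doctype': 'json'}
    response = requests.post(URL, data=formdata, headers=HEADER)
    response.raise_for_status()  # fail loudly on HTTP errors
    return response.json()['translateResult'][0][0]['tgt']

print(translate('中国'))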
Maoyan movie leaderboard¶
Single-page crawl¶
In [ ]:
import requests
import re
import pandas as pd
url='https://maoyan.com/board/4?offset=0'
header={'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
response = requests.get(url, headers=header)
html=response.text
In [ ]:
html
In [ ]:
pattern = re.compile('<dd>.*?board-index.*?>(.*?)</i>.*?data-src="(.*?)".*?name.*?a.*?>(.*?)</a>.*?star.*?>(.*?)</p>.*?releasetime.*?>(.*?)</p>.*?integer.*?>(.*?)</i>.*?fraction.*?>(.*?)</i>.*?</dd>', re.S)  # re.S lets . match newlines too
items = pattern.findall(html)
In [ ]:
items
In [ ]:
df=pd.DataFrame(items,columns=['rank','url','title','actors','time','score1','score2']).set_index('rank')
In [ ]:
df
In [ ]:
url='https://maoyan.com/board/4?offset=0'
header={'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
response = requests.get(url,headers=header)
html=response.content.decode('utf-8')
In [ ]:
html
In [ ]:
pattern = re.compile('<dd>.*?board-index.*?>(.*?)</i>.*?data-src="(.*?)".*?name.*?a.*?>(.*?)</a>.*?star.*?>(.*?)</p>.*?releasetime.*?>(.*?)</p>.*?integer.*?>(.*?)</i>.*?fraction.*?>(.*?)</i>.*?</dd>', re.S)  # re.S lets . match newlines too
items = re.findall(pattern, html)
In [ ]:
df=pd.DataFrame(items,columns=['rank','url','title','actors','time','score1','score2']).set_index('rank')
df['actors']=df['actors'].apply(lambda x:x.strip())
df
In [ ]:
df.info()
In [ ]:
df['score1']
In [ ]:
df['score'] = df['score1'] + df['score2']  # string concatenation: '9.' + '6' -> '9.6'
In [ ]:
df.drop(['score1','score2'],axis=1)
In [ ]:
# Complete single-page code
url = 'https://maoyan.com/board/4?offset=0'
header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
response = requests.get(url, headers=header)
html = response.content.decode('utf-8')
pattern = re.compile('<dd>.*?board-index.*?>(.*?)</i>.*?data-src="(.*?)".*?name.*?a.*?>(.*?)</a>.*?star.*?>(.*?)</p>.*?releasetime.*?>(.*?)</p>.*?integer.*?>(.*?)</i>.*?fraction.*?>(.*?)</i>.*?</dd>', re.S)  # re.S lets . match newlines too
items = re.findall(pattern, html)
df = pd.DataFrame(items, columns=['rank','url','title','actors','time','score1','score2']).set_index('rank')
df['actors'] = df['actors'].apply(lambda x: x.strip())
df['score'] = df['score1'] + df['score2']  # string concatenation: '9.' + '6' -> '9.6'
df.drop(['score1','score2'], axis=1)
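The long pattern is hard to audit; an equivalent rewrite with named groups (same captures, split across adjacent string literals that Python concatenates) makes each field explicit. It assumes html from the cell above:
In [ ]:
import re

pattern = re.compile(
    '<dd>.*?board-index.*?>(?P<rank>.*?)</i>'
    '.*?data-src="(?P<url>.*?)"'
    '.*?name.*?a.*?>(?P<title>.*?)</a>'
    '.*?star.*?>(?P<actors>.*?)</p>'
    '.*?releasetime.*?>(?P<time>.*?)</p>'
    '.*?integer.*?>(?P<score1>.*?)</i>'
    '.*?fraction.*?>(?P<score2>.*?)</i>.*?</dd>', re.S)

# finditer + groupdict yields one dict per movie, ready for pd.DataFrame
items = [m.groupdict() for m in pattern.finditer(html)]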
In [ ]:
html
In [ ]:
startpattern = re.compile('"?offset=(.*?)"\n >下一页')  # 下一页 = the "next page" link text
start = re.findall(startpattern, html)[0]  # e.g. '10'
In [ ]:
start
Maoyan multi-page crawl¶
In [ ]:
# Crawl multiple pages, method 1: fixed page count
import random
df = pd.DataFrame()
for i in range(10):
    print('Crawling page %d' % (i + 1))
    baseurl = 'https://maoyan.com/board/4?offset='
    url = baseurl + str(i * 10)
    header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
    response = requests.get(url, headers=header)
    if response.status_code == 200:
        html = response.content.decode('utf-8')
        pattern = re.compile('<dd>.*?board-index.*?>(.*?)</i>.*?data-src="(.*?)".*?name.*?a.*?>(.*?)</a>.*?star.*?>(.*?)</p>.*?releasetime.*?>(.*?)</p>.*?integer.*?>(.*?)</i>.*?fraction.*?>(.*?)</i>.*?</dd>', re.S)  # re.S lets . match newlines too
        items = re.findall(pattern, html)
        data = pd.DataFrame(items, columns=['rank','url','title','actors','time','score1','score2']).set_index('rank')
        df = pd.concat([df, data])
    time.sleep(random.randint(5, 7))
df['actors'] = df['actors'].apply(lambda x: x.strip())
df['score'] = df['score1'] + df['score2']
df = df.drop(['score1','score2'], axis=1)
df
In [ ]:
# Crawl multiple pages, method 2 (preferred): follow the next-page link
# Complete multi-page code
import pandas as pd
import re
import time
import requests
import random
df = pd.DataFrame()
start = '0'
while True:
    try:  # exception handling: the loop ends when there is no next page
        print('Crawling offset %d' % int(start))
        baseurl = 'https://maoyan.com/board/4?offset='
        url = baseurl + start
        header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
        response = requests.get(url, headers=header)
        if response.status_code == 200:
            html = response.content.decode('utf-8')
            pattern = re.compile('<dd>.*?board-index.*?>(.*?)</i>.*?data-src="(.*?)".*?name.*?a.*?>(.*?)</a>.*?star.*?>(.*?)</p>.*?releasetime.*?>(.*?)</p>.*?integer.*?>(.*?)</i>.*?fraction.*?>(.*?)</i>.*?</dd>', re.S)  # re.S lets . match newlines too
            items = re.findall(pattern, html)
            data = pd.DataFrame(items, columns=['rank','url','title','actors','time','score1','score2']).set_index('rank')
            df = pd.concat([df, data])
        time.sleep(random.randint(5, 7))
        startpattern = re.compile('"?offset=(.*?)"\n >下一页')  # 下一页 = "next page"
        start = re.findall(startpattern, html)[0]  # offset of the next page
        print(start)
    except:  # findall raises IndexError on the last page
        break
df['actors'] = df['actors'].apply(lambda x: x.strip())
df['score'] = df['score1'] + df['score2']
df = df.drop(['score1','score2'], axis=1)
df
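The bare except above ends the loop on any error, including genuine bugs. A sketch of a narrower helper (next_offset is a hypothetical name) that returns None when the next-page link is gone, so the loop can break explicitly:
In [ ]:
import re

def next_offset(html):
    """Return the next page's offset string, or None on the last page."""
    startpattern = re.compile('"?offset=(.*?)"\n >下一页')
    matches = startpattern.findall(html)
    return matches[0] if matches else None

# inside the crawl loop:
#     start = next_offset(html)
#     if start is None:
#         break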
A Bite of China: extracting short reviews¶
In [ ]:
url = 'https://movie.douban.com/subject/25875034/comments?start=0&limit=20&sort=new_score&status=P'
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36','cookie':'bid=sr4hL6ULJZA; __utmc=30149280; __utmz=30149280.1559699651.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; __utmc=223695111; __utmz=223695111.1559699651.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; _pk_ref.100001.4cf6=%5B%22%22%2C%22%22%2C1559719122%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DHVNFSp7Kedg2iff8jV5Gh4apCdh4px7KJUDsPVN9jQGvd450yUr88E_vy1zDblW8yg48uI39AjTPYbcSfQRKk4IAY97KC7f4AIFxKb7GU4uiGBXFFuQW6h_AO5bwlYeL7mjmYu4oHF56u1iSom-xBa%26wd%3D%26eqid%3Dd969767100044ab2000000065cf720c1%22%5D; _pk_ses.100001.4cf6=*; __utma=30149280.1296672229.1559699651.1559699651.1559719122.2; __utma=223695111.1073407875.1559699651.1559699651.1559719122.2; __utmb=223695111.0.10.1559719122; ap_v=0,6.0; __utmb=30149280.1.10.1559719122; _pk_id.100001.4cf6=275e5934e733acc1.1559699651.2.1559719523.1559699691.'}  # headers as a dict; the cookie string is copied from the browser
response = requests.get(url, headers=headers)
html = response.text  # page source
html
In [ ]:
reg = 'class="short">(.*?)</span>'  # pattern for the review text
reg = re.compile(reg, re.S)  # re.S lets . match newlines too
data = re.findall(reg, html)
data
In [ ]:
start = '0'
baseurl = 'https://movie.douban.com/subject/25875034/comments?start=' + start + '&limit=20&sort=new_score&status=P'
start = re.findall(r'<a href="\?start=(\d+)&limit=20&sort=new_score&status=P&percent_type=" data-page="" class="next">后页 ></a>', html, re.S)[0]  # offset of the next page (后页 = "next page" link)
start
In [ ]:
# Complete code: crawl multiple pages of reviews
import os
os.chdir(r'C:\Users\chenh\Desktop')
import requests
import re
import time
import random
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36','cookie':'bid=sr4hL6ULJZA; __utmc=30149280; __utmz=30149280.1559699651.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; __utmc=223695111; __utmz=223695111.1559699651.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; _pk_ref.100001.4cf6=%5B%22%22%2C%22%22%2C1559719122%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DHVNFSp7Kedg2iff8jV5Gh4apCdh4px7KJUDsPVN9jQGvd450yUr88E_vy1zDblW8yg48uI39AjTPYbcSfQRKk4IAY97KC7f4AIFxKb7GU4uiGBXFFuQW6h_AO5bwlYeL7mjmYu4oHF56u1iSom-xBa%26wd%3D%26eqid%3Dd969767100044ab2000000065cf720c1%22%5D; _pk_ses.100001.4cf6=*; __utma=30149280.1296672229.1559699651.1559699651.1559719122.2; __utma=223695111.1073407875.1559699651.1559699651.1559719122.2; __utmb=223695111.0.10.1559719122; ap_v=0,6.0; __utmb=30149280.1.10.1559719122; _pk_id.100001.4cf6=275e5934e733acc1.1559699651.2.1559719523.1559699691.'}  # headers as a dict; the cookie string is copied from the browser
start = '0'
while True:
    try:
        time.sleep(random.randint(5, 10))
        url = 'https://movie.douban.com/subject/25875034/comments?start=' + start + '&limit=20&sort=new_score&status=P&percent_type='
        response = requests.get(url, headers=headers)
        html = response.text
        reg = 'class="short">(.*?)</span>'  # pattern for the review text
        reg = re.compile(reg, re.S)  # re.S lets . match newlines too
        data = re.findall(reg, html)
        start = re.findall(r'<a href="\?start=(\d+)&limit=20&sort=new_score&status=P&percent_type=" data-page="" class="next">后页 ></a>', html, re.S)[0]
        if response.status_code == 200:
            print('Fetching data for start=' + start)
            for one_data in data:
                with open(r'豆瓣舌尖中国.txt', 'a', encoding='utf-8') as f:  # context manager
                    f.write(one_data + '\n')
    except Exception as e:
        print(e)  # the last page has no next link, so findall raises IndexError
        break
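The long cookie string above was copied straight from the browser. An alternative sketch using requests.Session, which stores headers and cookies once and reuses them on every call (only the bid cookie is set here, as an example):
In [ ]:
import requests

session = requests.Session()
session.headers.update({'User-Agent': 'Mozilla/5.0'})
# a Session keeps cookies across requests; set one manually as an example
session.cookies.set('bid', 'sr4hL6ULJZA', domain='.douban.com')
response = session.get('https://movie.douban.com/subject/25875034/'
                       'comments?start=0&limit=20&sort=new_score&status=P')
print(response.status_code)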
In [ ]:
import wordcloud
In [ ]:
f=open("豆瓣舌尖中国.txt","rb")
txt=f.read()
f.close()
In [ ]:
txt.decode('utf-8')
In [ ]:
txt1='小罐茶的导演拍的\r\n真的难看 ,花打四门讲的这么牛逼'
In [ ]:
import jieba  # Chinese word segmentation
words = jieba.lcut(txt1)  # segment the sample string into a list of words
words
In [ ]:
newtxt= ' '.join(words)
newtxt
In [ ]:
# Word-cloud analysis
import os
%matplotlib inline
os.chdir(r'D:\CDA\File')
import wordcloud
import matplotlib.pyplot as plt
import jieba
f = open("豆瓣舌尖中国.txt", "rb")
txt = f.read()
f.close()
words = jieba.lcut(txt)  # a list of word strings (jieba decodes the bytes)
newtxt = ' '.join(words)  # join the list into one space-separated string
w = wordcloud.WordCloud(width=1000, height=700, font_path="C:/Windows/Fonts/STLITI.ttf", background_color='white')
wc = w.generate(newtxt)  # wc, so the wordcloud module is not shadowed
plt.imshow(wc)
plt.axis('off')
In [ ]:
txt.decode('utf-8')
In [ ]:
# Word-cloud analysis with a background mask image
import os
%matplotlib inline
from wordcloud import STOPWORDS
os.chdir(r'D:\CDA\File')
import wordcloud
import matplotlib.pyplot as plt
import jieba
f = open("豆瓣舌尖中国.txt", "rb")
txt = f.read()
words = jieba.lcut(txt)  # segment the text; output is a list of words
newtxt = ' '.join(words)  # join the list into one string
background_image = plt.imread('mask.jpg')  # words are drawn inside this shape
w = wordcloud.WordCloud(width=1000, height=700, mask=background_image, max_words=200, stopwords=STOPWORDS, font_path="C:/Windows/Fonts/STHUPO.TTF", background_color='white')
wc = w.generate(newtxt)
plt.imshow(wc)
plt.axis('off')
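STOPWORDS only covers English; for Chinese reviews the common function words have to be supplied by hand. A sketch of extending the set (the listed words are just examples):
In [ ]:
from wordcloud import STOPWORDS, WordCloud

# STOPWORDS is a plain Python set of English words; union in Chinese fillers
stopwords = set(STOPWORDS) | {'的', '了', '是', '我', '都', '很'}
w = WordCloud(stopwords=stopwords, font_path='C:/Windows/Fonts/STHUPO.TTF')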
In [ ]:
# Complete code: crawl the reviews, then draw the word cloud
import os
os.chdir(r'D:\CDA\File')
import requests
import re
import time
import random
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36','cookie':'bid=sr4hL6ULJZA; __utmc=30149280; __utmz=30149280.1559699651.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; __utmc=223695111; __utmz=223695111.1559699651.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; _pk_ref.100001.4cf6=%5B%22%22%2C%22%22%2C1559719122%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DHVNFSp7Kedg2iff8jV5Gh4apCdh4px7KJUDsPVN9jQGvd450yUr88E_vy1zDblW8yg48uI39AjTPYbcSfQRKk4IAY97KC7f4AIFxKb7GU4uiGBXFFuQW6h_AO5bwlYeL7mjmYu4oHF56u1iSom-xBa%26wd%3D%26eqid%3Dd969767100044ab2000000065cf720c1%22%5D; _pk_ses.100001.4cf6=*; __utma=30149280.1296672229.1559699651.1559699651.1559719122.2; __utma=223695111.1073407875.1559699651.1559699651.1559719122.2; __utmb=223695111.0.10.1559719122; ap_v=0,6.0; __utmb=30149280.1.10.1559719122; _pk_id.100001.4cf6=275e5934e733acc1.1559699651.2.1559719523.1559699691.'}  # headers as a dict; the cookie string is copied from the browser
start = '0'
while True:
    try:
        time.sleep(random.randint(5, 10))
        url = 'https://movie.douban.com/subject/25875034/comments?start=' + start + '&limit=20&sort=new_score&status=P&percent_type='
        response = requests.get(url, headers=headers)
        html = response.text
        reg = 'class="short">(.*?)</span>'  # pattern for the review text
        reg = re.compile(reg, re.S)  # re.S lets . match newlines too
        data = re.findall(reg, html)
        start = re.findall(r'<a href="\?start=(\d+)&limit=20&sort=new_score&status=P&percent_type=" data-page="" class="next">后页 ></a>', html, re.S)[0]
        if response.status_code == 200:
            print('Fetching data for start=' + start)
            for one_data in data:
                with open(r'豆瓣舌尖中国.txt', 'a', encoding='utf-8') as f:  # context manager
                    f.write(one_data + '\n')
    except Exception as e:
        print(e)  # the last page has no next link, so findall raises IndexError
        break
# Word-cloud analysis
import os
%matplotlib inline
os.chdir(r'D:\CDA\File')
import wordcloud
import matplotlib.pyplot as plt
import jieba
f = open("豆瓣舌尖中国.txt", "rb")
txt = f.read()
f.close()
words = jieba.lcut(txt)  # a list of word strings
newtxt = ' '.join(words)
w = wordcloud.WordCloud(width=1000, height=700, font_path="C:/Windows/Fonts/STLITI.ttf", background_color='white')
wc = w.generate(newtxt)  # wc, so the wordcloud module is not shadowed
plt.imshow(wc)
plt.axis('off')
Getting a weather forecast via an API¶
https://dev.heweather.com/docs/api/weather¶
In [ ]:
import requests
city = 'beijing'
url = 'https://free-api.heweather.com/s6/weather/forecast?location={}&key=a0ebb9fb3c2540f29065d187b0121694'.format(city)
response = requests.get(url)
response.text
In [ ]:
import json
dic = json.loads(response.text)
dic['HeWeather6'][0]['daily_forecast']  # the list of daily forecast dicts
In [ ]:
import json
dic = json.loads(response.text)
# max temperature of the third forecast day (index 2; index 0 is today)
dic['HeWeather6'][0]['daily_forecast'][2]['tmp_max']
In [ ]:
# Complete code
import requests
city = 'beijing'
url = 'https://free-api.heweather.com/s6/weather/forecast?location={}&key=a0ebb9fb3c2540f29065d187b0121694'.format(city)
response = requests.get(url)
import json
dic = json.loads(response.text)
# print the three-day forecast
for i in range(3):
    print(dic['HeWeather6'][0]['daily_forecast'][i]['date'])
    print(dic['HeWeather6'][0]['daily_forecast'][i]['cond_txt_d'])
    print(dic['HeWeather6'][0]['daily_forecast'][i]['wind_dir'])
    print(dic['HeWeather6'][0]['daily_forecast'][i]['tmp_max'])
    print('----------------')
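A sketch that factors the lookup into a reusable function (forecast is a hypothetical helper name; the key is the one used above):
In [ ]:
import requests

KEY = 'a0ebb9fb3c2540f29065d187b0121694'

def forecast(city):
    """Return the daily_forecast list for one city from the HeWeather API."""
    url = ('https://free-api.heweather.com/s6/weather/forecast'
           '?location={}&key={}'.format(city, KEY))
    response = requests.get(url)
    response.raise_for_status()  # surface HTTP errors instead of bad JSON
    return response.json()['HeWeather6'][0]['daily_forecast']

for day in forecast('beijing'):
    print(day['date'], day['tmp_max'])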
Read a city list from a file and show each city's max temperature for the next three days¶
In [ ]:
import requests
import json
import time
import random
import pandas as pd
key = 'a0ebb9fb3c2540f29065d187b0121694'
import os
os.chdir(r'D:\CDA\File')
data = pd.read_csv('china-city-list.txt',delimiter='|')
data.columns
In [ ]:
data
In [ ]:
data[' 城市英文 ']  # the "city English name" column (note the padded spaces in the header)
In [ ]:
import requests
import json
import time
import random
import pandas as pd
key = 'a0ebb9fb3c2540f29065d187b0121694'
import os
os.chdir(r'D:\CDA\File')
data = pd.read_csv('china-city-list.txt', delimiter='|')
cities = data[' 城市英文 ']  # the "city English name" column
# collect rows in a list, then build the DataFrame at the end
lst = []
for city in cities[:40]:
    url = 'https://free-api.heweather.com/s6/weather/forecast?location={}&key={}'.format(city, key)
    #print(url)
    response = requests.get(url)
    city_dic = json.loads(response.text)
    #print(city_dic)
    # the city name as the API reports it
    city_name = city_dic['HeWeather6'][0]['basic']['location']
    print(city_name)
    # iterate over the city's three-day forecast
    for item in city_dic['HeWeather6'][0]['daily_forecast']:
        #print(city_name, item['date'], item['tmp_max'], item['tmp_min'])
        lst.append([city_name, item['date'], item['cond_txt_d'], item['tmp_max'], item['tmp_min']])
    time.sleep(random.randint(2, 3))  # pause between cities
# build the result table
df = pd.DataFrame(lst, columns=['name', 'date', 'weather', 'tmp_max', 'tmp_min'])
df
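To compare cities at a glance, the long table can be pivoted into one row per city; a sketch, assuming df was built as above and each (name, date) pair is unique:
In [ ]:
# rows: cities, columns: forecast dates, cells: daily max temperature
wide = df.pivot(index='name', columns='date', values='tmp_max')
wide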
In [ ]:
city_dic
In [ ]:
df