Python 爬虫练习

#百度热搜
#
# -*- coding: utf-8 -*-
# Fetch the Baidu hot-search ranking page and print a list of
# [title, search count] pairs, one per ranking row.
import requests
from bs4 import BeautifulSoup

url = "http://top.baidu.com/buzz?b=1&fr=topindex"
header = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36",
}
content = []

# Without a timeout, requests waits indefinitely on an unresponsive server.
r = requests.get(url, headers=header, timeout=10)
# Fail loudly on an HTTP error instead of silently parsing an error page.
r.raise_for_status()
# The original re-decoded each title with
# .encode("iso-8859-1").decode("gbk"), which only works while requests falls
# back to ISO-8859-1. Declaring the encoding up front decodes the whole page
# once. NOTE(review): assumes the page is still served as GBK - confirm.
r.encoding = "gbk"
respond = r.text
soup = BeautifulSoup(respond, "html.parser")

# Skip the first <tr>; presumably it is the table header row.
rows = soup.find_all("tr")[1:]
for row in rows:
    title_tag = row.find(class_="list-title")
    number_tag = row.find(class_="last")
    # Rows lacking either cell are skipped rather than raising
    # AttributeError on None.
    if title_tag is None or number_tag is None:
        continue
    content.append([title_tag.text, number_tag.text.strip()])

print(content)

 

#天气预报
#
# -*- coding: utf-8 -*-
# Fetch the weather.com.cn forecast page for station 101010100 and print a
# list of [title, weather, temperature, wind] entries.
import requests
from bs4 import BeautifulSoup

url = "http://www.weather.com.cn/weather/101010100.shtml"
header = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36",
}
content = []

# Without a timeout, requests waits indefinitely on an unresponsive server.
r = requests.get(url, headers=header, timeout=10)
# Fail loudly on an HTTP error instead of silently parsing an error page.
r.raise_for_status()
r.encoding = "utf-8"
respond = r.text
soup = BeautifulSoup(respond, "html.parser")

# Attribute selector: matches <ul> whose class attribute is exactly
# "t clearfix".
forecast_lists = soup.select("ul[class='t clearfix']")
for forecast in forecast_lists:
    # Only the first match of each selector inside the list is read, the
    # same element the original picked with select(...)[0].
    title_tag = forecast.select_one("h1")
    wea_tag = forecast.select_one(".wea")
    tem_tag = forecast.select_one(".tem")
    win_tag = forecast.select_one(".win")
    # select_one returns None when nothing matches; skip incomplete entries
    # instead of raising IndexError as select(...)[0] did.
    if None in (title_tag, wea_tag, tem_tag, win_tag):
        continue
    content.append(
        [
            title_tag.text,
            wea_tag.text,
            tem_tag.text.strip(),
            win_tag.text.strip(),
        ]
    )

print(content)

 

posted @ 2020-07-06 15:15  南山散人  阅读(222)  评论(0)  编辑  收藏  举报