第二次作业

 

一、

 

(1)在中国气象网(http://www.weather.com.cn)爬取给定城市集的7日天气预报,并保存在数据库中。

from bs4 import BeautifulSoup
from bs4 import UnicodeDammit
import urllib.request
import sqlite3

class WeatherDB:
    """SQLite-backed store for scraped weather forecasts.

    Rows are kept in the ``weathers`` table, whose primary key is
    (wCity, wDate), so a second row for the same city and date is rejected.
    """

    def openDB(self, path="weathers.db"):
        """Open the database and start from an empty ``weathers`` table.

        Args:
            path: SQLite database file. Defaults to ``weathers.db`` in the
                current directory; ``":memory:"`` gives a throwaway database.
        """
        self.con = sqlite3.connect(path)
        self.cursor = self.con.cursor()
        # "if not exists" tolerates only the table-already-exists case;
        # any other failure is raised instead of being mistaken for it.
        self.cursor.execute("create table if not exists weathers (wNum varchar(16),wCity varchar(16),wDate varchar(16),wWeather varchar(64),wTemp varchar(32),constraint pk_weather primary key (wCity,wDate))")
        # Every run stores a fresh set of forecasts, so drop any old rows.
        self.cursor.execute("delete from weathers")

    def closeDB(self):
        """Commit pending changes and close the connection."""
        self.con.commit()
        self.con.close()

    def insert(self,num,city,date,weather,temp):
        """Insert one forecast row; report (but do not raise) database errors.

        Args:
            num: sequence number of the forecast entry.
            city: city name.
            date: forecast date text.
            weather: weather description text.
            temp: temperature text.
        """
        try:
            self.cursor.execute("insert into weathers (wNum,wCity,wDate,wWeather,wTemp) values(?,?,?,?,?)",(num,city,date,weather,temp))
        except sqlite3.Error as err:
            # Best effort: e.g. a duplicate (city, date) must not stop the run.
            print(err)

    def show(self):
        """Print every stored row as a fixed-width table."""
        rowFormat = "%-16s%-16s%-16s%-32s%-16s"
        self.cursor.execute("select * from weathers")
        print(rowFormat%("序号","地区","日期","天气信息","温度"))
        for row in self.cursor.fetchall():
            print(rowFormat%tuple(row[:5]))

class WeatherForecast:
    """Scrape forecasts from weather.com.cn and store them through WeatherDB."""

    # Seconds to wait for the forecast page before giving up.
    timeout=10

    def __init__(self):
        self.headers={
            "User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 6.0 x64; en-US; rv:1.9pre) Gecko/2008072421 Minefield/3.0.2pre"
        }
        # City name -> code used in the forecast page URL.
        self.cityCode={"北京":"101010100","上海":"101020100","广州":"101280101","深圳":"101280601"}
        # Set by process(); while it is None forecastCity() only prints.
        self.db=None

    @staticmethod
    def _joinTemp(high,low):
        """Return "high/low", or just *low* when no high value is available."""
        return high+"/"+low if high else low

    def _parseDay(self,li):
        """Extract (date, weather, temperature) text from one forecast <li>.

        Raises:
            IndexError: if the date, weather or low-temperature element is
                missing from the entry.
        """
        date=li.select('h1')[0].text
        weather=li.select('p[class="wea"]')[0].text
        # The high-temperature <span> is optional here: indexing it with [0]
        # unconditionally made any entry without it fail and be skipped.
        highs=li.select('p[class="tem"] span')
        high=highs[0].text if highs else ""
        low=li.select('p[class="tem"] i')[0].text
        return date,weather,self._joinTemp(high,low)

    def forecastCity(self,city):
        """Fetch, print and (when a database is open) store one city's forecast.

        Args:
            city: a key of ``self.cityCode``; unknown cities are reported
                and skipped.

        Network and parsing errors are printed rather than raised.
        """
        if city not in self.cityCode:
            print(city+" code cannot be found")
            return
        url="http://www.weather.com.cn/weather/"+self.cityCode[city]+".shtml"
        try:
            req=urllib.request.Request(url,headers=self.headers)
            # "with" closes the response; the timeout avoids hanging forever.
            with urllib.request.urlopen(req,timeout=self.timeout) as resp:
                raw=resp.read()
            dammit=UnicodeDammit(raw,["utf-8","gbk"])
            soup=BeautifulSoup(dammit.unicode_markup,"lxml")
            lis=soup.select("ul[class='t clearfix'] li")
        except Exception as err:
            print(err)
            return
        # num is the entry's 1-based position in the list, even if an
        # earlier entry could not be parsed.
        for num,li in enumerate(lis,start=1):
            try:
                date,weather,temp=self._parseDay(li)
            except IndexError as err:
                print(err)
                continue
            print(num,city,date,weather,temp)
            if self.db is not None:
                self.db.insert(num,city,date,weather,temp)

    def process(self,cities):
        """Scrape every city in *cities* into a fresh database and print it.

        Args:
            cities: iterable of city names to pass to forecastCity().
        """
        self.db=WeatherDB()
        self.db.openDB()
        try:
            for city in cities:
                self.forecastCity(city)
            self.db.show()
        finally:
            # Always commit and release the connection, even on an error.
            self.db.closeDB()
            self.db=None

# Cities whose forecasts are scraped and stored by this run.
targetCities=["北京","上海","广州","深圳"]
ws=WeatherForecast()
ws.process(targetCities)
print("完成")

 

心得体会

本次作业的代码主要是参考课本上的例题,对书本给出的代码进行理解和复现,学会了数据库的应用。

作业二:

 

(1)用requests和BeautifulSoup库方法定向爬取股票相关信息。

  import re

  import requests

  def getHtml(fs, pn):
      """Fetch one page of the Eastmoney stock list and return its raw records.

      Args:
          fs: filter string placed in the ``fs`` query parameter (selects
              which stocks are listed).
          pn: page number placed in the ``pn`` query parameter.

      Returns:
          A list with the text found between ``"diff":[{`` and ``}]`` in the
          response; the list is empty when the response has no such section.
      """
      url = "http://56.push2.eastmoney.com/api/qt/clist/get?cb=jQuery112409968248217612661_1601548126340&pn=" + str(pn) + "&pz=20&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&fid=f3&fs=" + fs + "&fields=f12,f14,f2,f3,f4,f5,f6,f7"
      # Without a timeout a stalled connection would block the script forever.
      r = requests.get(url, timeout=10)
      # Raw string: "\[" and "\{" are regex escapes, not string escapes.
      pat = r'"diff":\[\{(.*?)\}\]'
      return re.findall(pat, r.text, re.S)
  # Fetch and print stock data
  # NOTE(review): the last column is fed from field f7 and is labelled "涨幅",
  # next to a separate "涨跌幅" column - confirm the intended label.
  print("序号\t代码\t名称\t最新价\t涨跌幅\t跌涨额\t成交量\t成交额\t涨幅")
  def getOnePageStock(sort, fs, pn):
      """Print one page of stocks and return the next free row number.

      Args:
          sort: row number to give the first stock printed.
          fs: filter string forwarded to getHtml().
          pn: page number forwarded to getHtml().

      Returns:
          ``sort`` advanced by the number of stocks printed; unchanged when
          the page contained no stock data.

      Raises:
          KeyError: if a record lacks one of the requested fields.
      """
      data = getHtml(fs, pn)
      if not data:
          # Nothing matched (e.g. an empty board): there is nothing to print.
          return sort
      for record in data[0].split("},{"):
          # '"f2":1.0,"f12":"000001",...' -> {"f2": "1.0", "f12": "000001", ...}
          # Looking fields up by name does not depend on the response order.
          fields = dict(item.partition(":")[::2] for item in record.replace('"', "").split(","))
          print(sort, fields["f12"], fields["f14"], fields["f2"], fields["f3"], fields["f4"], fields["f5"], fields["f6"], fields["f7"])
          sort += 1
      return sort
  def main():
      """Print the first page of stocks for every configured market board."""
      sort = 1  # running row number, continued across boards
      pn = 1  # page number requested for each board
      # Boards to scrape: display name -> ``fs`` filter string.
      # NOTE(review): "科技版" may be a typo for "科创板" - confirm.
      fs = {
          "沪深A股": "m:0+t:6,m:0+t:13,m:0+t:80,m:1+t:2,m:1+t:23",
          "上证A股": "m:1+t:2,m:1+t:23",
          "深证A股": "m:0+t:6,m:0+t:13,m:0+t:80",
          "新股": "m:0+f:8,m:1+f:8",
          "中小板": "m:0+t:13",
          "创业板": "m:0+t:80",
          "科技版": "m:1+t:23",
      }
      for boardFilter in fs.values():
          sort = getOnePageStock(sort, boardFilter, pn)

  # Run the scraper as soon as this script is executed.
  main()

 

 

 

 心得体会:与之前作业不同的是,本次作业需要抓取js信息,学会了使用抓包工具获取网页数据。

 三、

 

(1)根据自选3位数+学号后3位选取股票,获取该股票信息。

 

 

  import re

  import requests
  def getHtml(sort):
      """Fetch the quote for one stock code and return the raw record text.

      Args:
          sort: stock code as a string; it is appended to ``secid=0.`` in
              the request URL.

      Returns:
          A list of substrings of the response that start with ``{"f`` and
          run to the next ``}``; empty when the response has none.
      """
      # The query string must not contain spaces: the original had a run of
      # them before "ut", which changed that parameter's name.
      url = "http://push2.eastmoney.com/api/qt/stock/get?ut=fa5fd1943c7b386f172d6893dbfba10b&invt=2&fltt=2&fields=f44,f45,f46,f57,f58&secid=0."+sort+"&cb=jQuery112409396991179940428_1601692476366"
      # Without a timeout a stalled connection would block the script forever.
      r = requests.get(url, timeout=10)
      # Braces are escaped so they are unambiguously literal characters.
      return re.findall(r'\{"f.*?\}', r.text)
  # Fetch and print the stock's data
  print("代码\t名称\t今日开\t今日最高\t今日最低")
  def getOnePageStock(sort):
      """Print one line for stock *sort*, in the column order of the header.

      Args:
          sort: stock code forwarded to getHtml().

      Raises:
          IndexError: if getHtml() found no record for the code.
          KeyError: if the record lacks one of the requested fields.
      """
      data = getHtml(sort)
      # Deliberately unguarded: an empty result raises IndexError.
      record = data[0]
      # '{"f44":1.0,...,"f58":"name"}' -> {"f44": "1.0", ..., "f58": "name"}
      # Stripping the braces first keeps the whole name; slicing it with
      # [4:8] only worked for names of exactly four characters.
      fields = dict(item.partition(":")[::2] for item in record.strip("{}").replace('"', "").split(","))
      print(fields["f57"], fields["f58"], fields["f46"], fields["f44"], fields["f45"])
  def main():
      """Look up one hard-coded stock and report when no data is found for it."""
      sort = "002129"  # Not every code maps to a real stock, so an existing one is chosen up front
      try:
          getOnePageStock(sort)
      except LookupError:
          # Only a failed lookup (no record or a missing field) means the
          # stock is absent; other errors, and Ctrl-C, are not hidden.
          print("该股票不存在!")
  # Run the lookup as soon as this script is executed.
  main()

 心得体会:在上一次作业的基础上,手动选择股票代码

 

posted @ 2020-10-09 21:53  ww嘤  阅读(171)  评论(0编辑  收藏  举报