批量爬取中国银行的汇率到数据库,可以指定时间段的爬取 下面是指定2024-1-1 至 2024-07-15这个时间段的汇率。

爬取中国银行汇率到数据库---可以批量指定日期段

 

import random

import requests
from datetime import datetime, timedelta
import logging
import re
import time
from datetime import datetime
import psycopg2
from sshtunnel import SSHTunnelForwarder


class GetRat:
    def __init__(self):
        self.url = 'https://srh.bankofchina.com/search/whpj/search_cn.jsp'

    def get_rate(self):
        """
        获取汇率数据
        """
        url = 'https://srh.bankofchina.com/search/whpj/search_cn.jsp'
        string_time = time.strftime('%Y.%m.%d ', time.localtime()) + '09:45:00'
        timearray = time.strptime(string_time, "%Y.%m.%d %H:%M:%S")
        customtime = int(time.mktime(timearray))
        input = time.strftime('%Y-%m-%d ', time.localtime())
        midill_list = []
        finally_list = []

        try:
            for i in range(1, 15):
                data = {'erectDate': input,
                        'nothing': input,
                        'pjname': '美元',
                        'page': i,
                        }
                respons = requests.post(url, data).text
                res = re.findall(r'<td>(.*?)</td>', respons, re.S)

                res_list = [res[i:i + 7] for i in range(0, len(res), 7)]
                if len(midill_list) == 0:
                    midill_list.append(res_list)
                else:
                    if res_list == midill_list[-1]:
                        break
                    else:
                        midill_list.append(res_list)

            for i in midill_list:
                for p in i:

                    timeArray = time.strptime(p[-1].split('\r')[0], "%Y.%m.%d %H:%M:%S")
                    timeStamp = int(time.mktime(timeArray))
                    if customtime > timeStamp:
                        finally_list.append(p)
            time.sleep(1)
            value_currency = "$"
            # value_rate = float("%.4f" % (float(str(finally_list[1][-2])))) / 100
            # print(finally_list)
            # if len(finally_list[1]) > 2:
            value_rate = float(f"{float(finally_list[1][-2]) / 100:.4f}")  
      #获取到的是中行的折算价,如果要获取到其它的汇率请调整[-2]下标
现汇买入价1现钞买入价2现汇卖出价3现钞卖出价4中行折算价5发布时间6

time.sleep(
1) time_value = finally_list[1][-1].replace('\r\n', '').rstrip() return value_rate, time_value, value_currency # else: # print("获取汇率失败!", finally_list[1][-1].replace('\r\n', '').rstrip()) # return None except Exception as e: raise e def execute_non_query_br(self, SQLstr): # SSH 和数据库配置 ssh_host = '47.47.47.47' # SSH 服务器的IP地址 ssh_username = 'odoo' ssh_password = '######' db_user = 'oooo_user' db_password = '3333333#' db_name = 'BBB' tunnel = None conn = None try: # 建立 SSH 隧道 tunnel = SSHTunnelForwarder( (ssh_host, 22), # SSH 服务器的地址和端口 ssh_username=ssh_username, ssh_password=ssh_password, remote_bind_address=('localhost', 5432) # PostgreSQL 服务器的地址和端口 ) tunnel.start() # 建立数据库连接 conn = psycopg2.connect( host='localhost', # 因为我们绑定到了本地 port=tunnel.local_bind_port, # 使用隧道提供的本地端口 user=db_user, password=db_password, dbname=db_name, # sslmode='require' # 禁用 SSL ) # 执行数据库操作 cur = conn.cursor() cur.execute(SQLstr) conn.commit() return True except Exception as e: logging.error('导入过程出错: %s', e) return False finally: # 确保关闭所有连接 if cur is not None: cur.close() if conn is not None: conn.close() if tunnel is not None: tunnel.stop() # 执行函数 def job_br(): try: obj = GetRat() start_date = datetime.strptime('2024-01-12', '%Y-%m-%d') end_date = datetime.strptime('2024-07-15', '%Y-%m-%d') current_date = start_date count = 0 while current_date <= end_date: date_str = current_date.strftime('%Y-%m-%d') # 模拟系统时间为current_date,以便get_rate获取对应日期的汇率 orig_time = time.localtime time.localtime = lambda: current_date.timetuple() message = obj.get_rate() time.localtime = orig_time # 恢复系统时间 if message and len(message) > 1 and message[-1] == '$': rate = message[0] timestamp = message[1] create_write_date = current_date.strftime('%Y-%m-%d %H:%M:%S') sql_str = 'INSERT INTO br_exchange_rate(crawler_time, crawler_day, rate, create_date, create_uid, write_date, write_uid) VALUES ' \ + f"('{timestamp}', '{timestamp[:10]}', {rate:.4f}, '{create_write_date}', 2, '{create_write_date}', 2)" print('在处理:', date_str, '的汇率为:', rate) logging.info(sql_str) obj.execute_non_query_br(SQLstr=sql_str) logging.info('汇率已经写入数据库!') msg_all = f'汇率:{rate:.4f} | 时间:{timestamp}\r\n' with open(r'huilv.txt', mode='a', encoding="utf-8") as file_handle: file_handle.write(msg_all) file_handle.write('\n') else: obj.send_mail(f"警告: {date_str} 的汇率数据库没有获取到数据,请检查状态!!!") current_date += timedelta(days=1) count += 1 # 每处理3个日期后,休息随机的时间(1到2分钟) if count % 3 == 0: sleep_time = random.randint(60, 120) logging.info(f"休息 {sleep_time} 秒...") time.sleep(sleep_time) except Exception as f: logging.error(f"An error occurred: {f}") raise f # 执行函数 job_br()

 

posted @ 2024-07-15 15:08  *感悟人生*  阅读(16)  评论(0编辑  收藏  举报