批量爬取中国银行的汇率到数据库,可以指定时间段的爬取 下面是指定2024-1-1 至 2024-07-15这个时间段的汇率。
爬取中国银行汇率到数据库---可以批量指定日期段
import random import requests from datetime import datetime, timedelta import logging import re import time from datetime import datetime import psycopg2 from sshtunnel import SSHTunnelForwarder class GetRat: def __init__(self): self.url = 'https://srh.bankofchina.com/search/whpj/search_cn.jsp' def get_rate(self): """ 获取汇率数据 """ url = 'https://srh.bankofchina.com/search/whpj/search_cn.jsp' string_time = time.strftime('%Y.%m.%d ', time.localtime()) + '09:45:00' timearray = time.strptime(string_time, "%Y.%m.%d %H:%M:%S") customtime = int(time.mktime(timearray)) input = time.strftime('%Y-%m-%d ', time.localtime()) midill_list = [] finally_list = [] try: for i in range(1, 15): data = {'erectDate': input, 'nothing': input, 'pjname': '美元', 'page': i, } respons = requests.post(url, data).text res = re.findall(r'<td>(.*?)</td>', respons, re.S) res_list = [res[i:i + 7] for i in range(0, len(res), 7)] if len(midill_list) == 0: midill_list.append(res_list) else: if res_list == midill_list[-1]: break else: midill_list.append(res_list) for i in midill_list: for p in i: timeArray = time.strptime(p[-1].split('\r')[0], "%Y.%m.%d %H:%M:%S") timeStamp = int(time.mktime(timeArray)) if customtime > timeStamp: finally_list.append(p) time.sleep(1) value_currency = "$" # value_rate = float("%.4f" % (float(str(finally_list[1][-2])))) / 100 # print(finally_list) # if len(finally_list[1]) > 2: value_rate = float(f"{float(finally_list[1][-2]) / 100:.4f}")
#获取到的是中行的折算价,如果要获取到其它的汇率请调整[-2]下标
现汇买入价1 | 现钞买入价2 | 现汇卖出价3 | 现钞卖出价4 | 中行折算价5 | 发布时间6 |
---|
time.sleep(1) time_value = finally_list[1][-1].replace('\r\n', '').rstrip() return value_rate, time_value, value_currency # else: # print("获取汇率失败!", finally_list[1][-1].replace('\r\n', '').rstrip()) # return None except Exception as e: raise e def execute_non_query_br(self, SQLstr): # SSH 和数据库配置 ssh_host = '47.47.47.47' # SSH 服务器的IP地址 ssh_username = 'odoo' ssh_password = '######' db_user = 'oooo_user' db_password = '3333333#' db_name = 'BBB' tunnel = None conn = None try: # 建立 SSH 隧道 tunnel = SSHTunnelForwarder( (ssh_host, 22), # SSH 服务器的地址和端口 ssh_username=ssh_username, ssh_password=ssh_password, remote_bind_address=('localhost', 5432) # PostgreSQL 服务器的地址和端口 ) tunnel.start() # 建立数据库连接 conn = psycopg2.connect( host='localhost', # 因为我们绑定到了本地 port=tunnel.local_bind_port, # 使用隧道提供的本地端口 user=db_user, password=db_password, dbname=db_name, # sslmode='require' # 禁用 SSL ) # 执行数据库操作 cur = conn.cursor() cur.execute(SQLstr) conn.commit() return True except Exception as e: logging.error('导入过程出错: %s', e) return False finally: # 确保关闭所有连接 if cur is not None: cur.close() if conn is not None: conn.close() if tunnel is not None: tunnel.stop() # 执行函数 def job_br(): try: obj = GetRat() start_date = datetime.strptime('2024-01-12', '%Y-%m-%d') end_date = datetime.strptime('2024-07-15', '%Y-%m-%d') current_date = start_date count = 0 while current_date <= end_date: date_str = current_date.strftime('%Y-%m-%d') # 模拟系统时间为current_date,以便get_rate获取对应日期的汇率 orig_time = time.localtime time.localtime = lambda: current_date.timetuple() message = obj.get_rate() time.localtime = orig_time # 恢复系统时间 if message and len(message) > 1 and message[-1] == '$': rate = message[0] timestamp = message[1] create_write_date = current_date.strftime('%Y-%m-%d %H:%M:%S') sql_str = 'INSERT INTO br_exchange_rate(crawler_time, crawler_day, rate, create_date, create_uid, write_date, write_uid) VALUES ' \ + f"('{timestamp}', '{timestamp[:10]}', {rate:.4f}, '{create_write_date}', 2, '{create_write_date}', 2)" print('在处理:', date_str, '的汇率为:', rate) logging.info(sql_str) obj.execute_non_query_br(SQLstr=sql_str) logging.info('汇率已经写入数据库!') msg_all = f'汇率:{rate:.4f} | 时间:{timestamp}\r\n' with open(r'huilv.txt', mode='a', encoding="utf-8") as file_handle: file_handle.write(msg_all) file_handle.write('\n') else: obj.send_mail(f"警告: {date_str} 的汇率数据库没有获取到数据,请检查状态!!!") current_date += timedelta(days=1) count += 1 # 每处理3个日期后,休息随机的时间(1到2分钟) if count % 3 == 0: sleep_time = random.randint(60, 120) logging.info(f"休息 {sleep_time} 秒...") time.sleep(sleep_time) except Exception as f: logging.error(f"An error occurred: {f}") raise f # 执行函数 job_br()