从中国期货监控中心爬取每日交易数据
# 中国期货监控中心 爬取 每日交易数据
import os
import time
import traceback
from share import SI
from datetime import datetime, timedelta, date
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver import ChromeOptions
from PySide2.QtWidgets import QMessageBox
from PySide2.QtCore import Qt
from PySide2.QtGui import QIcon
from VeriCodeFromBhshare import getLocalVericode  # captcha OCR via www.bhshare.cn
from VeriCodeFromBaidu import getwords as bdgetgetwords  # captcha OCR via Baidu

# Result codes for a daily-settlement query on the CFMMC investor service site.
DAILY_SETTLEMENT_RESULT_CODE_OK = 1001  # no notice shown: the query succeeded
DAILY_SETTLEMENT_RESULT_CODE_UNKNOW = 1002  # a notice was shown that is not recognised
DAILY_SETTLEMENT_RESULT_CODE_NOT_TRADE = 1003  # the requested day is not a trading day
DAILY_SETTLEMENT_RESULT_CODE_NOT_REGISTER = 1004  # the broker has not reported data for that day

DATE_FORMAT = '%Y-%m-%d'
CAPTCHA_FILENAME = 'imgVeriCode.png'


class NotRegistered(Exception):
    """Raised when the site reports that no data was submitted for the requested day."""


class DailySettlementQueryError(Exception):
    """Raised when the site answers a query with an unrecognised notice."""


class CfmmcBrowser(object):
    """Log in to the CFMMC investor service site and download daily settlements.

    All of the work happens in the constructor: it starts Chrome, logs in,
    downloads one report per date in ``newDateList`` and always closes the
    browser afterwards.
    """

    def __init__(self, userID, password, saveto='./', datelist=None):
        """Run a complete download session.

        :param userID: account id typed into the login form
        :param password: account password typed into the login form
        :param saveto: directory Chrome is told to download into
        :param datelist: iterable of ``datetime`` objects to query; entries that
            are not ``datetime`` or are later than "now minus one day" are
            ignored. When nothing usable remains the previous three days are used.
        """
        super().__init__()
        self.userID = userID
        self.password = password
        self.saveto = saveto
        self.ocrTOKEN = '424a894cd'  # token source: http://www.bhshare.cn/imgcode/gettoken
        self.ocrURL = 'http://www.bhshare.cn/imgcode/'  # OCR endpoint
        self.newDateList = self._buildDateList(datelist)
        print(self.newDateList)

        self.__cfmmcUrl = 'https://investorservice.cfmmc.com/login.do'
        self.browser = self._createBrowser()
        try:
            # Exceptions raised by login() still propagate to the caller, but
            # the browser is now closed on every path, not only after success.
            if self.login():
                try:
                    self.downloadDailySettlement()
                except DailySettlementQueryError:
                    pass  # already logged by downloadDailySettlement()
                except Exception:
                    # Best effort: one failed account must not abort the caller,
                    # but the failure is no longer swallowed silently.
                    traceback.print_exc()
        finally:
            self._removeCaptchaImage()
            self.browser.quit()

    @staticmethod
    def _buildDateList(datelist):
        """Return the query dates as 'YYYY-MM-DD' strings."""
        today = datetime.today()
        latest = today - timedelta(days=1)
        result = [
            d.strftime(DATE_FORMAT)
            for d in (datelist or [])
            if isinstance(d, datetime) and d <= latest
        ]
        if not result:
            # Parenthesised on purpose: strftime must be applied to the
            # datetime result, not to the timedelta.
            result = [
                (today - timedelta(days=back)).strftime(DATE_FORMAT)
                for back in (3, 2, 1)
            ]
        # NOTE(review): the original comment says data is available from 5 pm,
        # but `> 17` only becomes true at 18:00 - confirm which is intended.
        if datetime.now().hour > 17:
            result.append(today.strftime(DATE_FORMAT))
        return result

    @staticmethod
    def _captchaPath():
        """Return the path of the temporary captcha screenshot."""
        return os.path.join(os.getcwd(), CAPTCHA_FILENAME)

    def _removeCaptchaImage(self):
        """Delete the temporary captcha screenshot if it exists."""
        picfile = self._captchaPath()
        if os.path.exists(picfile):
            os.remove(picfile)

    def _createBrowser(self):
        """Start Chrome with a single, merged set of options."""
        # The original built two ChromeOptions objects and passed both; the
        # deprecated `chrome_options` argument replaces `options`, so the
        # binary location and excludeSwitches were dropped. Use one object.
        options = ChromeOptions()
        # Absolute path so the default './' resolves to a real directory.
        prefs = {"download.default_directory": os.path.abspath(self.saveto)}
        options.add_experimental_option("prefs", prefs)
        options.add_argument('--disable-extensions')
        options.add_argument('--ignore-certificate-errors')
        options.add_argument('--ignore-ssl-errors')
        options.add_argument('--disable-gpu')
        options.add_argument('--no-sandbox')
        options.add_experimental_option('excludeSwitches', ['enable-logging'])
        options.binary_location = SI.chromepath
        chromePath = os.path.join(SI.appPath, 'chromedriver.exe')
        return webdriver.Chrome(executable_path=chromePath, options=options)

    def login(self):
        """Fill in the login form, solve the captcha and submit.

        :return: bool, True when the page shown after submitting carries the
            daily settlement title
        """
        self.browser.get(self.__cfmmcUrl)

        userID = self.browser.find_element(By.NAME, 'userID')
        userID.clear()
        userID.send_keys(self.userID)
        self.browser.find_element(By.NAME, 'password').send_keys(self.password)

        imgfile = self._captchaPath()
        try:
            # Screenshot only the captcha element, then OCR it.
            self.browser.find_element(By.ID, "imgVeriCode").screenshot(imgfile)
            vericode = getLocalVericode(imgfile)
            if vericode == 'error':
                # First OCR service failed: fall back to Baidu.
                print(' Bhshare error ! ')
                vericode = bdgetgetwords(imgfile).getwords()['words_result'][0]['words']
        finally:
            self._removeCaptchaImage()

        # The form is only submitted for six alphanumeric characters.
        if not (vericode.isalnum() and len(vericode) == 6):
            return False

        codeInput = self.browser.find_element(By.NAME, 'vericode')
        codeInput.clear()
        codeInput.send_keys(vericode)
        self.browser.find_element(By.CLASS_NAME, 'buttonLogin').click()

        try:
            title = self.browser.find_element(By.CLASS_NAME, 'page-title-text').text
        except WebDriverException:
            return False
        return '客户交易结算日报' in title

    def downloadDailySettlement(self):
        """Download the settlement report for every date in ``newDateList``.

        Stops quietly at the first date for which the site reports missing
        data (``NotRegistered``).

        :raises DailySettlementQueryError: after logging the traceback, when
            the site shows an unrecognised notice
        """
        print(' login success ! ')
        try:
            for tdate in self.newDateList:
                print(tdate)
                self.downloadSettlementByDate(tdate)
        except NotRegistered:
            pass  # treated as "nothing more to query"
        except DailySettlementQueryError:
            traceback.print_exc()
            raise

    def downloadSettlementByDate(self, tdate):
        """Query one trade date and open its download link.

        :param tdate: trade date as a 'YYYY-MM-DD' string
        :raises DailySettlementQueryError: the site showed an unrecognised notice
        :raises NotRegistered: the site reported that no data exists for that day
        """
        customerForm = self.browser.find_element(By.NAME, 'customerForm')
        tradeDate = customerForm.find_element(By.NAME, 'tradeDate')
        tradeDate.clear()
        time.sleep(0.3)
        tradeDate.send_keys(tdate)
        tradeDate.submit()
        time.sleep(1)

        code = self.checkDailySettlementQueryNotice()
        if code == DAILY_SETTLEMENT_RESULT_CODE_OK:
            links = self.browser.find_elements(
                By.XPATH, '//*[@id="waitBody"]/table/tbody/tr[1]/td/a')
            if links:
                # Navigate to the href directly; the original notes that
                # click() has no effect on this link.
                self.browser.get(links[0].get_attribute('href'))
                time.sleep(1)
        elif code == DAILY_SETTLEMENT_RESULT_CODE_UNKNOW:
            raise DailySettlementQueryError()
        elif code == DAILY_SETTLEMENT_RESULT_CODE_NOT_TRADE:
            return  # not a trading day: the caller moves on to the next date
        elif code == DAILY_SETTLEMENT_RESULT_CODE_NOT_REGISTER:
            raise NotRegistered()

    def checkDailySettlementQueryNotice(self):
        """Classify the notice shown after a query.

        :return: one of the ``DAILY_SETTLEMENT_RESULT_CODE_*`` constants; OK
            when no notice element is present
        """
        try:
            noticeEle = self.browser.find_element(By.ID, 'waitBody').find_element(By.TAG_NAME, 'li')
            text = noticeEle.text
        except NoSuchElementException:
            return DAILY_SETTLEMENT_RESULT_CODE_OK

        if '的交易结算报告,原因是期货公司未向监控中心报送该日数据' in text:
            return DAILY_SETTLEMENT_RESULT_CODE_NOT_REGISTER
        if '为非交易日,请重新选择交易日期' in text:
            return DAILY_SETTLEMENT_RESULT_CODE_NOT_TRADE
        return DAILY_SETTLEMENT_RESULT_CODE_UNKNOW


def alarmMessageBox(MESSAGE):
    """Show an information dialog containing MESSAGE with a single OK button."""
    # The original called setWindowModality on the class and passed an
    # undefined `self` as parent, so it could never run; build an instance.
    box = QMessageBox(QMessageBox.Information, "提示", MESSAGE, QMessageBox.Ok)
    box.setWindowModality(Qt.NonModal)
    box.exec_()


def questMessage(tltie, MESSAGE, btn1, btn2):
    """Show a question dialog and report whether the first button was chosen.

    :param tltie: window title
    :param MESSAGE: dialog text
    :param btn1: caption of the confirming button
    :param btn2: caption of the rejecting button, or None for a one-button dialog
    :return: True when the confirming button was clicked, otherwise False
    """
    # NOTE(review): both style sheets are kept as written. `cursor`, the
    # nested `hover{...}` rule and `rdb(...)` do not look like valid Qt style
    # sheet syntax - confirm what Qt actually applies before changing them.
    yesStyle = (
        "background: url(images/logInbg.png) no-repeat;background-color: transparent;"
        " border: none;"
        " cursor: pointer;"
        " width: 120px;"
        " height: 32px;"
        " color: #FFF;"
        " font-weight: bold;"
        " text-align: center;"
        " hover{background-color:rdb(255,93,52);}"
    )
    noStyle = (
        "background: url(images/logInbg.png) no-repeat;"
        " border: none;"
        " cursor: pointer;"
        " width: 120px;"
        " height: 32px;"
        " color: #FFF;"
        " font-weight: bold;"
        " text-align: center;"
    )

    box = QMessageBox()
    box.setWindowIcon(QIcon("images/rlylogo.ico"))
    box.setWindowModality(Qt.NonModal)
    box.setIcon(QMessageBox.Question)
    box.setWindowTitle(tltie)
    box.setText(MESSAGE)
    if btn2 is not None:
        box.setStandardButtons(QMessageBox.Yes | QMessageBox.No)
    else:
        box.setStandardButtons(QMessageBox.Yes)

    buttonY = box.button(QMessageBox.Yes)
    buttonY.setStyleSheet(yesStyle)
    buttonY.setText(btn1)
    if btn2 is not None:
        buttonN = box.button(QMessageBox.No)
        buttonN.setStyleSheet(noStyle)
        buttonN.setText(btn2)

    box.exec_()
    return box.clickedButton() == buttonY


def is_valid_date(str):
    """Return True when `str` is a valid 'YYYY-MM-DD' date string."""
    # The parameter name shadows the builtin but is kept for keyword callers.
    # `datetime` here is already the class, so call strptime on it directly.
    try:
        datetime.strptime(str, DATE_FORMAT)
    except (TypeError, ValueError):
        return False
    return True


def _loadDateRange(cfgPath):
    """Read startDate/endDate from the DEFAULT section of an ini file.

    Returns a (start, end) pair of datetimes; an entry is None when the file,
    the option or a valid value is missing.
    """
    import configparser

    def parse(value):
        return datetime.strptime(value, DATE_FORMAT) if is_valid_date(value) else None

    if not os.path.exists(cfgPath):
        return None, None
    # ConfigParser replaces SafeConfigParser, which was removed in Python 3.12.
    parser = configparser.ConfigParser()
    try:
        with open(cfgPath, "r") as f:
            parser.read_file(f)
    except IOError:
        return None, None

    def get(name):
        if parser.has_option("DEFAULT", name):
            return parser.get("DEFAULT", name)
        return None

    return parse(get('startDate')), parse(get('endDate'))


def _main():
    """Script entry point: download settlements for each configured account."""
    startDate, endDate = _loadDateRange(os.path.join(os.getcwd(), 'fmsconfig.ini'))
    if startDate is None or endDate is None:
        endDate = datetime.today()
        startDate = datetime.today() - timedelta(days=int(SI.getDataDays))

    accounts = ['013588768358']
    saveto = SI.input_path
    if not os.path.exists(saveto):
        os.makedirs(saveto)

    if startDate <= endDate:
        # Newest first: endDate, endDate - 1 day, ... down to startDate.
        span = (endDate - startDate).days
        datelist = [endDate - timedelta(days=back) for back in range(span + 1)]
        for account in accounts:
            CfmmcBrowser(account, SI.cfmmcpsw, saveto=saveto, datelist=datelist)


if __name__ == '__main__':
    _main()
合集:
python
分类:
编程相关 / Python
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 分享4款.NET开源、免费、实用的商城系统
· 全程不用写代码,我用AI程序员写了一个飞机大战
· MongoDB 8.0这个新功能碉堡了,比商业数据库还牛
· 记一次.NET内存居高不下排查解决与启示
· 白话解读 Dapr 1.15:你的「微服务管家」又秀新绝活了