从中国期货监控中心爬取每日交易数据

# 中国期货监控中心 爬取 每日交易数据

import os
import time
import traceback
from share import SI
from datetime import datetime, timedelta, date
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver import ChromeOptions
from PySide2.QtWidgets import QMessageBox
from PySide2.QtCore import Qt
from PySide2.QtGui import QIcon
from VeriCodeFromBhshare import getLocalVericode # 从 www.bhshare.cn 进行 图片 验证码识别
from VeriCodeFromBaidu import getwords as bdgetgetwords # 从 百度 进行 图片 验证码识别
# China Futures Market Monitoring Center (CFMMC) - investor query service system.
# Status codes produced by CfmmcBrowser.checkDailySettlementQueryNotice().
DAILY_SETTLEMENT_RESULT_CODE_OK = 1001 # query returned normally (no notice on the page)
DAILY_SETTLEMENT_RESULT_CODE_UNKNOW = 1002 # unrecognised notice / unknown error
DAILY_SETTLEMENT_RESULT_CODE_NOT_TRADE = 1003 # requested date is not a trading day
DAILY_SETTLEMENT_RESULT_CODE_NOT_REGISTER = 1004 # not yet registered (no data reported for that day)
class NotRegistered(Exception):
    """Signal that the site has no settlement data for the requested date."""
class DailySettlementQueryError(Exception):
    """Signal that a daily settlement query returned an unrecognised notice."""
class CfmmcBrowser(object):
    """Log in to the CFMMC investor service site and fetch daily settlements.

    Constructing an instance performs the whole job: it builds the list of
    dates to query, starts Chrome through Selenium, logs in (solving the
    captcha with an OCR helper), requests the settlement report for every
    date, and finally quits the browser and removes the captcha screenshot.
    """

    def __init__(self, userID, password, saveto='./', datelist=None):
        """Run one complete download session.

        :param userID: account id typed into the login form.
        :param password: password typed into the login form.
        :param saveto: directory configured as Chrome's download directory.
        :param datelist: optional iterable of ``datetime`` objects to query;
            entries that are not ``datetime`` or are newer than "now minus one
            day" are ignored. When nothing usable remains, the previous three
            days are queried instead.
        """
        super().__init__()
        self.userID = userID
        self.password = password
        self.saveto = saveto
        # NOTE(review): hard-coded OCR credentials; consider loading them from
        # configuration instead of keeping them in source.
        self.ocrTOKEN = '424a894cd'  # token source: http://www.bhshare.cn/imgcode/gettoken
        self.ocrURL = 'http://www.bhshare.cn/imgcode/'  # OCR endpoint
        self.newDateList = self._buildDateList(datelist)
        print(self.newDateList)
        self.__cfmmcUrl = 'https://investorservice.cfmmc.com/login.do'
        picfile = os.path.join(os.getcwd(), 'imgVeriCode.png').replace('\\', '/')

        # A single options object: the original passed two (``chrome_options``
        # and ``options``), and Selenium lets the deprecated ``chrome_options``
        # override ``options``, which silently discarded the binary location
        # and the excluded switches.
        options = ChromeOptions()
        # Chrome expects an absolute download directory.
        prefs = {"download.default_directory": os.path.abspath(self.saveto)}
        options.add_experimental_option("prefs", prefs)
        options.add_argument('--disable-extensions')
        options.add_argument('-ignore-certificate-errors')
        options.add_argument('--ignore-ssl-errors')
        options.add_argument('--disable-gpu')
        options.add_argument('--no-sandbox')
        options.add_experimental_option('excludeSwitches', ['enable-logging'])
        options.binary_location = SI.chromepath
        chromePath = os.path.join(SI.appPath, 'chromedriver.exe')
        # NOTE(review): ``executable_path`` is kept for the Selenium version
        # this file was written against; Selenium >= 4.10 needs a Service.
        self.browser = webdriver.Chrome(executable_path=chromePath, options=options)
        try:
            if self.login():
                try:
                    self.downloadDailySettlement()
                except DailySettlementQueryError:
                    # Already reported by downloadDailySettlement().
                    pass
                except Exception:
                    # Best effort: report the failure instead of hiding it.
                    traceback.print_exc()
        finally:
            # Always release the browser, even when login fails or raises.
            self.browser.quit()
            if os.path.exists(picfile):
                os.remove(picfile)

    @staticmethod
    def _buildDateList(datelist, now=None):
        """Return the 'YYYY-MM-DD' strings that should be queried.

        :param datelist: iterable of candidate ``datetime`` objects, or None.
        :param now: reference time; defaults to ``datetime.now()``.
        :return: list of date strings.
        """
        if now is None:
            now = datetime.now()
        if datelist is None:
            datelist = ()
        cutoff = now - timedelta(days=1)
        dates = [
            d.strftime('%Y-%m-%d')
            for d in datelist
            if isinstance(d, datetime) and d <= cutoff
        ]
        if not dates:
            # Fall back to the previous three days, oldest first.
            dates = [
                (now - timedelta(days=back)).strftime('%Y-%m-%d')
                for back in (3, 2, 1)
            ]
        # NOTE(review): the source lost its indentation, so this check may
        # originally have applied only to the fallback branch - confirm. The
        # original comment says today's data is available from 5 pm, yet the
        # test only passes from 18:00; the condition is kept unchanged.
        if now.hour > 17:
            dates.append(now.strftime('%Y-%m-%d'))
        return dates

    def login(self):
        """Fill in the login form and submit it.

        :return: True when the page shown after submitting carries the
            expected settlement-report title, False otherwise.
        """
        self.browser.get(self.__cfmmcUrl)
        userField = self.browser.find_element(By.NAME, 'userID')
        userField.clear()
        userField.send_keys(self.userID)
        self.browser.find_element(by=By.NAME, value='password').send_keys(self.password)

        # Screenshot only the captcha element and hand it to the OCR helpers.
        imgfile = os.path.join(os.getcwd(), 'imgVeriCode.png')
        captcha = self.browser.find_element(By.ID, 'imgVeriCode')
        captcha.screenshot(imgfile)
        try:
            vericode = getLocalVericode(imgfile)
            if vericode == 'error':
                # First OCR helper failed: fall back to the Baidu helper.
                print(' Bhshare error ! ')
                try:
                    vericode = bdgetgetwords(imgfile).getwords()['words_result'][0]['words']
                except (KeyError, IndexError, TypeError):
                    return False
        finally:
            if os.path.exists(imgfile):
                os.remove(imgfile)

        # Only a 6-character alphanumeric string is submitted as the captcha.
        if not (isinstance(vericode, str) and vericode.isalnum() and len(vericode) == 6):
            return False

        vericodeField = self.browser.find_element(by=By.NAME, value='vericode')
        vericodeField.clear()
        vericodeField.send_keys(vericode)
        self.browser.find_element(by=By.CLASS_NAME, value='buttonLogin').click()
        try:
            title = self.browser.find_element(by=By.CLASS_NAME, value='page-title-text').text
        except Exception:
            # Any failure to read the title is treated as a failed login.
            return False
        return '客户交易结算日报' in title

    def downloadDailySettlement(self):
        """Query every date in ``self.newDateList`` in order.

        Stops quietly at the first date that raises ``NotRegistered``.

        :raises DailySettlementQueryError: re-raised after printing the
            traceback when a query shows an unrecognised notice.
        """
        print(' login success ! ')
        try:
            for tdate in self.newDateList:
                print(tdate)
                self.downloadSettlementByDate(tdate)
        except NotRegistered:
            # No data for this date: end the session.
            pass
        except DailySettlementQueryError:
            traceback.print_exc()
            raise

    def downloadSettlementByDate(self, tdate):
        """Submit the query form for one date and open the result link.

        :param tdate: date string typed into the ``tradeDate`` field
            (the constructor produces 'YYYY-MM-DD' strings).
        :raises DailySettlementQueryError: the page shows an unknown notice.
        :raises NotRegistered: the page reports that no data exists for the
            requested date.
        """
        customerForm = self.browser.find_element(by=By.NAME, value='customerForm')
        tradeDate = customerForm.find_element(by=By.NAME, value='tradeDate')
        tradeDate.clear()
        time.sleep(0.3)
        tradeDate.send_keys(tdate)
        tradeDate.submit()
        time.sleep(1)

        code = self.checkDailySettlementQueryNotice()
        if code == DAILY_SETTLEMENT_RESULT_CODE_OK:
            links = self.browser.find_elements(
                by=By.XPATH, value='//*[@id="waitBody"]/table/tbody/tr[1]/td/a')
            if links:
                # Clicking the link does not work here, so its URL is opened
                # directly (presumably this triggers the report download).
                self.browser.get(links[0].get_attribute('href'))
                time.sleep(1)
        elif code == DAILY_SETTLEMENT_RESULT_CODE_UNKNOW:
            raise DailySettlementQueryError()
        elif code == DAILY_SETTLEMENT_RESULT_CODE_NOT_TRADE:
            # Not a trading day: let the caller continue with the next date.
            return
        elif code == DAILY_SETTLEMENT_RESULT_CODE_NOT_REGISTER:
            raise NotRegistered()

    def checkDailySettlementQueryNotice(self):
        """Classify the notice shown after a settlement query.

        :return: one of the ``DAILY_SETTLEMENT_RESULT_CODE_*`` constants;
            ``..._OK`` when the page shows no notice element.
        """
        try:
            noticeEle = self.browser.find_element(
                by=By.ID, value='waitBody').find_element(By.TAG_NAME, 'li')
        except NoSuchElementException:
            # No notice element on the page: the query succeeded.
            return DAILY_SETTLEMENT_RESULT_CODE_OK
        noticeText = noticeEle.text
        if '的交易结算报告,原因是期货公司未向监控中心报送该日数据' in noticeText:
            return DAILY_SETTLEMENT_RESULT_CODE_NOT_REGISTER
        if '为非交易日,请重新选择交易日期' in noticeText:
            return DAILY_SETTLEMENT_RESULT_CODE_NOT_TRADE
        return DAILY_SETTLEMENT_RESULT_CODE_UNKNOW
def alarmMessageBox(MESSAGE):
    """Show an information dialog with a single OK button.

    The original called ``setWindowModality`` on the class itself, passed an
    undefined ``self`` as parent, and never displayed the box.

    :param MESSAGE: text displayed in the dialog.
    """
    box = QMessageBox(QMessageBox.Information, "提示", MESSAGE, QMessageBox.Ok)
    box.setWindowModality(Qt.NonModal)
    # Same display call as questMessage(); blocks until the user closes it.
    box.exec_()
def questMessage(tltie, MESSAGE, btn1, btn2):
    """Show a question dialog with one or two custom-labelled buttons.

    :param tltie: window title.
    :param MESSAGE: text displayed in the dialog.
    :param btn1: label for the Yes button.
    :param btn2: label for the No button, or None to show only the Yes button.
    :return: True when the Yes button was clicked, False otherwise.
    """
    box = QMessageBox()
    box.setWindowIcon(QIcon("images/rlylogo.ico"))
    box.setWindowModality(Qt.NonModal)
    box.setIcon(QMessageBox.Question)
    box.setWindowTitle(tltie)
    box.setText(MESSAGE)
    if btn2 is not None:
        box.setStandardButtons(QMessageBox.Yes | QMessageBox.No)
    else:
        box.setStandardButtons(QMessageBox.Yes)

    buttonY = box.button(QMessageBox.Yes)
    # The hover colour needs its own rule with a ':hover' pseudo-state; the
    # original embedded 'hover{...}' inside the declaration list and misspelt
    # 'rgb' as 'rdb', which is not valid Qt style sheet syntax.
    buttonY.setStyleSheet(
        "QPushButton {"
        "background: url(images/logInbg.png) no-repeat;"
        "background-color: transparent;"
        "border: none;"
        "cursor: pointer;"
        "width: 120px;"
        "height: 32px;"
        "color: #FFF;"
        "font-weight: bold;"
        "text-align: center;"
        "}"
        "QPushButton:hover {background-color: rgb(255,93,52);}"
    )
    buttonY.setText(btn1)

    if btn2 is not None:
        buttonN = box.button(QMessageBox.No)
        buttonN.setStyleSheet(
            "background: url(images/logInbg.png) no-repeat;"
            "border: none;"
            "cursor: pointer;"
            "width: 120px;"
            "height: 32px;"
            "color: #FFF;"
            "font-weight: bold;"
            "text-align: center;"
        )
        buttonN.setText(btn2)

    box.exec_()
    return box.clickedButton() == buttonY
def is_valid_date(str):
    """Return True when ``str`` is a valid date in 'YYYY-MM-DD' format.

    The original called ``datetime.datetime.strptime`` although the file
    imports the ``datetime`` class directly, so the resulting AttributeError
    was swallowed by a bare ``except`` and the function always returned False.

    :param str: candidate date string (parameter name kept for compatibility
        even though it shadows the builtin).
    :return: True for a parseable date, False otherwise (including non-string
        input).
    """
    try:
        datetime.strptime(str, "%Y-%m-%d")
    except (ValueError, TypeError):
        return False
    return True
def loadConfiguredDateRange(configPath):
    """Read ``startDate`` / ``endDate`` from the DEFAULT section of an ini file.

    :param configPath: path of the ini file.
    :return: ``(startDate, endDate)`` as ``datetime`` objects; either item is
        None when the file is missing or unreadable, or when the option is
        absent or empty.
    :raises ValueError: an option is present but not in 'YYYY-MM-DD' format.
    """
    import configparser

    if not os.path.exists(configPath):
        return None, None
    # ConfigParser replaces SafeConfigParser, which was removed in Python 3.12.
    parser = configparser.ConfigParser()
    try:
        with open(configPath, "r") as f:
            parser.read_file(f)
    except (IOError, configparser.Error):
        return None, None

    def parseOption(name):
        raw = parser.get("DEFAULT", name, fallback=None)
        if not raw:
            return None
        return datetime.strptime(raw.strip(), "%Y-%m-%d")

    return parseOption('startDate'), parseOption('endDate')


if __name__ == '__main__':
    startDate, endDate = loadConfiguredDateRange(os.path.join(os.getcwd(), 'fmsconfig.ini'))
    if startDate is None or endDate is None:
        # No complete range configured: use the last SI.getDataDays days.
        endDate = datetime.today()
        startDate = endDate - timedelta(days=int(SI.getDataDays))

    accounts = ['013588768358']  # alternatives: '022116501650', '0022988787'
    saveto = SI.input_path
    os.makedirs(saveto, exist_ok=True)

    if startDate <= endDate:
        # Newest date first, walking back one day at a time to startDate.
        spanDays = (endDate - startDate).days
        datelist = [endDate - timedelta(days=back) for back in range(spanDays + 1)]
        for account in accounts:
            CfmmcBrowser(account, SI.cfmmcpsw, saveto=saveto, datelist=datelist)
    else:
        print('startDate is later than endDate; nothing to download')
posted @   冀未然  阅读(168)  评论(0编辑  收藏  举报
(评论功能已被禁用)
相关博文:
阅读排行:
· 分享4款.NET开源、免费、实用的商城系统
· 全程不用写代码,我用AI程序员写了一个飞机大战
· MongoDB 8.0这个新功能碉堡了,比商业数据库还牛
· 记一次.NET内存居高不下排查解决与启示
· 白话解读 Dapr 1.15:你的「微服务管家」又秀新绝活了
点击右上角即可分享
微信分享提示