# Export a question bank to a .docx file (my first Python script, so please go easy on me).
import sys
import time

import docx
from docx.oxml.ns import qn  # qualified XML names; used below for the East Asian font attribute
from docx.shared import Pt  # font sizes in points
from selenium import webdriver

# NOTE(review): executable_path and the find_element(s)_by_* helpers are the
# Selenium 3 style API this script was written against -- confirm that the
# installed Selenium version still provides them.

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
CHROMEDRIVER_PATH = r"C:\Users\Administrator\AppData\Local\Google\Chrome\Application\chromedriver.exe"
START_URL = "http://newtk.baitizhan.com/index.php?s=Home/Index/index/taocan_id/443"
OUTPUT_DIR = "E:\\"

LOGIN_NAME = "123456"  # login name (phone number)
LOGIN_PWD = "123123"  # password

LOGIN_FORM_XPATH = "//*[@id='app']/div[2]/div/div[2]/div[1]/form"
CHAPTER_LIST_XPATH = "//*[@id='chapterlist']/ul"
# Panel that shows the question selected in one of the number lists.
QUESTION_PANEL_XPATH = "//*[@id='tikuVue']/div[4]/div[1]"
# The question-number lists live in consecutive div[...] siblings, the first
# of which is div[4].
FIRST_SECTION_DIV = 4


def _banner(text):
    """Print a progress message framed by the script's arrow banner."""
    print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>" + text + "<<<<<<<<<<<<<<<<<<<<<<<<")


# ---------------------------------------------------------------------------
# Shared state: one in-memory Word document and one browser session.  Both
# are module globals because the export functions below read them.
# ---------------------------------------------------------------------------
chrome_options = webdriver.ChromeOptions()
# To run without a visible window: chrome_options.add_argument('--headless')

file = docx.Document()
file.styles['Normal'].font.name = u'宋体'
# The East Asian font has to be set separately for Chinese text to use it.
file.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')
file.styles['Normal'].font.size = Pt(7.5)

browser = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH, options=chrome_options)
# browser.maximize_window() would maximise the window here if needed.
browser.get(START_URL)
time.sleep(3)

# --- Simulated login -------------------------------------------------------
username = browser.find_element_by_xpath(LOGIN_FORM_XPATH + "/div[1]/div[1]/div[1]/input")
username.send_keys(LOGIN_NAME)
password = browser.find_element_by_xpath(LOGIN_FORM_XPATH + "/div[2]/div[1]/div[1]/input")
password.send_keys(LOGIN_PWD)
browser.find_element_by_xpath(LOGIN_FORM_XPATH + "/div[3]/div[1]/button").click()
time.sleep(3)
_banner("登陆成功")

# --- Pick the practice type: first <p> of the last card ---------------------
Choose_Operates = browser.find_elements_by_class_name('ivu-card-body')
Operates = Choose_Operates[-1].find_elements_by_tag_name('p')
Operates[0].click()

# The click opens a new tab; switch to it and open the chapter practice
# (second link of the last menu).
browser.switch_to.window(browser.window_handles[-1])
menu = browser.find_elements_by_class_name('menu')
menus = menu[-1].find_elements_by_tag_name('a')
menus[1].click()
time.sleep(2)

# The chapter list is rendered inside the "Main" iframe.
iframe = browser.find_element_by_id("Main")
browser.switch_to.frame(iframe)
time.sleep(3)


def _section_list_xpath(position):
    """Return the XPath of the question-number list stored in div[position]."""
    return "//*[@id='tikuVue']/div[3]/div[" + str(position) + "]/div[2]/ul"


def _announce_section(kind, list_xpath):
    """Count the questions in a number list, print the banners, return the count."""
    total = len(browser.find_element_by_xpath(list_xpath).find_elements_by_xpath('li'))
    _banner(kind + "题总数:" + str(total))
    _banner("开始导入" + kind + "题")
    return total


def _finish_section(kind, total, docxName):
    """Save the shared document and report how many questions were exported."""
    file.save(OUTPUT_DIR + docxName + ".docx")
    # Report the counted total instead of the loop variable so an empty
    # section prints 0 rather than raising on an unbound name.
    print(">>>>>导入" + kind + "完成,共:" + str(total) + "道")


def _export_choice_section(kind, position, docxName, option_group, option_count, with_span):
    """Export one section whose questions have a fixed number of option labels.

    kind          -- section label ("单选", "多选" or "判断") used for the
                     heading and the progress messages
    position      -- index of the div holding this section's number list
    option_group  -- index of the div (inside the question panel) that holds
                     the option labels
    option_count  -- number of option labels read per question
    with_span     -- when true, the text of the label's <span> is appended to
                     the label text, as the original export did
    """
    list_xpath = _section_list_xpath(position)
    total = _announce_section(kind, list_xpath)
    file.add_heading(kind + "题", 2)

    for number in range(1, total + 1):
        # Selecting the number makes the question panel show that question.
        browser.find_element_by_xpath(list_xpath + "/li[" + str(number) + "]").click()

        stem = browser.find_element_by_xpath(QUESTION_PANEL_XPATH + "/div[1]/div[2]/p[2]").text
        lines = [str(number) + "、" + stem]
        for option in range(1, option_count + 1):
            label_xpath = (QUESTION_PANEL_XPATH + "/div[2]/div[" + str(option_group)
                           + "]/div[" + str(option) + "]/label")
            text = browser.find_element_by_xpath(label_xpath).text
            if with_span:
                text += browser.find_element_by_xpath(label_xpath + "/span").text
            lines.append(text)

        # Write only after the whole question was read successfully.
        for line in lines:
            file.add_paragraph(line)

    _finish_section(kind, total, docxName)


def _export_calculation_section(docxName):
    """Export the questions of a calculation / comprehensive page.

    Each question contributes its numbered first paragraph, its second
    paragraph and an empty separator paragraph.
    """
    list_xpath = _section_list_xpath(FIRST_SECTION_DIV)
    total = _announce_section("计算", list_xpath)

    for number in range(1, total + 1):
        browser.find_element_by_xpath(list_xpath + "/li[" + str(number) + "]").click()
        content = browser.find_element_by_xpath(QUESTION_PANEL_XPATH + "/div[1]/div[2]/p[1]").text
        question = browser.find_element_by_xpath(QUESTION_PANEL_XPATH + "/div[1]/div[2]/p[2]").text
        file.add_paragraph(str(number) + "、" + content)
        file.add_paragraph(question)
        file.add_paragraph('')

    _finish_section("计算", total, docxName)


def getCoiceQuestion(type, docxName, da=1, duo=1):
    """Export the questions of the currently open practice page into the document.

    The page title is added as a level-1 heading, the questions are appended
    to the shared module-level document, and the document is saved as
    OUTPUT_DIR + docxName + ".docx".

    type     -- 1: single-choice, multiple-choice and true/false sections;
                2: calculation / comprehensive questions.  Any other value
                only adds the heading.  An empty string aborts the program.
                (The name shadows the builtin but is kept for callers.)
    docxName -- file name (without extension) of the saved document
    da       -- 1 to export the single-choice section; callers pass 0 to
                skip it
    duo      -- 1 to export the multiple-choice section; callers pass 0 to
                skip it

    Returns the result of browser.close(), i.e. the current window is closed
    when the export is finished.
    """
    if type == '':
        sys.exit('错误终止程序')

    bigTitle = browser.find_element_by_xpath(r"//*[@id='tikuName']").text
    file.add_heading(bigTitle, 1)

    if type == 1:
        # Each exported section occupies the next div, so the true/false list
        # sits at div[4], div[5] or div[6] depending on how many sections
        # precede it.
        position = FIRST_SECTION_DIV
        if da == 1:
            _export_choice_section("单选", position, docxName,
                                   option_group=1, option_count=4, with_span=True)
            position += 1
        if duo == 1:
            _export_choice_section("多选", position, docxName,
                                   option_group=2, option_count=4, with_span=True)
            position += 1
        _export_choice_section("判断", position, docxName,
                               option_group=1, option_count=2, with_span=False)
    elif type == 2:
        _export_calculation_section(docxName)

    return browser.close()


def getdoxcname(type, lititle, linumber, stust='no', da=1, duo=1):
    """Open one practice entry from the chapter list and export it.

    type     -- forwarded to getCoiceQuestion
    lititle  -- 1-based index of the chapter title item in the chapter list;
                its link text becomes the document file name
    linumber -- 1-based index of the list item whose second link opens the
                practice page
    stust    -- 'yes' to click the chapter title first (presumably to expand
                the chapter -- confirm against the page); anything else
                skips that click
    da, duo  -- forwarded to getCoiceQuestion

    Returns whatever getCoiceQuestion returns.
    """
    # Go back to the first window, where the chapter list lives.
    browser.switch_to.window(browser.window_handles[0])
    time.sleep(3)
    iframe = browser.find_element_by_id("Main")
    browser.switch_to.frame(iframe)
    time.sleep(3)

    title_xpath = CHAPTER_LIST_XPATH + "/li[" + str(lititle) + "]/a"
    docxName2 = browser.find_element_by_xpath(title_xpath).text
    if stust == 'yes':
        browser.find_element_by_xpath(title_xpath).click()
    browser.find_element_by_xpath(CHAPTER_LIST_XPATH + "/li[" + str(linumber) + "]/a[2]").click()
    time.sleep(2)

    # The practice page opens in a new window; continue there.
    browser.switch_to.window(browser.window_handles[-1])
    return getCoiceQuestion(type, docxName2, da, duo)


# ---------------------------------------------------------------------------
# Argument sets for getdoxcname, grouped by chapter.  Only ACTIVE_CHAPTER is
# exported in a run; the other entries are the sets that were used earlier.
# The impairment chapter (title item 34, items 35-39) has no recorded
# argument sets.
# ---------------------------------------------------------------------------
CHAPTER_JOBS = {
    # Long-term equity investments
    "长期股权投资": [
        (1, 22, 23, 'yes'), (1, 22, 24), (2, 22, 25), (2, 22, 26),
    ],
    # Investment property
    "投资性房地产": [
        (1, 27, 28, 'yes'), (1, 27, 29), (1, 27, 30), (1, 27, 31),
        (2, 27, 32), (2, 27, 33),
    ],
    # Financial assets and financial liabilities
    "金融资产和金融负债": [
        (1, 40, 41, 'yes'), (1, 40, 42), (1, 40, 43), (2, 40, 44), (2, 40, 45),
    ],
    # Employee compensation and borrowing costs
    "职工薪酬及借款费用": [
        (1, 46, 47, 'yes'), (1, 46, 48), (2, 46, 49), (2, 46, 50),
    ],
    # Contingencies
    "或有事项": [
        (1, 51, 52, 'yes'), (1, 51, 53), (1, 51, 54), (2, 51, 55), (2, 51, 56),
    ],
    # Revenue
    "收入": [
        (1, 57, 58, 'yes', 0), (1, 57, 59), (1, 57, 60), (1, 57, 61),
        (2, 57, 62), (2, 57, 63),
    ],
    # Government grants
    "政府补助": [
        (1, 64, 65, 'yes'), (1, 64, 66), (2, 64, 67),
    ],
    # Income tax
    "所得税": [
        (1, 68, 69, 'yes'), (1, 68, 70), (1, 68, 71), (2, 68, 72), (2, 68, 73),
    ],
    # Foreign currency translation
    "外币折算": [
        (1, 74, 75, 'yes'), (1, 74, 76), (2, 74, 77),
    ],
    # Financial reporting
    "财务报告": [
        (1, 78, 79, 'yes', 1, 1), (1, 78, 80), (1, 78, 81),
        (1, 78, 82, 'no', 1, 0), (1, 78, 83), (1, 78, 84, 'no', 0, 1),
        (1, 78, 85, 'no', 0, 0), (2, 78, 86), (2, 78, 87),
    ],
    # Changes in accounting policies and estimates, and error corrections
    "会计政策、会计估计变更和差错更正": [
        (1, 88, 89, 'yes'), (1, 88, 90), (1, 88, 91), (2, 88, 92), (2, 88, 93),
    ],
    # Events after the balance sheet date
    "资产负债表日后事项": [
        (1, 94, 95, 'yes'), (1, 94, 96), (1, 94, 97), (2, 94, 98), (2, 94, 99),
    ],
    # Government accounting
    "政府会计": [
        (1, 100, 101, 'yes'), (1, 100, 102), (2, 100, 103),
    ],
    # Accounting for private non-profit organisations
    "民间非营利组织会计": [
        (1, 104, 105, 'yes'), (1, 104, 106), (1, 104, 107),
    ],
}

ACTIVE_CHAPTER = "民间非营利组织会计"

for job in CHAPTER_JOBS[ACTIVE_CHAPTER]:
    getdoxcname(*job)

print("----------------------结束-------------------")