# 学习笔记 20190309 (study notes, 2019-03-09)
"""Work through a reading/viewing quota in a Selenium-driven Chrome browser.

Flow when the script runs:

1. Open HOME_PAGE and wait TIME_FOR_LOGIN seconds for a manual QR-code login.
2. Read up to PAPER_COUNT not-yet-recorded articles listed in 文章URL.txt,
   scrolling each one down in READING_STEP steps.
3. Watch up to VIDEO_COUNT not-yet-recorded ordinary videos from 视频URL.txt.
4. Watch one news-broadcast video from the same list.

Every article/video that was handled is appended to 观看记录.txt so that
later runs (and later passes of the same run) skip it.
"""
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import time

# ---- Constants ----
TIME_FOR_LOGIN = 60  # seconds reserved for scanning the login QR code by hand
READING_STEP = 10  # each article is scrolled through in this many steps
TIME_FOR_READING = 18  # seconds spent on each scroll step (one tenth of an article)
TIME_FOR_AJAX = 10  # seconds to wait for asynchronously loaded page content
# NOTE(review): HOME_PAGE is empty in this file; presumably the login page URL
# has to be filled in before running -- confirm.
HOME_PAGE = ''
PAPER_COUNT = 6  # number of articles to read per run
VIDEO_COUNT = 6  # number of ordinary videos to watch per run
TIME_FOR_VIDEO = 300  # seconds each ordinary video is watched
TIME_FOR_NEWS = 30  # seconds the news broadcast is watched

RECORD_FILE = '观看记录.txt'  # history of URLs already read/watched
PAPER_URL_FILE = '文章URL.txt'  # one article URL per line
VIDEO_URL_FILE = '视频URL.txt'  # one video URL per line
# Element id whose presence marks a page as the news broadcast.
# (The original author also noted the alternative id 'Cgi6ap0m271c00'.)
NEWS_ELEMENT_ID = 'C2k4pjg7itcs00'
PLAY_BUTTON_CLASS = 'outter'  # class name of the element clicked to start playback


def _load_urls(path, missing_ok=False):
    """Return the non-blank lines of *path*, stripped and de-duplicated in file order.

    Stripping the line terminator makes the last line of a file compare equal
    to the same URL elsewhere even when that last line has no trailing newline.
    When *missing_ok* is true a missing file is treated as empty; otherwise
    FileNotFoundError propagates.
    """
    try:
        with open(path, 'r') as handle:
            stripped = [line.strip() for line in handle]
    except FileNotFoundError:
        if not missing_ok:
            raise
        return []
    return list(dict.fromkeys(line for line in stripped if line))


def _mark_seen(url, seen, record):
    """Append *url* to the open history file *record* and add it to *seen*."""
    record.write(url + '\n')
    record.flush()  # keep the history on disk even if the run is killed later
    seen.add(url)


def _wait(seconds, label):
    """Sleep *seconds* seconds, printing '<label>:<i>秒……' once per second."""
    for i in range(1, seconds + 1):
        print('{}:{}秒……'.format(label, i))
        time.sleep(1)


def _watch(seconds, label):
    """Sleep *seconds* seconds, printing the elapsed minutes/seconds each second."""
    for i in range(1, seconds + 1):
        print('{},已观看:{}分{}秒'.format(label, i // 60, i % 60))
        time.sleep(1)


def _is_news_broadcast(browser):
    """Return True when the current page contains the news-broadcast marker element."""
    try:
        browser.find_element(By.ID, NEWS_ELEMENT_ID)
    except NoSuchElementException:
        return False
    return True


def _start_playback(browser):
    """Click the play element; return False (after a message) when the page has none."""
    try:
        # find_element(By...) replaces find_element_by_class_name, which newer
        # Selenium releases no longer provide.
        browser.find_element(By.CLASS_NAME, PLAY_BUTTON_CLASS).click()
    except NoSuchElementException:
        print("这不是视频网页")
        return False
    return True


def _read_articles(browser, urls, seen, record):
    """Open up to PAPER_COUNT unseen article URLs and scroll through each one."""
    done = 0
    for url in urls:
        if done >= PAPER_COUNT:
            break
        if url in seen:
            continue
        done += 1
        browser.get(url)
        _mark_seen(url, seen, record)
        for step in range(1, READING_STEP + 1):
            # window.scrollTo takes (x, y): keep x at 0 and move only vertically.
            js = ("window.scrollTo(0," + str(step / READING_STEP)
                  + "*document.body.scrollHeight)")
            # Multiply before dividing so the percentage has no float noise.
            print('第 ' + str(done) + ' 篇文章浏览进度:'
                  + str(step * 100 / READING_STEP) + '% ')
            browser.execute_script(js)
            _watch(TIME_FOR_READING, '正在阅读第' + str(done) + '篇文章')


def _watch_videos(browser, urls, seen, record):
    """Watch up to VIDEO_COUNT unseen videos, skipping news-broadcast pages."""
    index = 1
    for url in urls:
        if index > VIDEO_COUNT:
            break
        if url in seen:
            continue
        browser.get(url)
        print('请稍后,正在判断视频')
        _wait(TIME_FOR_AJAX, '请稍后,正在判断视频')
        if _is_news_broadcast(browser):
            continue  # left for the dedicated news pass
        print('这个不是新闻联播')
        print('正在观看第' + str(index) + '个视频')
        if not _start_playback(browser):
            continue
        _mark_seen(url, seen, record)
        time.sleep(TIME_FOR_AJAX)
        _watch(TIME_FOR_VIDEO, '正在观看第' + str(index) + '个视频')
        index += 1


def _watch_news(browser, urls, seen, record):
    """Watch the first unseen URL that turns out to be a playable news broadcast."""
    for url in urls:
        if url in seen:
            continue
        browser.get(url)
        _wait(TIME_FOR_AJAX, '请稍后,正在判断新闻')
        if not _is_news_broadcast(browser):
            continue
        # Same order as the video pass: only record the URL once playback started.
        if not _start_playback(browser):
            continue
        _mark_seen(url, seen, record)
        time.sleep(TIME_FOR_AJAX)
        _watch(TIME_FOR_NEWS, '正在观看新闻联播')
        return


# Create the browser and run the three passes.
browser = webdriver.Chrome()
try:
    browser.get(HOME_PAGE)
    # Wait for the manual QR-code login.
    _wait(TIME_FOR_LOGIN, '已等待扫码')

    # History first (a missing file just means nothing has been seen yet).
    has_read = set(_load_urls(RECORD_FILE, missing_ok=True))
    with open(RECORD_FILE, 'a') as read:
        paper = _load_urls(PAPER_URL_FILE)
        _read_articles(browser, paper, has_read, read)

        news = _load_urls(VIDEO_URL_FILE)
        _watch_videos(browser, news, has_read, read)
        _watch_news(browser, news, has_read, read)
finally:
    # Always shut down Chrome and its driver process, even after an error.
    browser.quit()