# 未通过原因:近期在全网出现过高度相似文章被认为是旧闻
# (Rejection reason: a highly similar article appeared across the web recently, so the post was judged to be old news.)
# -- Gushiwen -> Toutiao one-shot publishing script ---------------------------
# Scrapes one poem page from so.gushiwen.org (title, author, body, notes,
# translation, references, author portrait), rebuilds it as an HTML article,
# then drives a logged-in Toutiao MP session with Selenium to upload the
# images and publish the article.
from bs4 import *
from pyquery import PyQuery as pq
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import os
import requests
import time
import threading
import logging
import random

browser = webdriver.Chrome()
url = 'https://so.gushiwen.org/shiwenv_ee16df5673bc.aspx'
browser.get(url)

# Click every "展开阅读全文 ∨" ("expand full text") link via injected JS.
js = "a_=document.getElementsByTagName('a');le=a_.length;for(i=0;i<le;i++){if(a_[i].text=='展开阅读全文 ∨'){try{a_[i].click()}catch(err){console.log(err)}}}"
try:
    browser.execute_script(js)
except Exception as e:
    print(e)

# Fallback: scroll down in 100px steps and click any expander the JS pass
# missed (links may not be clickable until scrolled into view).
ck_l_ori_len = len(browser.find_elements_by_link_text('展开阅读全文 ∨'))
ck_l_ori_ok = 0
try:
    for isc in range(100):
        if ck_l_ori_ok == ck_l_ori_len:
            break
        time.sleep(1)
        js = 'window.scrollTo(0,100*{})'.format(isc)
        browser.execute_script(js)
        ck_l = browser.find_elements_by_link_text('展开阅读全文 ∨')
        for i in ck_l:
            try:
                i.click()
                ck_l_ori_ok += 1
            except Exception as e:
                print(e)
except Exception as e:
    print('window.scrollTo-->', e)

# Extract the article fragments with PyQuery; strip the XHTML namespace
# attribute that .html() injects into every serialized fragment.
doc = pq(browser.page_source)
r_k, r_v = 'xmlns="http://www.w3.org/1999/xhtml"', ''
article_ = doc('.left>:nth-child(2).sons>.cont>.contson').html().replace(r_k, r_v)
title_d = {'h1': doc('.left>:nth-child(2).sons>.cont>:nth-child(2)').html().replace(r_k, r_v)}
author_d = {'h3': doc('.left>:nth-child(2).sons>.cont>:nth-child(3)').text()}
translation_ = doc('.left>:nth-child(4)>.contyishang>:nth-child(2)').html().replace(r_k, r_v)
explanation_ = doc('.left>:nth-child(4)>.contyishang>:nth-child(3)').html().replace(r_k, r_v)
refer_ = doc('.left>:nth-child(4)>.cankao').html().replace(r_k, r_v)
author_img_url = doc('.left>.sonspic>.cont>.divimg>:nth-child(1)').html().split('src="')[-1].split('"')[0]

# Assemble the article HTML: <h1>title</h1><h3>author</h3><img portrait>,
# then body / notes / translation / references separated by blank lines.
k = 'h1'
v = title_d[k]
db_html = '<{}>{}</{}>'.format(k, v, k)
k = 'h3'
v = author_d[k]
db_html = '{}<{}>{}</{}>'.format(db_html, k, v, k)
db_html = '{}{}'.format(db_html, '<br><img src="{}" ><br>'.format(author_img_url))
l = [db_html, article_, explanation_, translation_, refer_]
db_html = '<br><br>'.join(l)

# Strip every '<a href=...>' / '<a title=...>' opening tag.  Masking the
# earlier '>' characters keeps the string the SAME length, so p2 is the real
# index of this tag's closing '>'.  (The previous version sliced with
# db_html[p1 + 1:], shortening the string by one before indexing, which left
# a stray '>' in the published output.)
for rp_s in ('<a href=', '<a title='):
    while rp_s in db_html:
        p1 = db_html.index(rp_s)
        tmp = '{}{}'.format(db_html[0:p1].replace('>', 'X'), db_html[p1:])
        p2 = tmp.index('>')
        db_html = '{}{}'.format(db_html[0:p1], db_html[p2 + 1:])

# Warm up the session by visiting a (time-seeded) existing Toutiao article
# before opening the SSO login page.
f_url_l = ['https://www.toutiao.com/a6514526304476332552/', 'https://www.toutiao.com/a6514778729951003150/']
f_url_l += ['https://www.toutiao.com/a6514216125151052291/', 'https://www.toutiao.com/a6512315164463727111/']
f_url_l_a = f_url_l[int(time.time()) % len(f_url_l)]
js = 'window.location.href="{}";'.format(f_url_l_a)
browser.execute_script(js)
time.sleep(random.randint(10, 20))
js = 'window.location.href="https://sso.toutiao.com/login/?service=https://mp.toutiao.com/sso_confirm/?redirect_url=/";'
browser.execute_script(js)
time.sleep(random.randint(10, 20))

# File logging next to this script; hard-exit if logging cannot be set up.
start_time = time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time()))
os_sep = os.sep
this_file_abspath, this_file_name = os.path.dirname(os.path.abspath(__file__)), os.path.abspath(__file__).split(os_sep)[
    -1]
logf = this_file_name + '.log'
try:
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s[thread:%(thread)d][process:%(process)d]',
                        datefmt='%a, %d %b %Y %H:%M:%S',
                        filename=logf,
                        filemode='a')
except Exception as e:
    s = '%s%s%s' % ('logging.basicConfig EXCEPTION ', time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())), e)
    with open(logf, 'a') as fo:
        fo.write(s)
    print(s)
    os._exit(4002)
logging.info('START')

img_url = 'https://s3.pstatp.com/toutiao/static/img/logo.201f80d.png'
img_dir = 'C:\\Users\\sas\\PycharmProjects\\py_win_to_unix\\crontab_chk_url\\personas\\trunk\\plugins\\spider\\dl_img_tmp\\'


def spider_webimg_dl_return_local_img_path(img_dir, img_url, local_default='default.DONOT_REMOVE.png'):
    """Download img_url into img_dir under a unique timestamped name and
    return the local path; on any failure return the bundled default image
    path instead."""
    r = '%s%s' % (img_dir, local_default)
    try:
        # Public API is .content — the old code read the private ._content.
        img_bytes = requests.get(img_url).content
        r = '%s%s%s%s%s' % (
            img_dir, time.strftime('%Y%m%d%H%M%S', time.localtime(time.time())), str(threading.get_ident()),
            random.randint(1234, 9876), '.png')
        if img_bytes:  # skip writing an empty response body
            with open(r, 'wb') as f:
                f.write(img_bytes)
    except Exception as e:
        print(e)
    return r


d = {}
d['title'] = '《{}》{}'.format(title_d['h1'], author_d['h3'])[0:30]  # Toutiao title length cap
d['content'] = db_html

# QQ-account login on the SSO page.
# NOTE(security): plaintext credentials in source — move to env/config.
ac_type, ac = 'qq', {}
ac['u'], ac['p'] = '2783', 'w3q'
if ac_type == 'qq':
    myid, mypwd = ac['u'], ac['p']
    xp = '/html/body/div/div/div[2]/div/div/div/ul/li[2]'  # li[3] selects a different login tab
    browser.find_element_by_xpath(xp).click()
    time.sleep(10)
    js = '%s%s%s' % ('document.getElementById("u").value="', myid, '"')
    browser.execute_script(js)
    js = '%s%s%s' % ('document.getElementById("p").value="', mypwd, '"')
    browser.execute_script(js)
    time.sleep(random.randint(5, 15))
    xp_newpage = '//*[@id="go"]'
    browser.find_element_by_xpath(xp_newpage).click()
    time.sleep(random.randint(10, 20))
    browser.refresh()

js = 'window.location.href="https://www.toutiao.com/";'
browser.execute_script(js)
browser.refresh()
time.sleep(6)
js = 'window.location.href="https://mp.toutiao.com/profile_v3/graphic/publish";'  # profile_v2 is the legacy path
browser.execute_script(js)
time.sleep(6)

# Collect the article's <img> src URLs by round-tripping through a temp file.
dbhtml_str, pgc_img_url_l = d['content'], []
myhtml = 'D:\\myhtml\\{}tmp.html'.format(random.randint(123, 999))
with open(myhtml, 'w', encoding='utf-8') as fw:
    fw.write(dbhtml_str)
with open(myhtml, 'r', encoding='utf-8') as myhtml_o:
    bs = BeautifulSoup(myhtml_o, 'html.parser')
    pgc_img_url_l = [i.attrs['src'] for i in bs.find_all('img')]

# Upload every image through the MP resource manager's file input.
js = 'window.location.href="https://mp.toutiao.com/profile_v3/graphic/resource-manager";'
browser.execute_script(js)
time.sleep(2)
xp = '//*[@id="graphic"]/div/div/div[3]/div/div[1]/div[3]/div/input'
upload = browser.find_element_by_xpath(xp)
for img_url in pgc_img_url_l:
    local_img_path = spider_webimg_dl_return_local_img_path(img_dir, img_url)
    upload.send_keys(local_img_path)
time.sleep(10 * len(pgc_img_url_l))

# Read back the CDN URLs Toutiao assigned to the uploads.
pgc_img_url_l_toutiao = [i.find_element_by_tag_name('img').get_attribute('src')
                         for i in browser.find_elements_by_class_name('pic')][0:len(pgc_img_url_l)]
pgc_img_url_l_toutiao = sorted(pgc_img_url_l_toutiao, reverse=True)

# Replace each original <img ...> tag with the UEditor-style tag pointing at
# the uploaded pgc-image code.  Template observed in the editor DOM, e.g.:
# <img onload="editor.fireEvent('contentchange')"
#      src="https://p1.pstatp.com/large/pgc-image/<code>" ... buttonadded="true">
img_n = dbhtml_str.count('<img')
s = '<img onload="editor.fireEvent(\'contentchange\')" src="https://p1.pstatp.com/large/pgc-image/TTimgCode" _src="https://p1.pstatp.com/large/pgc-image/TTimgCode" alt="pgc-image/TTimgCode" buttonadded="true">'
ss = ''
l = pgc_img_url_l_toutiao
for i in range(img_n):
    if i == 0:
        p1 = dbhtml_str.index('<img ', 0)
    else:
        # Search past the '<img ' just inserted at p1 to find the next original tag.
        p1 = dbhtml_str.index('<img ', p1 + 3)
    tmp = '{}{}'.format(dbhtml_str[0:p1].replace('>', 'X'), dbhtml_str[p1:])
    p2 = tmp.index('>')  # closing '>' of this tag (earlier '>' are masked out)
    ss = s.replace('TTimgCode', l[0].split('/')[-1])
    dbhtml_str = '{}{}{}'.format(dbhtml_str[0:p1], ss, dbhtml_str[p2 + 1:])
    del l[0]
print('-----------------')
print(dbhtml_str)
time.sleep(2)

# Open the publish page and fill in the title.
js = 'window.location.href="https://mp.toutiao.com/profile_v3/graphic/publish";'
browser.execute_script(js)
time.sleep(6)
xp_newpage = '//*[@id="title"]'
mytxt = d['title']
browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
browser.find_element_by_xpath(xp_newpage).send_keys(mytxt)
time.sleep(2)
xp = '//*[@id="edui18_body"]/div[1]'
browser.find_element_by_xpath(xp).click()
time.sleep(2)
xp = '//*[@id="images"]/div[1]/div/span'
browser.find_element_by_xpath(xp).click()
time.sleep(3)

# Assemble a syntactically valid JS string (worked out in the browser
# console) and inject the article body into the UEditor iframe.
r_d = {'onload="editor.fireEvent(\'contentchange\')"': '', '"': "'", '\n': ''}
dbhtml_str_py_js = dbhtml_str
for k in r_d:
    dbhtml_str_py_js = dbhtml_str_py_js.replace(k, r_d[k])
dbhtml_str_py_js = dbhtml_str_py_js.replace('nbsp;', ' ')
js = 'document.getElementById("ueditor_0").contentWindow.document.getElementsByTagName("body")[0].innerHTML="{}"'.format(
    dbhtml_str_py_js)
browser.execute_script(js)

# Scroll so the publish controls become clickable (otherwise Selenium raises
# "Element is not clickable at point (589, 952)").
try:
    for isc in range(2):
        time.sleep(1)
        js = 'window.scrollTo(0,document.body.scrollHeight)'
        browser.execute_script(js)
except Exception as e:
    print('window.scrollTo-->', e)
time.sleep(10)

# Cover-image option (best effort — layout may vary).
try:
    xp = '//*[@id="graphic"]/div/div/div[2]/div[2]/div[1]/div[2]/div/div/div[1]/div/label[3]/span'
    browser.find_element_by_xpath(xp).click()
    time.sleep(1)
except Exception as e:
    print(e)
time.sleep(2)

# Ads toggle: label[2] = no ads, label[1] = serve ads (current choice).
xp = '//*[@id="graphic"]/div/div/div[2]/div[2]/div[2]/div[2]/div[1]/label[1]/span'
browser.find_element_by_xpath(xp).click()
time.sleep(1)

# Final button: div[2] = save draft, div[1] = publish (current choice).
xp = '//*[@id="graphic"]/div/div/div[2]/div[3]/div[2]/div[1]'
browser.find_element_by_xpath(xp).click()
time.sleep(random.randint(20, 30))
js = 'window.location.href="https://www.toutiao.com/"'
browser.execute_script(js)

try:
    browser.quit()
except Exception as e1:
    print(e1)
    logging.exception(e1)
<h1>秋夜读书每以二鼓尽为节</h1><h3>宋代 : 陆游</h3><br><img onload="editor.fireEvent('contentchange')" src="https://p1.pstatp.com/large/pgc-image/15246284315719a928e33a1" _src="https://p1.pstatp.com/large/pgc-image/15246284315719a928e33a1" alt="pgc-image/15246284315719a928e33a1" buttonadded="true"><br><br><br> 腐儒碌碌叹无奇,独喜遗编不我欺。<br />白发无情侵老境,青灯有味似儿时。<br />高梧策策传寒意,叠鼓冬冬迫睡期。<br />秋夜渐长饥作祟,一杯山药进琼糜。 <br><br><strong >注释<br /></strong>以二鼓尽为节:指读书读到二更天才停止。二鼓,指更鼓报过二更。<br />腐儒:作者自称。<br />碌碌:平庸,无所作为。<br />遗编:遗留后世的著作,泛指古代典籍。<br />不我欺:并不欺骗我。<br />策策:象声词,指风摇动树叶发出的响声。<br />叠鼓:轻轻击鼓,指更鼓。<br />冬冬:象声词,指鼓声。<br />迫睡期:催人睡觉。<br />作祟:暗中捣鬼,形容夜深了还没有睡觉,肚子饿了。<br />琼糜:像琼浆一样甘美的粥。糜,粥。>▲</a><br><br><strong >译文<br /></strong>我这个迂腐的儒生,可叹一生碌碌无奇,却只爱前人留下来的著作,从不将我欺骗。<br />白发无情地爬上头顶,渐渐地进入老年,读书的青灯却依旧像儿时那样亲切有味。<br />高大的梧桐策策作响,传来一阵阵寒意,读书兴致正浓,忽听更鼓冬冬催人入睡。<br />秋夜漫漫,饥肠辘辘,再也难以读下去,喝杯山药煮成的薯粥,胜过那佳肴美味。<br><br> <p style=" color:#999999;margin:0px; font-size:12px;line-height:160%;">参考资料:</p> <div style="clear:both; float:left;color:#999999; font-size:12px; width:630px; margin-top:4px;"> <span style="width:20px; float:left;">1、</span> <span style="width:610px; float:left;">刘扬忠注评.陆游诗词选评:三秦出版社,2008.2:9-10</span> </div> <div style="clear:both; float:left;color:#999999; font-size:12px; width:630px; margin-top:4px;"> <span style="width:20px; float:left;">2、</span> <span style="width:610px; float:left;">(宋)陆游著,王水照,高克勤选注.陆游选集:人民文学出版社,1997年11月:16</span> </div> <div style="clear:both; float:left;color:#999999; font-size:12px; width:630px; margin-top:4px;"> <span style="width:20px; float:left;">3、</span> <span style="width:610px; float:left;">邓建烈主编.高中文言文精译精讲精练 高二:上海交通大学出版社,2006.06:35</span> </div>
# -- Gushiwen -> Toutiao batch publishing script ------------------------------
# Same pipeline as the one-shot version, refactored to log in once and then
# loop over every poem URL listed in myurl.txt, publishing one article per URL.
from bs4 import *
from pyquery import PyQuery as pq
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import os
import requests
import time
import threading
import logging
import random

# File logging next to this script; hard-exit if logging cannot be set up.
start_time = time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time()))
os_sep = os.sep
this_file_abspath, this_file_name = os.path.dirname(os.path.abspath(__file__)), os.path.abspath(__file__).split(os_sep)[
    -1]
logf = this_file_name + '.log'
try:
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s[thread:%(thread)d][process:%(process)d]',
                        datefmt='%a, %d %b %Y %H:%M:%S',
                        filename=logf,
                        filemode='a')
except Exception as e:
    s = '%s%s%s' % ('logging.basicConfig EXCEPTION ', time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())), e)
    with open(logf, 'a') as fo:
        fo.write(s)
    print(s)
    os._exit(4002)
logging.info('START')

img_url = 'https://s3.pstatp.com/toutiao/static/img/logo.201f80d.png'
img_dir = 'C:\\Users\\sas\\PycharmProjects\\py_win_to_unix\\crontab_chk_url\\personas\\trunk\\plugins\\spider\\dl_img_tmp\\'


def spider_webimg_dl_return_local_img_path(img_dir, img_url, local_default='default.DONOT_REMOVE.png'):
    """Download img_url into img_dir under a unique timestamped name and
    return the local path; on any failure return the bundled default image
    path instead."""
    r = '%s%s' % (img_dir, local_default)
    try:
        # Public API is .content — the old code read the private ._content.
        img_bytes = requests.get(img_url).content
        r = '%s%s%s%s%s' % (
            img_dir, time.strftime('%Y%m%d%H%M%S', time.localtime(time.time())), str(threading.get_ident()),
            random.randint(1234, 9876), '.png')
        if img_bytes:  # skip writing an empty response body
            with open(r, 'wb') as f:
                f.write(img_bytes)
    except Exception as e:
        print(e)
    return r


browser = webdriver.Chrome()
url = 'https://so.gushiwen.org/shiwenv_ee16df5673bc.aspx'
browser.get(url)

# Warm up the session by visiting a (time-seeded) existing Toutiao article
# before opening the SSO login page.
f_url_l = ['https://www.toutiao.com/a6514526304476332552/', 'https://www.toutiao.com/a6514778729951003150/']
f_url_l += ['https://www.toutiao.com/a6514216125151052291/', 'https://www.toutiao.com/a6512315164463727111/']
f_url_l_a = f_url_l[int(time.time()) % len(f_url_l)]
js = 'window.location.href="{}";'.format(f_url_l_a)
browser.execute_script(js)
time.sleep(random.randint(10, 20))
js = 'window.location.href="https://sso.toutiao.com/login/?service=https://mp.toutiao.com/sso_confirm/?redirect_url=/";'
browser.execute_script(js)
time.sleep(random.randint(10, 20))

# QQ-account login on the SSO page (done once, before the batch loop).
# NOTE(security): plaintext credentials in source — move to env/config.
ac_type, ac = 'qq', {}
ac['u'], ac['p'] = '344', 'gregr'
if ac_type == 'qq':
    myid, mypwd = ac['u'], ac['p']
    xp = '/html/body/div/div/div[2]/div/div/div/ul/li[2]'  # li[3] selects a different login tab
    browser.find_element_by_xpath(xp).click()
    time.sleep(10)
    js = '%s%s%s' % ('document.getElementById("u").value="', myid, '"')
    browser.execute_script(js)
    js = '%s%s%s' % ('document.getElementById("p").value="', mypwd, '"')
    browser.execute_script(js)
    time.sleep(random.randint(5, 15))
    xp_newpage = '//*[@id="go"]'
    browser.find_element_by_xpath(xp_newpage).click()
    time.sleep(random.randint(10, 20))
    browser.refresh()

# One scrape-and-publish cycle per URL; a failure on one URL is logged and
# the loop moves on to the next.
with open('myurl.txt', 'r') as fr:
    for url in fr:
        url = url.replace('\n', '')
        try:
            js = 'window.location.href="{}";'.format(url)
            browser.execute_script(js)

            # Click every "展开阅读全文 ∨" ("expand full text") link via JS.
            js = "a_=document.getElementsByTagName('a');le=a_.length;for(i=0;i<le;i++){if(a_[i].text=='展开阅读全文 ∨'){try{a_[i].click()}catch(err){console.log(err)}}}"
            try:
                browser.execute_script(js)
            except Exception as e:
                print(e)

            # Fallback: scroll in 100px steps and click any expander missed.
            ck_l_ori_len = len(browser.find_elements_by_link_text('展开阅读全文 ∨'))
            ck_l_ori_ok = 0
            try:
                for isc in range(100):
                    if ck_l_ori_ok == ck_l_ori_len:
                        break
                    time.sleep(1)
                    js = 'window.scrollTo(0,100*{})'.format(isc)
                    browser.execute_script(js)
                    ck_l = browser.find_elements_by_link_text('展开阅读全文 ∨')
                    for i in ck_l:
                        try:
                            i.click()
                            ck_l_ori_ok += 1
                        except Exception as e:
                            print(e)
            except Exception as e:
                print('window.scrollTo-->', e)

            # Extract the article fragments; strip the injected XHTML namespace.
            doc = pq(browser.page_source)
            r_k, r_v = 'xmlns="http://www.w3.org/1999/xhtml"', ''
            article_ = doc('.left>:nth-child(2).sons>.cont>.contson').html().replace(r_k, r_v)
            title_d = {'h1': doc('.left>:nth-child(2).sons>.cont>:nth-child(2)').html().replace(r_k, r_v)}
            author_d = {'h3': doc('.left>:nth-child(2).sons>.cont>:nth-child(3)').text()}
            translation_ = doc('.left>:nth-child(4)>.contyishang>:nth-child(2)').html().replace(r_k, r_v)
            explanation_ = doc('.left>:nth-child(4)>.contyishang>:nth-child(3)').html().replace(r_k, r_v)
            refer_ = doc('.left>:nth-child(4)>.cankao').html().replace(r_k, r_v)
            author_img_url = doc('.left>.sonspic>.cont>.divimg>:nth-child(1)').html().split('src="')[-1].split('"')[0]

            # Assemble the article HTML: heading, author, portrait, body,
            # notes, translation, references.
            k = 'h1'
            v = title_d[k]
            db_html = '<{}>{}</{}>'.format(k, v, k)
            k = 'h3'
            v = author_d[k]
            db_html = '{}<{}>{}</{}>'.format(db_html, k, v, k)
            db_html = '{}{}'.format(db_html, '<br><img src="{}" ><br>'.format(author_img_url))
            l = [db_html, article_, explanation_, translation_, refer_]
            db_html = '<br><br>'.join(l)

            # Strip every '<a href=...>' / '<a title=...>' opening tag.
            # Masking earlier '>' keeps the string the SAME length, so p2 is
            # the real index of this tag's closing '>' (the old slice with
            # p1 + 1 left a stray '>' in the output).
            for rp_s in ('<a href=', '<a title='):
                while rp_s in db_html:
                    p1 = db_html.index(rp_s)
                    tmp = '{}{}'.format(db_html[0:p1].replace('>', 'X'), db_html[p1:])
                    p2 = tmp.index('>')
                    db_html = '{}{}'.format(db_html[0:p1], db_html[p2 + 1:])

            d = {}
            d['title'] = '《{}》{}'.format(title_d['h1'], author_d['h3'])[0:30]  # Toutiao title length cap
            d['content'] = db_html

            js = 'window.location.href="https://mp.toutiao.com/profile_v3/graphic/publish";'  # profile_v2 is legacy
            browser.execute_script(js)
            time.sleep(6)

            # Collect the article's <img> src URLs via a temp-file round trip.
            dbhtml_str, pgc_img_url_l = d['content'], []
            myhtml = 'D:\\myhtml\\{}tmp.html'.format(random.randint(123, 999))
            with open(myhtml, 'w', encoding='utf-8') as fw:
                fw.write(dbhtml_str)
            with open(myhtml, 'r', encoding='utf-8') as myhtml_o:
                bs = BeautifulSoup(myhtml_o, 'html.parser')
                pgc_img_url_l = [i.attrs['src'] for i in bs.find_all('img')]

            # Upload every image through the MP resource manager's file input.
            js = 'window.location.href="https://mp.toutiao.com/profile_v3/graphic/resource-manager";'
            browser.execute_script(js)
            time.sleep(2)
            xp = '//*[@id="graphic"]/div/div/div[3]/div/div[1]/div[3]/div/input'
            upload = browser.find_element_by_xpath(xp)
            for img_url in pgc_img_url_l:
                local_img_path = spider_webimg_dl_return_local_img_path(img_dir, img_url)
                upload.send_keys(local_img_path)
            time.sleep(10 * len(pgc_img_url_l))

            # Read back the CDN URLs Toutiao assigned to the uploads.
            pgc_img_url_l_toutiao = [i.find_element_by_tag_name('img').get_attribute('src')
                                     for i in browser.find_elements_by_class_name('pic')][0:len(pgc_img_url_l)]
            pgc_img_url_l_toutiao = sorted(pgc_img_url_l_toutiao, reverse=True)

            # Replace each original <img ...> tag with the UEditor-style tag
            # pointing at the uploaded pgc-image code (template observed in
            # the editor DOM).
            img_n = dbhtml_str.count('<img')
            s = '<img onload="editor.fireEvent(\'contentchange\')" src="https://p1.pstatp.com/large/pgc-image/TTimgCode" _src="https://p1.pstatp.com/large/pgc-image/TTimgCode" alt="pgc-image/TTimgCode" buttonadded="true">'
            ss = ''
            l = pgc_img_url_l_toutiao
            for i in range(img_n):
                if i == 0:
                    p1 = dbhtml_str.index('<img ', 0)
                else:
                    # Search past the '<img ' just inserted at p1.
                    p1 = dbhtml_str.index('<img ', p1 + 3)
                tmp = '{}{}'.format(dbhtml_str[0:p1].replace('>', 'X'), dbhtml_str[p1:])
                p2 = tmp.index('>')  # closing '>' of this tag (earlier '>' masked)
                ss = s.replace('TTimgCode', l[0].split('/')[-1])
                dbhtml_str = '{}{}{}'.format(dbhtml_str[0:p1], ss, dbhtml_str[p2 + 1:])
                del l[0]
            print('-----------------')
            print(dbhtml_str)
            time.sleep(2)

            # Open the publish page and fill in the title.
            js = 'window.location.href="https://mp.toutiao.com/profile_v3/graphic/publish";'
            browser.execute_script(js)
            time.sleep(6)
            xp_newpage = '//*[@id="title"]'
            mytxt = d['title']
            browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
            browser.find_element_by_xpath(xp_newpage).send_keys(mytxt)
            time.sleep(2)
            xp = '//*[@id="edui18_body"]/div[1]'
            browser.find_element_by_xpath(xp).click()
            time.sleep(2)
            xp = '//*[@id="images"]/div[1]/div/span'
            browser.find_element_by_xpath(xp).click()
            time.sleep(3)

            # Assemble a syntactically valid JS string (worked out in the
            # browser console) and inject the body into the UEditor iframe.
            r_d = {'onload="editor.fireEvent(\'contentchange\')"': '', '"': "'", '\n': ''}
            dbhtml_str_py_js = dbhtml_str
            for k in r_d:
                dbhtml_str_py_js = dbhtml_str_py_js.replace(k, r_d[k])
            dbhtml_str_py_js = dbhtml_str_py_js.replace('nbsp;', ' ')
            js = 'document.getElementById("ueditor_0").contentWindow.document.getElementsByTagName("body")[0].innerHTML="{}"'.format(
                dbhtml_str_py_js)
            browser.execute_script(js)

            # Scroll so the publish controls become clickable (otherwise
            # "Element is not clickable at point (589, 952)").
            try:
                for isc in range(2):
                    time.sleep(1)
                    js = 'window.scrollTo(0,document.body.scrollHeight)'
                    browser.execute_script(js)
            except Exception as e:
                print('window.scrollTo-->', e)
            time.sleep(10)

            # Cover-image option (best effort — layout may vary).
            try:
                xp = '//*[@id="graphic"]/div/div/div[2]/div[2]/div[1]/div[2]/div/div/div[1]/div/label[3]/span'
                browser.find_element_by_xpath(xp).click()
                time.sleep(1)
            except Exception as e:
                print(e)
            time.sleep(2)

            # Ads toggle: label[2] = no ads, label[1] = serve ads (current choice).
            xp = '//*[@id="graphic"]/div/div/div[2]/div[2]/div[2]/div[2]/div[1]/label[1]/span'
            browser.find_element_by_xpath(xp).click()
            time.sleep(1)

            # Final button: div[2] = save draft, div[1] = publish (current choice).
            xp = '//*[@id="graphic"]/div/div/div[2]/div[3]/div[2]/div[1]'
            browser.find_element_by_xpath(xp).click()
            time.sleep(random.randint(10, 20))
            js = 'window.location.href="https://www.toutiao.com/"'
            browser.execute_script(js)
        except Exception as e:
            print(e)
            logging.exception(e)

try:
    browser.quit()
except Exception as e1:
    print(e1)
    logging.exception(e1)
# 重复校验的实时性 (note: the platform's duplicate-content check runs in near real time — republishing similar text gets flagged)