文章编辑器 文本替换 操作dom 发帖 富文本 今日头条发布富文本的实现 键盘化的html
js 修改 iframe
// Browser-console snippet: overwrite the rich-text editor's content by writing
// HTML straight into the <body> of the editor iframe (element id "ueditor_0").
// `var` is used instead of an undeclared assignment so the snippet does not
// create an implicit global and still works when re-run or in strict mode.
var editorBody = document.getElementById('ueditor_0').contentWindow.document.getElementsByTagName("body")[0];
editorBody.innerHTML = '<li ><img src="https://www.baidu.com/img/baidu_jgylogo3.gif" alt="qq_383" title="qq_3503"></li><li >qq_3203</li><li >2017年04月13日 10:02</li><li ><span>3074</span></li>';
"""Poll MySQL for pending articles and publish them through a Chrome session.

The script loops forever: it loads the configured account(s), looks up the
articles each account still has to publish, drives Chrome through the
login / publish pages with Selenium, and records the result URL in MySQL.

NOTE(review): this file was reconstructed from a copy whose line breaks were
lost, so statement nesting and the extent of some `#` comments are a best
guess; spots that are uncertain are marked NOTE(review) below.  The original
also carried a large commented-out legacy flow (textarea fill, image upload
via the `fileElem` input, success log file); that dead code is omitted here.
"""
import logging
import os
import random
import threading
import time
from time import sleep

import pymysql
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

start_time = time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time()))
os_sep = os.sep
this_file_abspath = os.path.dirname(os.path.abspath(__file__))
this_file_name = os.path.abspath(__file__).split(os_sep)[-1]
logf = this_file_name + '.log'
try:
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s[thread:%(thread)d][process:%(process)d]',
                        datefmt='%a, %d %b %Y %H:%M:%S',
                        filename=logf,
                        filemode='a')
except Exception as e:
    # Logging could not be set up: leave a trace in the log file and abort.
    s = '%s%s%s' % ('logging.basicConfig EXCEPTION ', time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())), e)
    with open(logf, 'a') as fo:
        fo.write(s)
    print(s)
    os._exit(4002)
logging.info('START')

img_url = 'https://s3.pstatp.com/toutiao/static/img/logo.201f80d.png'
img_dir = 'C:\\Users\\sas\\PycharmProjects\\py_win_to_unix\\crontab_chk_url\\personas\\trunk\\plugins\\spider\\dl_img_tmp\\'

# Title typed into the publish form and file holding the article body HTML.
ARTICLE_TITLE = '林志玲捐款记录被翻出 单笔高达千万'
ARTICLE_HTML_FILE = 'toutaio.db.html'


def spider_webimg_dl_return_local_img_path(img_dir, img_url, local_default='default.DONOT_REMOVE.png'):
    """Download *img_url* into *img_dir* and return the local file path.

    The file name is built from a timestamp, the current thread id and the
    URL with its special characters replaced.  If the download fails, the
    response body is empty, or the file cannot be written, the path of the
    default image (``img_dir + local_default``) is returned instead, so the
    caller never receives a path to a file that was not written.
    """
    default_path = '%s%s' % (img_dir, local_default)
    try:
        # Use the public `.content` attribute and a timeout so one stalled
        # request cannot hang the script.
        data = requests.get(img_url, timeout=30).content
        if not data:
            return default_path
        local_path = '%s%s%s%s%s' % (
            img_dir,
            time.strftime('%Y%m%d%H%M%S', time.localtime(time.time())),
            str(threading.get_ident()),
            img_url.replace('/', '_xl_').replace(':', '_fxl_').replace('?', '_fxlquestion_').replace(
                '=', '_fxlequal_').replace('&', '_fxland_'),
            '.png')
        with open(local_path, 'wb') as f:
            f.write(data)
        return local_path
    except Exception as e:
        print(e)
        logging.exception(e)
        return default_path


# NOTE(review): database credentials are hard-coded (root account); consider
# moving them to configuration outside the source file.
h, pt, u, p, db = '192.168.2.210', 3306, 'root', 'joke_', 'star_media_helper'


def mysql_fetch(sql, res_type='tuple', args=None):
    """Run a query and return all rows.

    :param sql: statement text; may contain ``%s`` placeholders when *args*
        is given.
    :param res_type: ``'dic'`` returns rows as dicts, anything else as tuples.
    :param args: optional parameters passed to ``cursor.execute``.
    :return: the fetched rows, or ``()`` when the connection cannot be opened.

    Errors raised while executing the statement propagate to the caller; the
    connection is always closed.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8')
    except Exception as e:
        print(e)
        return ()
    try:
        if res_type == 'dic':
            cursor = conn.cursor(pymysql.cursors.DictCursor)
        else:
            cursor = conn.cursor()
        try:
            cursor.execute(sql, args)
            # Read the rows while the cursor is still open.
            rows = cursor.fetchall()
        finally:
            cursor.close()
        conn.commit()
        return rows
    finally:
        conn.close()


def mysql_write(sql, args=None):
    """Execute a write statement and commit it.

    :param sql: statement text; may contain ``%s`` placeholders when *args*
        is given.
    :param args: optional parameters passed to ``cursor.execute``.
    :return: ``0`` on success, ``1`` when the connection cannot be opened.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8')
    except Exception as e:
        print(e)
        return 1
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql, args)
        finally:
            cursor.close()
        conn.commit()
        return 0
    finally:
        conn.close()


def _goto(browser, url):
    """Navigate by assigning ``window.location.href``, as the script always did."""
    browser.execute_script('window.location.href = arguments[0];', url)


def _set_value_by_id(browser, element_id, value):
    """Set an input's value from JavaScript.

    The value travels as a script argument, so quotes or backslashes in it
    cannot break the injected JavaScript.
    """
    browser.execute_script('document.getElementById(arguments[0]).value = arguments[1];', element_id, value)


def _load_article_html(path=ARTICLE_HTML_FILE):
    """Return the article HTML file as one string with newlines removed."""
    with open(path, 'r', encoding='utf-8') as fr:
        return ''.join(line.replace('\n', '') for line in fr)


def _pending_articles(toutiao_uid):
    """Return up to two article rows this account is scheduled to publish and has not published yet."""
    # Publish-restriction logic: schedule rows that list this account and
    # whose effective time has been reached.
    sql = ("SELECT * FROM joke__helper_article_publish "
           "WHERE INSTR(CONCAT(',',id_toutiao_uid_list,','),CONCAT(',',%s,',')) "
           "AND time_effective<=%s ORDER BY id DESC; ")
    args = (toutiao_uid, int(time.time()))
    print(sql, args)
    logging.info('%s %s', sql, args)
    schedule_rows = mysql_fetch(sql, 'dic', args)
    if not schedule_rows:
        return ()

    # NOTE(review): id_article_list is presumably a comma-separated list of
    # article ids; each id is passed as its own query parameter.
    article_ids = [token.strip()
                   for row in schedule_rows
                   for token in str(row['id_article_list']).split(',')
                   if token.strip()]
    if not article_ids:
        return ()
    sql = ('SELECT * FROM joke__helper_article WHERE id IN ({}) AND id NOT IN '
           '(SELECT article_id FROM joke__helper_article_publish_result WHERE 1 AND toutiao_uid=%s ) '
           'LIMIT 2; ').format(','.join(['%s'] * len(article_ids)))
    args = tuple(article_ids) + (toutiao_uid,)
    logging.info('%s %s', sql, args)
    return mysql_fetch(sql, 'dic', args)


def _login_with_qq(browser, account, password):
    """Choose the third login option on the SSO page and submit the QQ credentials."""
    browser.find_element(By.XPATH, '/html/body/div/div/div[2]/div/div/div/ul/li[3]').click()
    time.sleep(10)
    _set_value_by_id(browser, 'u', account)
    _set_value_by_id(browser, 'p', password)
    time.sleep(random.randint(5, 15))
    browser.find_element(By.XPATH, '//*[@id="go"]').click()
    time.sleep(random.randint(10, 20))


def _run_browser_session(browser, myid, mypwd, toutiao_uid, articles):
    """Drive one logged-in browser session: log in, fill the publish form, record the result."""
    # Open one ordinary article page first; the page is picked by the clock.
    warmup_urls = ['https://www.toutiao.com/a6514526304476332552/',
                   'https://www.toutiao.com/a6514661446876398088/',
                   'https://www.toutiao.com/a6514778729951003150/',
                   'https://www.toutiao.com/a6514216125151052291/',
                   'https://www.toutiao.com/a6512315164463727111/',
                   'https://www.toutiao.com/a6513334304318161411/']
    browser.get(warmup_urls[int(time.time()) % len(warmup_urls)])
    time.sleep(random.randint(10, 20))

    login_url = 'https://sso.toutiao.com/login/?service=https://mp.toutiao.com/sso_confirm/?redirect_url=/'
    _goto(browser, login_url)
    time.sleep(random.randint(10, 20))
    # NOTE(review): the original executed the same navigation a second time
    # here (an alternative login URL was commented out in between); kept.
    _goto(browser, login_url)

    # Only QQ login was ever selected; the 'mail_qq' branch was unreachable.
    _login_with_qq(browser, myid, mypwd)

    time.sleep(5)
    browser.refresh()
    _goto(browser, 'https://www.toutiao.com/')
    browser.refresh()
    time.sleep(6)
    _goto(browser, 'https://mp.toutiao.com/profile_v3/graphic/publish')
    time.sleep(6)

    # Title.
    _set_value_by_id(browser, 'title', ARTICLE_TITLE)
    time.sleep(2)

    # Body: write the HTML into the editor iframe's <body>.
    browser.execute_script(
        'document.getElementById("ueditor_0").contentWindow.document'
        '.getElementsByTagName("body")[0].innerHTML = arguments[0];',
        _load_article_html())
    time.sleep(2)
    browser.find_element(By.XPATH, '//*[@id="graphic"]/div/div/div[2]/div[3]/div[2]/div[1]').click()

    click_next_js = 'document.getElementsByClassName("step-btn next")[0].click();'
    click_submit_js = 'document.getElementsByClassName("step-btn submit")[0].click();'
    for article in articles[0:1]:
        dbid = article['id']
        tmp_l = ['口红', '指甲油', '护发素', '沐浴露', '洗手液', '洗发水', '牙膏']
        tmp_l_1 = ['老人', '小孩', '白领', '前台妹子', '行政妹子', '大学生', '高中生']
        tmp_l_2 = ['类型', '特质', '种类', '价位', '原材料', '主要成分', '价格']
        question = '{}{}{}{}{}{}{}'.format(str(random.randint(1, 12)), '月份,', random.choice(tmp_l_1), '适合使用什么',
                                           random.choice(tmp_l_2), '的', random.choice(tmp_l))
        browser.execute_script(
            'document.getElementsByClassName("input-box")[0].childNodes[0].value = arguments[0];', question)
        time.sleep(12)
        browser.execute_script(click_next_js)
        browser.execute_script(click_submit_js)
        time.sleep(12)
        # NOTE(review): in the source the navigation to the user page
        # (https://www.wukong.com/user/?uid=<uid>&type=1) appears commented
        # out, which makes this call re-run the submit click; confirm which
        # of the two was intended.
        browser.execute_script(click_submit_js)
        time.sleep(12)
        res_url = browser.find_element(By.CLASS_NAME, 'question-title').find_elements(By.TAG_NAME, 'a')[
            0].get_attribute('href')

        sql = ('INSERT INTO joke__helper_article_publish_result (article_id,article_url,time_script,toutiao_uid) '
               'VALUE(%s,%s,%s,%s);')
        args = (dbid, res_url, int(time.time()), toutiao_uid)
        mysql_write(sql, args)
        print(sql, args)
        time.sleep(random.randint(20, 30))
        _goto(browser, 'https://www.toutiao.com/')


def _publish_for_account(ac):
    """Publish the pending article(s) of one account; always closes the browser it opens."""
    myid, mypwd, toutiao_uid = ac['u'], ac['p'], ac['toutiao_uid']
    articles = _pending_articles(toutiao_uid)
    if not articles:
        return
    browser = webdriver.Chrome()
    try:
        _run_browser_session(browser, myid, mypwd, toutiao_uid, articles)
    finally:
        # Quit Chrome even when a step above raised, so no browser is leaked.
        try:
            browser.quit()
        except Exception as e1:
            print(e1)
            logging.exception(e1)


while True:
    logging.info('LOOP----')
    sql = 'SELECT username,password,toutiaoid FROM joke__star_helper_namepwd WHERE status=1 AND category=1 AND id=7856582 AND NOT (toutiaoid IS NULL OR toutiaoid="" )'
    accounts = [{'u': row[0], 'p': row[1], 'toutiao_uid': row[2]} for row in mysql_fetch(sql)]
    for ac in accounts:
        _publish_for_account(ac)
    # NOTE(review): the nesting of this pause was lost with the indentation;
    # it is placed at loop level so an idle round never spins on the database.
    time.sleep(random.randint(120, 300))
# Type the article title into the #title input with real key events (a SPACE
# first, then the text) instead of assigning `.value` from JavaScript.
# `browser` and `Keys` come from the surrounding script.
from selenium.webdriver.common.by import By  # locator constants for find_element

xp_newpage = '//*[@id="title"]'
mytxt = '林志玲捐款记录被翻出 单笔高达千万'
# find_element(By.XPATH, ...) replaces the deprecated find_element_by_xpath.
browser.find_element(By.XPATH, xp_newpage).send_keys(Keys.SPACE)
browser.find_element(By.XPATH, xp_newpage).send_keys(mytxt)
<div class='article' id='artibody'> <div class='img_wrapper'> <img alt='林志玲' src='http://n.sinaimg.cn/ent/transform/703/w253h450/20180416/77p2-fzcyxmv1344655.jpg'> <span class='img_descr'>林志玲</span> </div> <div class='img_wrapper'> <img alt='林志玲捐款记录' src='http://n.sinaimg.cn/ent/transform/250/w600h450/20180416/CVWm-fzcyxmv1342897.jpg'> <span class='img_descr'>林志玲捐款记录</span> </div> <div class='img_wrapper'> <img alt='林志玲捐款记录' src='http://n.sinaimg.cn/ent/transform/250/w600h450/20180416/hXMn-fzcyxmv1342914.jpg'> <span class='img_descr'>林志玲捐款记录</span> </div> <!--video-list--> <div class='video-2017' id='videoList0'></div> <!--/video-list--> <p> 新浪娱乐讯 据台湾媒体报道,林志玲 自出道以来,热心公益,甚至创立了自己的基金会,每年固定发行公益年历。近日明星从事公益的话题发烧,她也被网友挖出,几乎每个月都在转帐捐款,且其中一笔高达1000万人民币,更让网友惊呼连连。 </p> <div id='ad_44124' class='otherContent_01' style='display: block; margin: 10px 20px 10px 0px; float: left; overflow: hidden; clear: both; padding: 4px; width: 300px; height: 250px;'> </div> <p> 林志玲被网友翻出,2016年至2018年间的捐款纪录,几乎每个月都有记录,且最低都是人民币万元起跳,其中甚至有一笔高达1000万人民币,捐款项目是“筑巢行动”,不少人看到明细,都惊讶表示,原来志玲姐姐私下默默捐了这么多善款,还有人笑称:“她是不是拿着手机,无聊就转帐的那种人?”、“真的人美心也美”、“太圈粉了”、“志玲姐姐真的太低调了”。</p> <p> 43岁的林志玲1998年出道,从伸展台转战影视圈,尚未出名前就热心公益,更在2011年,主动以个人名义,成立“志玲姐姐慈善基金会”。她也固定每年拍摄公益写真年历,所得全数捐给儿福机构,或是帮助弱势孩童急难救助等,多年从不间断,且义卖期间,她从不公开做宣传,低调行善,受到不少人赞赏。</p> <p> 林志玲2016年受访曾透露,投入公益的契机,是因为身边罹癌友人的一句话,才让她下定决心。当时这位好友问她:“你希望离开后,怎样被大家记得?”她想了一想,认为既然是公众人物,就应该让大家记得自己微笑的样子,要用这样的身分,做些有影响力的事,从此将公益当做自我赋予的使命,一做就是好多年。ETtoday/文</p> <p class='article-editor'>(责编:kita)</p> <div style='font-size: 0px; height: 0px; clear: both;'></div> </div> <!-- 非定向300*250按钮 end --> </div>
"""Poll MySQL for pending articles and publish them through a Chrome session.

Second revision of the publishing script: the title is typed with real key
events and the editor's image dialog is opened and closed around the HTML
injection.

NOTE(review): this file was reconstructed from a copy whose line breaks were
lost, so statement nesting and the extent of some `#` comments are a best
guess; spots that are uncertain are marked NOTE(review) below.  The original
also carried a large commented-out legacy flow (textarea fill, image upload
via the `fileElem` input, success log file); that dead code is omitted here.
"""
import logging
import os
import random
import threading
import time
from time import sleep

import pymysql
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

start_time = time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time()))
os_sep = os.sep
this_file_abspath = os.path.dirname(os.path.abspath(__file__))
this_file_name = os.path.abspath(__file__).split(os_sep)[-1]
logf = this_file_name + '.log'
try:
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s[thread:%(thread)d][process:%(process)d]',
                        datefmt='%a, %d %b %Y %H:%M:%S',
                        filename=logf,
                        filemode='a')
except Exception as e:
    # Logging could not be set up: leave a trace in the log file and abort.
    s = '%s%s%s' % ('logging.basicConfig EXCEPTION ', time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())), e)
    with open(logf, 'a') as fo:
        fo.write(s)
    print(s)
    os._exit(4002)
logging.info('START')

img_url = 'https://s3.pstatp.com/toutiao/static/img/logo.201f80d.png'
img_dir = 'C:\\Users\\sas\\PycharmProjects\\py_win_to_unix\\crontab_chk_url\\personas\\trunk\\plugins\\spider\\dl_img_tmp\\'

# Title typed into the publish form and file holding the article body HTML.
ARTICLE_TITLE = '林志玲捐款记录被翻出 单笔高达千万'
ARTICLE_HTML_FILE = 'toutaio.db.html'

# XPath locators used on the publish page.
XP_TITLE = '//*[@id="title"]'
XP_EDITOR_IMAGE_BUTTON = '//*[@id="edui18_body"]/div[1]'
XP_IMAGE_DIALOG_CLOSE = '//*[@id="images"]/div[1]/div/span'
XP_PUBLISH_BUTTON = '//*[@id="graphic"]/div/div/div[2]/div[3]/div[2]/div[1]'


def spider_webimg_dl_return_local_img_path(img_dir, img_url, local_default='default.DONOT_REMOVE.png'):
    """Download *img_url* into *img_dir* and return the local file path.

    The file name is built from a timestamp, the current thread id and the
    URL with its special characters replaced.  If the download fails, the
    response body is empty, or the file cannot be written, the path of the
    default image (``img_dir + local_default``) is returned instead, so the
    caller never receives a path to a file that was not written.
    """
    default_path = '%s%s' % (img_dir, local_default)
    try:
        # Use the public `.content` attribute and a timeout so one stalled
        # request cannot hang the script.
        data = requests.get(img_url, timeout=30).content
        if not data:
            return default_path
        local_path = '%s%s%s%s%s' % (
            img_dir,
            time.strftime('%Y%m%d%H%M%S', time.localtime(time.time())),
            str(threading.get_ident()),
            img_url.replace('/', '_xl_').replace(':', '_fxl_').replace('?', '_fxlquestion_').replace(
                '=', '_fxlequal_').replace('&', '_fxland_'),
            '.png')
        with open(local_path, 'wb') as f:
            f.write(data)
        return local_path
    except Exception as e:
        print(e)
        logging.exception(e)
        return default_path


# NOTE(review): database credentials are hard-coded (root account); consider
# moving them to configuration outside the source file.
h, pt, u, p, db = '192.168.2.210', 3306, 'root', 'joke', 'star_media_joke'


def mysql_fetch(sql, res_type='tuple', args=None):
    """Run a query and return all rows.

    :param sql: statement text; may contain ``%s`` placeholders when *args*
        is given.
    :param res_type: ``'dic'`` returns rows as dicts, anything else as tuples.
    :param args: optional parameters passed to ``cursor.execute``.
    :return: the fetched rows, or ``()`` when the connection cannot be opened.

    Errors raised while executing the statement propagate to the caller; the
    connection is always closed.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8')
    except Exception as e:
        print(e)
        return ()
    try:
        if res_type == 'dic':
            cursor = conn.cursor(pymysql.cursors.DictCursor)
        else:
            cursor = conn.cursor()
        try:
            cursor.execute(sql, args)
            # Read the rows while the cursor is still open.
            rows = cursor.fetchall()
        finally:
            cursor.close()
        conn.commit()
        return rows
    finally:
        conn.close()


def mysql_write(sql, args=None):
    """Execute a write statement and commit it.

    :param sql: statement text; may contain ``%s`` placeholders when *args*
        is given.
    :param args: optional parameters passed to ``cursor.execute``.
    :return: ``0`` on success, ``1`` when the connection cannot be opened.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8')
    except Exception as e:
        print(e)
        return 1
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql, args)
        finally:
            cursor.close()
        conn.commit()
        return 0
    finally:
        conn.close()


def _goto(browser, url):
    """Navigate by assigning ``window.location.href``, as the script always did."""
    browser.execute_script('window.location.href = arguments[0];', url)


def _set_value_by_id(browser, element_id, value):
    """Set an input's value from JavaScript.

    The value travels as a script argument, so quotes or backslashes in it
    cannot break the injected JavaScript.
    """
    browser.execute_script('document.getElementById(arguments[0]).value = arguments[1];', element_id, value)


def _load_article_html(path=ARTICLE_HTML_FILE):
    """Return the article HTML file as one string with newlines removed."""
    with open(path, 'r', encoding='utf-8') as fr:
        return ''.join(line.replace('\n', '') for line in fr)


def _pending_articles(toutiao_uid):
    """Return up to two article rows this account is scheduled to publish and has not published yet."""
    # Publish-restriction logic: schedule rows that list this account and
    # whose effective time has been reached.
    sql = ("SELECT * FROM joke_joke_article_publish "
           "WHERE INSTR(CONCAT(',',id_toutiao_uid_list,','),CONCAT(',',%s,',')) "
           "AND time_effective<=%s ORDER BY id DESC; ")
    args = (toutiao_uid, int(time.time()))
    print(sql, args)
    logging.info('%s %s', sql, args)
    schedule_rows = mysql_fetch(sql, 'dic', args)
    if not schedule_rows:
        return ()

    # NOTE(review): id_article_list is presumably a comma-separated list of
    # article ids; each id is passed as its own query parameter.
    article_ids = [token.strip()
                   for row in schedule_rows
                   for token in str(row['id_article_list']).split(',')
                   if token.strip()]
    if not article_ids:
        return ()
    sql = ('SELECT * FROM joke_joke_article WHERE id IN ({}) AND id NOT IN '
           '(SELECT article_id FROM joke_joke_article_publish_result WHERE 1 AND toutiao_uid=%s ) '
           'LIMIT 2; ').format(','.join(['%s'] * len(article_ids)))
    args = tuple(article_ids) + (toutiao_uid,)
    logging.info('%s %s', sql, args)
    return mysql_fetch(sql, 'dic', args)


def _login_with_qq(browser, account, password):
    """Choose the third login option on the SSO page and submit the QQ credentials."""
    browser.find_element(By.XPATH, '/html/body/div/div/div[2]/div/div/div/ul/li[3]').click()
    time.sleep(10)
    _set_value_by_id(browser, 'u', account)
    _set_value_by_id(browser, 'p', password)
    time.sleep(random.randint(5, 15))
    browser.find_element(By.XPATH, '//*[@id="go"]').click()
    time.sleep(random.randint(10, 20))


def _open_and_close_image_dialog(browser):
    """Click the editor's image button, then the dialog's close control."""
    browser.find_element(By.XPATH, XP_EDITOR_IMAGE_BUTTON).click()
    time.sleep(2)
    browser.find_element(By.XPATH, XP_IMAGE_DIALOG_CLOSE).click()
    time.sleep(1)


def _fill_publish_form(browser):
    """Type the title, inject the article HTML into the editor and click publish."""
    # Title: typed with key events (a SPACE first, then the text).
    browser.find_element(By.XPATH, XP_TITLE).send_keys(Keys.SPACE)
    browser.find_element(By.XPATH, XP_TITLE).send_keys(ARTICLE_TITLE)
    time.sleep(2)

    article_html = _load_article_html()
    _open_and_close_image_dialog(browser)
    # Body: write the HTML into the editor iframe's <body>.
    browser.execute_script(
        'document.getElementById("ueditor_0").contentWindow.document'
        '.getElementsByTagName("body")[0].innerHTML = arguments[0];',
        article_html)
    _open_and_close_image_dialog(browser)

    # The original only looked these two elements up (no click); the lookups
    # are kept because they raise if the expected controls are missing.
    browser.find_element(By.XPATH, '//*[@id="pgc-text-img"]/div/div[2]/div/button[1]')
    browser.find_element(
        By.XPATH, '//*[@id="graphic"]/div/div/div[2]/div[2]/div[1]/div[2]/div/div/div/div/label[3]/div/input')
    time.sleep(2)
    time.sleep(2)
    browser.find_element(By.XPATH, XP_PUBLISH_BUTTON).click()


def _run_browser_session(browser, myid, mypwd, toutiao_uid, articles):
    """Drive one logged-in browser session: log in, fill the publish form, record the result."""
    # Open one ordinary article page first; the page is picked by the clock.
    warmup_urls = ['https://www.toutiao.com/a6514526304476332552/',
                   'https://www.toutiao.com/a6514778729951003150/',
                   'https://www.toutiao.com/a6514216125151052291/',
                   'https://www.toutiao.com/a6512315164463727111/',
                   'https://www.toutiao.com/a6513334304318161411/']
    browser.get(warmup_urls[int(time.time()) % len(warmup_urls)])
    time.sleep(random.randint(10, 20))

    login_url = 'https://sso.toutiao.com/login/?service=https://mp.toutiao.com/sso_confirm/?redirect_url=/'
    _goto(browser, login_url)
    time.sleep(random.randint(10, 20))
    # NOTE(review): the original executed the same navigation a second time
    # here (an alternative login URL was commented out in between); kept.
    _goto(browser, login_url)

    # Only QQ login was ever selected; the 'mail_qq' branch was unreachable.
    _login_with_qq(browser, myid, mypwd)

    time.sleep(5)
    browser.refresh()
    _goto(browser, 'https://www.toutiao.com/')
    browser.refresh()
    time.sleep(6)
    _goto(browser, 'https://mp.toutiao.com/profile_v3/graphic/publish')
    time.sleep(6)

    _fill_publish_form(browser)

    click_next_js = 'document.getElementsByClassName("step-btn next")[0].click();'
    click_submit_js = 'document.getElementsByClassName("step-btn submit")[0].click();'
    for article in articles[0:1]:
        dbid = article['id']
        tmp_l = ['口红', '指甲油', '护发素', '沐浴露', '洗手液', '洗发水', '牙膏']
        tmp_l_1 = ['老人', '小孩', '白领', '前台妹子', '行政妹子', '大学生', '高中生']
        tmp_l_2 = ['类型', '特质', '种类', '价位', '原材料', '主要成分', '价格']
        question = '{}{}{}{}{}{}{}'.format(str(random.randint(1, 12)), '月份,', random.choice(tmp_l_1), '适合使用什么',
                                           random.choice(tmp_l_2), '的', random.choice(tmp_l))
        browser.execute_script(
            'document.getElementsByClassName("input-box")[0].childNodes[0].value = arguments[0];', question)
        time.sleep(12)
        browser.execute_script(click_next_js)
        browser.execute_script(click_submit_js)
        time.sleep(12)
        # NOTE(review): in the source the navigation to the user page
        # (https://www.wukong.com/user/?uid=<uid>&type=1) appears commented
        # out, which makes this call re-run the submit click; confirm which
        # of the two was intended.
        browser.execute_script(click_submit_js)
        time.sleep(12)
        res_url = browser.find_element(By.CLASS_NAME, 'question-title').find_elements(By.TAG_NAME, 'a')[
            0].get_attribute('href')

        sql = ('INSERT INTO joke_joke_article_publish_result (article_id,article_url,time_script,toutiao_uid) '
               'VALUE(%s,%s,%s,%s);')
        args = (dbid, res_url, int(time.time()), toutiao_uid)
        mysql_write(sql, args)
        print(sql, args)
        time.sleep(random.randint(20, 30))
        _goto(browser, 'https://www.toutiao.com/')


def _publish_for_account(ac):
    """Publish the pending article(s) of one account; always closes the browser it opens."""
    myid, mypwd, toutiao_uid = ac['u'], ac['p'], ac['toutiao_uid']
    articles = _pending_articles(toutiao_uid)
    if not articles:
        return
    browser = webdriver.Chrome()
    try:
        _run_browser_session(browser, myid, mypwd, toutiao_uid, articles)
    finally:
        # Quit Chrome even when a step above raised, so no browser is leaked.
        try:
            browser.quit()
        except Exception as e1:
            print(e1)
            logging.exception(e1)


while True:
    logging.info('LOOP----')
    sql = 'SELECT username,password,toutiaoid FROM joke_star_joke_namepwd WHERE status=1 AND category=1 AND id=7856582 AND NOT (toutiaoid IS NULL OR toutiaoid="" )'
    accounts = [{'u': row[0], 'p': row[1], 'toutiao_uid': row[2]} for row in mysql_fetch(sql)]
    for ac in accounts:
        _publish_for_account(ac)
    # NOTE(review): the nesting of this pause was lost with the indentation;
    # it is placed at loop level so an idle round never spins on the database.
    time.sleep(random.randint(120, 300))
<img onload="editor.fireEvent('contentchange')" src="https://p1.pstatp.com/large/pgc-image/152385934210854ceb909ec" alt="pgc-image/152385934210854ceb909ec" _src="https://p1.pstatp.com/large/pgc-image/152385934210854ceb909ec" buttonadded="true">
# Fragment of the publishing flow for the newer editor (elements with
# "ql-*" class names).  It expects these names from the surrounding script:
# `browser`, `Keys`, `time`, `dbhtml_str` (article HTML), `d` (dict holding
# the article 'title') and `pgc_img_url_l_toutiao` (list of uploaded image
# URLs, one per <img> tag, consumed from the front).
#
# Shape of an image tag as the editor itself emits it:
# <img onload="editor.fireEvent('contentchange')" src="https://p1.pstatp.com/large/pgc-image/15238623686755f9e3c409a" _src="https://p1.pstatp.com/large/pgc-image/15238623686755f9e3c409a" alt="pgc-image/15238623686755f9e3c409a" buttonadded="true">
import re

from selenium.webdriver.common.by import By

dbhtml_str_ = dbhtml_str  # untouched copy of the incoming HTML
s = '<img onload="editor.fireEvent(\'contentchange\')" src="https://p1.pstatp.com/large/pgc-image/TTimgCode" _src="https://p1.pstatp.com/large/pgc-image/TTimgCode" alt="pgc-image/TTimgCode" buttonadded="true">'


def _toutiao_img_tag(_match):
    """Build the replacement tag for one <img>, consuming the next uploaded image URL."""
    # pop(0) keeps the original behaviour of using the URLs in order and
    # removing each one from the shared list; it raises IndexError when
    # there are fewer URLs than images, as before.
    img_code = pgc_img_url_l_toutiao.pop(0).split('/')[-1]
    return s.replace('TTimgCode', img_code)


# Replace every '<img ...>' tag with the editor-style tag that points at the
# uploaded image.  Counting and replacing now use the same pattern; before,
# the count looked for '<img' while the search looked for '<img ', so a tag
# such as '<img/>' made the loop raise ValueError.
_img_tag_re = re.compile(r'<img [^>]*>')
img_n = len(_img_tag_re.findall(dbhtml_str))
dbhtml_str = _img_tag_re.sub(_toutiao_img_tag, dbhtml_str)
print('-----------------')
print(dbhtml_str)
time.sleep(2)

browser.execute_script('window.location.href="https://mp.toutiao.com/profile_v3/graphic/publish";')
time.sleep(6)

# Title: typed with key events (a SPACE first, then the text).
xp_newpage = '//*[@id="title"]'
mytxt = d['title']
browser.find_element(By.XPATH, xp_newpage).send_keys(Keys.SPACE)
browser.find_element(By.XPATH, xp_newpage).send_keys(mytxt)
time.sleep(2)

# SAVE NOT DEL -- previous approach for the iframe-based editor, kept for
# reference as an inert string.
"""
xp = '//*[@id="edui18_body"]/div[1]'
# //*[@id="edui18_body"]/div[1]
browser.find_element_by_xpath(xp).click()
time.sleep(2)
xp = '//*[@id="images"]/div[1]/div/span'
browser.find_element_by_xpath(xp).click()
time.sleep(3)
'''
'it=document.getElementById("ueditor_0").contentWindow.document.getElementsByTagName("body")[0];it.innerHTML="{}"'.format(dbhtml_str.replace('onload="editor.fireEvent(\'contentchange\')"','').replace('"',"'").replace('\n',''))
'''
# 结合浏览器控制台,拼接符合语法的js字符串
r_d = {'onload="editor.fireEvent(\'contentchange\')"': '', '"': "'", '\n': ''}
dbhtml_str_py_js = dbhtml_str
for k in r_d:
    dbhtml_str_py_js = dbhtml_str_py_js.replace(k, r_d[k])
dbhtml_str_py_js = dbhtml_str_py_js.replace('nbsp;', ' ')
js = 'document.getElementById("ueditor_0").contentWindow.document.getElementsByTagName("body")[0].innerHTML="{}"'.format(
    dbhtml_str_py_js)
browser.execute_script(js)
"""

# Activate the editing area.
browser.find_element(By.CLASS_NAME, 'ql-container').click()
# Click the image tool (the upload "circle" button).
browser.find_element(By.CLASS_NAME, 'icon-pic_tool').click()
# Switch to the last tab of the upload dialog.
browser.find_element(By.CLASS_NAME, 'tui-tab-list').find_elements(By.CLASS_NAME, 'tui-tab')[-1].click()
# Close the upload dialog via the last button of the active panel.
browser.find_element(By.CLASS_NAME, 'tui-tab-panel-active').find_elements(By.CLASS_NAME, 'tui-btn')[-1].click()

# Prepare the HTML for injection: drop the onload handler and newlines, and
# turn non-breaking-space entities into plain spaces ('&nbsp;' first, so no
# stray '&' is left behind; the bare 'nbsp;' form is still handled).
dbhtml_str_py_js = dbhtml_str
for old, new in (('onload="editor.fireEvent(\'contentchange\')"', ''),
                 ('\n', ''),
                 ('&nbsp;', ' '),
                 ('nbsp;', ' ')):
    dbhtml_str_py_js = dbhtml_str_py_js.replace(old, new)

# Write the HTML into the editor.  It is passed as a script argument, so no
# quote juggling is needed to keep the JavaScript syntactically valid.
browser.execute_script(
    'document.getElementsByClassName("ql-editor ql-blank")[0].innerHTML = arguments[0];',
    dbhtml_str_py_js)