文章编辑器 文本替换 操作dom 发帖 富文本 今日头条发布富文本的实现 键盘化的html

 

 

 

 

 

 

 

js  修改  iframe

 

 

 

// Look up the <body> element of the document loaded inside the iframe whose id is 'ueditor_0'.
it=document.getElementById('ueditor_0').contentWindow.document.getElementsByTagName("body")[0];

// Replace that body's markup with the given HTML string.
it.innerHTML='<li ><img src="https://www.baidu.com/img/baidu_jgylogo3.gif" alt="qq_383" title="qq_3503"></li><li >qq_3203</li><li >2017年04月13日 10:02</li><li ><span>3074</span></li>'

 

 

 

from selenium import webdriver
from  time import sleep
import time
from selenium.webdriver.common.keys import Keys
import os

import requests
import time
import threading
import logging
import random

# Timestamp taken once at start-up, formatted as YYYYmmdd_HHMMSS (local time).
start_time = time.strftime('%Y%m%d_%H%M%S')
os_sep = os.sep
# Directory and base name of this script; the log file is named after the script.
this_file_abspath, this_file_name = os.path.split(os.path.abspath(__file__))
logf = this_file_name + '.log'
try:
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s[thread:%(thread)d][process:%(process)d]',
                        datefmt='%a, %d %b %Y %H:%M:%S',
                        filename=logf,
                        filemode='a')
except Exception as e:
    s = '%s%s%s' % ('logging.basicConfig EXCEPTION ', time.strftime('%Y%m%d_%H%M%S'), e)
    # Close the file *before* os._exit(): os._exit() terminates the process
    # without flushing buffered file objects, so exiting inside the ``with``
    # block could lose the message that was just written.
    with open(logf, 'a') as fo:
        fo.write(s)
    print(s)
    os._exit(4002)

logging.info('START')

# Sample image URL and the local directory used for downloaded images.
img_url = 'https://s3.pstatp.com/toutiao/static/img/logo.201f80d.png'
img_dir = 'C:\\Users\\sas\\PycharmProjects\\py_win_to_unix\\crontab_chk_url\\personas\\trunk\\plugins\\spider\\dl_img_tmp\\'


def spider_webimg_dl_return_local_img_path(img_dir, img_url, local_default='default.DONOT_REMOVE.png'):
    """Download ``img_url`` into ``img_dir`` and return the path of the saved file.

    The file name is built from the current local time, the calling thread's
    identifier and the URL with ``/ : ? = &`` replaced by marker strings, with a
    ``.png`` suffix appended.

    :param img_dir: target directory; it is concatenated as-is, so it must end
        with a path separator.
    :param img_url: URL of the image to fetch.
    :param local_default: file name (inside ``img_dir``) returned when the
        download or the write fails.
    :return: path of the freshly written file, or ``img_dir + local_default``
        when anything goes wrong (the error is printed, never raised).
    """
    fallback = '%s%s' % (img_dir, local_default)
    try:
        # A timeout keeps a stalled server from blocking the caller forever.
        response = requests.get(img_url, timeout=30)
        # Treat HTTP error statuses as failures instead of saving an error page as an image.
        response.raise_for_status()
        payload = response.content
        if not payload:
            return fallback
        url_part = (img_url.replace('/', '_xl_')
                    .replace(':', '_fxl_')
                    .replace('?', '_fxlquestion_')
                    .replace('=', '_fxlequal_')
                    .replace('&', '_fxland_'))
        target = '%s%s%s%s%s' % (
            img_dir, time.strftime('%Y%m%d%H%M%S'), str(threading.get_ident()), url_part, '.png')
        with open(target, 'wb') as f:
            f.write(payload)
    except Exception as e:
        # Best effort: report the problem and hand back the default image so
        # the caller never receives a path to a file that was not written.
        print(e)
        return fallback
    return target


import pymysql

# MySQL connection settings (host, port, user, password, database name) that
# are passed to pymysql.connect() further down in this file.
# NOTE(review): the credentials are hard-coded in source — consider loading
# them from configuration or the environment instead.
h, pt, u, p, db = '192.168.2.210', 3306, 'root', 'joke_', 'star_media_helper'


def mysql_fetch(sql, res_type='tuple', args=None):
    """Run a query and return every row of its result set.

    A new connection is opened from the module-level settings ``h``, ``pt``,
    ``u``, ``p`` and ``db`` for each call and closed before returning.

    :param sql: SQL statement to execute.
    :param res_type: ``'dic'`` returns rows as dictionaries (``DictCursor``);
        any other value uses the connection's default cursor.
    :param args: optional parameters forwarded to ``cursor.execute`` so that
        callers can use placeholders instead of building SQL by string
        formatting.
    :return: the fetched rows, or ``()`` when the connection cannot be opened
        (the error is printed). Errors raised while executing the statement
        propagate to the caller.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8')
    except Exception as e:
        print(e)
        return ()
    try:
        if res_type == 'dic':
            cursor = conn.cursor(pymysql.cursors.DictCursor)
        else:
            cursor = conn.cursor()
        try:
            cursor.execute(sql, args)
            # Read the rows while the cursor and connection are still open.
            rows = cursor.fetchall()
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the statement fails.
        conn.close()
    return rows


def mysql_write(sql, args=None):
    """Execute a data-modifying statement and commit it.

    A new connection is opened from the module-level settings ``h``, ``pt``,
    ``u``, ``p`` and ``db`` for each call and closed before returning.

    :param sql: SQL statement to execute.
    :param args: optional parameters forwarded to ``cursor.execute`` so that
        callers can use placeholders instead of building SQL by string
        formatting.
    :return: ``0`` after a successful commit, ``1`` when the connection cannot
        be opened (the error is printed). Errors raised while executing or
        committing propagate to the caller.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8')
    except Exception as e:
        print(e)
        return 1
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql, args)
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the statement fails.
        conn.close()
    return 0


import random

# Main loop: read the enabled accounts from MySQL and, for each account that
# still has unpublished articles, drive a Chrome session through login and the
# publishing page, then record the result in the database.
while True:
    logging.info('LOOP----')
    # The first assignment is overwritten immediately; only the second query
    # (restricted to a single account id) is executed.
    sql = 'SELECT username,password,toutiaoid  FROM joke__star_helper_namepwd WHERE status=1 AND category=1 AND id>236 AND NOT  (toutiaoid IS NULL OR toutiaoid="" )'
    sql = 'SELECT username,password,toutiaoid  FROM joke__star_helper_namepwd WHERE status=1 AND category=1 AND id=7856582 AND NOT  (toutiaoid IS NULL OR toutiaoid="" )'
    res = mysql_fetch(sql)
    ac_l = [{'u': i[0], 'p': i[1], 'toutiao_uid': i[2]} for i in res]
    for ac in ac_l:
        myid, mypwd, toutiao_uid = ac['u'], ac['p'], ac['toutiao_uid']
        # Publishing-restriction logic: find the publish rules whose uid list
        # contains this account and whose time_effective is not in the future.
        # The first query is overwritten; only the second one is executed.
        # NOTE(review): the SQL is built with str.format from database values.
        sql = "SELECT * FROM joke__star_helper_relation_wukong_question  WHERE  INSTR(CONCAT(',',id_toutiao_uid_list,','),CONCAT(',','{}',',')) AND time_effective<={}  ORDER BY id DESC; ".format(
            toutiao_uid, int(time.time()));
        sql = "SELECT * FROM joke__helper_article_publish  WHERE  INSTR(CONCAT(',',id_toutiao_uid_list,','),CONCAT(',','{}',',')) AND time_effective<={}  ORDER BY id DESC; ".format(
            toutiao_uid, int(time.time()));
        print(sql)
        logging.info(sql)
        res_content = mysql_fetch(sql, 'dic')
        if len(res_content) == 0:
            continue
        # NOTE(review): this list is not used again; the same expression is
        # rebuilt inline in the next query.
        id_article_list = [i['id_article_list'] for i in res_content]

        # Articles referenced by those rules that have no publish-result row
        # for this account yet (at most two).
        sql = 'SELECT * FROM joke__helper_article WHERE id IN ({}) AND id  NOT IN (SELECT article_id FROM  joke__helper_article_publish_result WHERE 1 AND toutiao_uid="{}" ) LIMIT 2; '.format(
            ','.join([i['id_article_list'] for i in res_content]), toutiao_uid)
        logging.info(sql)
        res_content = mysql_fetch(sql, 'dic')
        if len(res_content) == 0:
            continue

        browser = webdriver.Chrome()
        # Landing pages to open before logging in. The first list (and its
        # extension) is discarded by the reassignment below; only the
        # toutiao.com article URLs are used.
        f_url_l = ['https://www.toutiao.com/group/1589657566362638/',
                   'https://www.wukong.com/question/6388670742287876353/',
                   'https://www.wukong.com/tag/6215497898671475202/']
        f_url_l += ['https://www.wukong.com/question/6512777037948649741/',
                    'https://www.wukong.com/question/6469247721038414093/',
                    'https://www.wukong.com/question/6481502080249889037/']
        f_url_l = ['https://www.toutiao.com/a6514526304476332552/', 'https://www.toutiao.com/a6514661446876398088/',
                   'https://www.toutiao.com/a6514778729951003150/']
        f_url_l += ['https://www.toutiao.com/a6514216125151052291/', 'https://www.toutiao.com/a6512315164463727111/',
                    'https://www.toutiao.com/a6513334304318161411/']
        # Pick one landing page based on the current Unix time.
        f_url_l_a = f_url_l[int(time.time()) % len(f_url_l)]
        browser.get(f_url_l_a)
        time.sleep(random.randint(10, 20))

        # Navigate to the SSO login page; only the second URL (with the
        # service redirect) is used.
        js = 'window.location.href="https://sso.toutiao.com/login/";'
        js = 'window.location.href="https://sso.toutiao.com/login/?service=https://mp.toutiao.com/sso_confirm/?redirect_url=/";'
        browser.execute_script(js)
        time.sleep(random.randint(10, 20))

        # The same navigation script is executed a second time.
        browser.execute_script(js)

        # Only the 'qq' branch can run because ac_type is fixed here.
        ac_type = 'qq'
        if ac_type == 'qq':
            myid, mypwd = ac['u'], ac['p']
            # Click the third entry of the login-method list, then fill the
            # elements with ids "u" and "p" and click the element with id "go".
            xp = '/html/body/div/div/div[2]/div/div/div/ul/li[3]'
            browser.find_element_by_xpath(xp).click()
            time.sleep(10)
            # NOTE(review): the values are pasted into double-quoted JS
            # strings, so a double quote or backslash in them would break the script.
            js = '%s%s%s' % ('document.getElementById("u").value="', myid, '"')
            browser.execute_script(js)
            js = '%s%s%s' % ('document.getElementById("p").value="', mypwd, '"')
            browser.execute_script(js)
            time.sleep(random.randint(5, 15))
            xp_newpage = '//*[@id="go"]'
            browser.find_element_by_xpath(xp_newpage).click()
            time.sleep(random.randint(10, 20))
        elif ac_type == 'mail_qq':
            continue

        time.sleep(5)

        browser.refresh()
        js = 'window.location.href="https://www.toutiao.com/";'
        browser.execute_script(js)
        browser.refresh()

        time.sleep(6)

        # Open the article publishing page; only the last assignment is used.
        js = 'window.location.href="https://www.wukong.com/";'
        js = 'window.location.href="https://mp.toutiao.com/profile_v2/publish/";'
        js = 'window.location.href="https://mp.toutiao.com/profile_v3/graphic/publish";'
        browser.execute_script(js)

        time.sleep(6)

        # title: set the value of the element with id "title" from JavaScript
        # (both assignments build the same script).
        js = '%s%s%s' % ('document.getElementById("title").value="', '林志玲捐款记录被翻出 单笔高达千万', '"')
        js = 'document.getElementById("title").value="{}"'.format('林志玲捐款记录被翻出 单笔高达千万')

        browser.execute_script(js)
        time.sleep(2)

        # Read the article HTML from a local file and join it into one line
        # by removing the newline characters.
        fhtml, dbhtml_str = 'toutaio.db.html', ''
        with open(fhtml, 'r', encoding='utf-8') as fr:
            for hi in fr:
                dbhtml_str = '{}{}'.format(dbhtml_str, hi.replace('\n', ''))

        db_html = dbhtml_str
        # Write the HTML into the <body> of the document inside the 'ueditor_0' iframe.
        # NOTE(review): db_html is placed inside a double-quoted JS string
        # literal, so a double quote or backslash in the file would break the script.
        js = 'it=document.getElementById("ueditor_0").contentWindow.document.getElementsByTagName("body")[0];it.innerHTML="{}"'.format(
            db_html)
        browser.execute_script(js)
        time.sleep(2)

        # Click an element below the node with id "graphic"
        # (presumably the publish button — TODO confirm).
        xp = '//*[@id="graphic"]/div/div/div[2]/div[3]/div[2]/div[1]'
        xp = '//*[@id="graphic"]/div/div/div[2]/div[3]/div[2]/div[1]'
        browser.find_element_by_xpath(xp).click()
        # NOTE(review): dd is never read afterwards.
        dd = 9
        # A disabled variant used to sit here as commented-out code: it clicked
        # the first "ask" element and sent real key events (SPACE, CTRL+A/X/V,
        # BACKSPACE) to the input inside "input-box", noting that keyboard
        # events are required because of anti-bot checks.

        # Only the first fetched article row is processed.
        for i in res_content[0:1]:
            dbid, content, img_list = i['id'], i['content'], i['img_list']

            # Build a random sentence from the word lists below and write it
            # into the first child node of the first "input-box" element.
            tmp_l = ['口红', '指甲油', '护发素', '沐浴露', '洗手液', '洗发水', '牙膏']
            tmp_l_1 = ['老人', '小孩', '白领', '前台妹子', '行政妹子', '大学生', '高中生']
            tmp_l_2 = ['类型', '特质', '种类', '价位', '原材料', '主要成分', '价格']

            s = '{}{}{}{}{}{}{}'.format(str(random.randint(1, 12)), '月份,', random.choice(tmp_l_1), '适合使用什么',
                                        random.choice(tmp_l_2), '的', random.choice(tmp_l))
            js = 'document.getElementsByClassName("input-box")[0].childNodes[0].value="{}";'.format(s)
            browser.execute_script(js)
            time.sleep(12)

            # Click the "next" step button, then the "submit" step button.
            js = 'document.getElementsByClassName("step-btn next")[0].click();'
            browser.execute_script(js)

            js = 'document.getElementsByClassName("step-btn submit")[0].click();'
            browser.execute_script(js)
            time.sleep(12)

            # Open the account's user page and take the href of the first link
            # inside the first "question-title" element as the result URL.
            js = 'window.location.href="https://www.wukong.com/user/?uid={}&type=1";'.format(toutiao_uid)
            browser.execute_script(js)
            time.sleep(12)
            res_url = browser.find_element_by_class_name('question-title').find_elements_by_tag_name('a')[
                0].get_attribute('href')

            # A disabled earlier flow used to sit here as commented-out code.
            # It sanitised `content` (newlines and quotes replaced, truncated
            # to 2000 characters), typed it into a textarea with key events,
            # downloaded every image id in `img_list` with
            # spider_webimg_dl_return_local_img_path and sent the local paths
            # to the 'fileElem' upload input, then appended a line to
            # 'toutiao_success.log'.

            # Record the published article for this account.
            # NOTE(review): the INSERT is built with %-formatting from a page
            # URL and database values.
            sql = 'INSERT INTO  joke__helper_article_publish_result (article_id,article_url,time_script,toutiao_uid) VALUE("%s","%s","%s","%s");' % (
                dbid, res_url, int(time.time()), toutiao_uid)
            mysql_write(sql)
            print(sql)
            time.sleep(random.randint(20, 30))
            # Return to the Toutiao home page; only the second assignment is used.
            js = 'window.location.href="https://www.wukong.com/"'
            js = 'window.location.href="https://www.toutiao.com/"'
            browser.execute_script(js)
    # NOTE(review): this runs once per `while` iteration, after the `for` loop,
    # so only the most recently created browser is quit. When no browser was
    # created yet, `browser` is unbound and the resulting error is caught and
    # logged here.
    try:
        browser.quit()
    except Exception as e1:
        print(e1)
        logging.exception(e1)

# NOTE(review): this statement is at module level after a `while True` loop
# that contains no `break`, so it is not reached; the loop itself has no delay
# between iterations when there is nothing to publish.
time.sleep(random.randint(120, 300))

 

 

 

        # Fragment: type the title into the element with id "title" through
        # key events (a SPACE first, then the text) rather than assigning its
        # value from JavaScript.
        xp_newpage = '//*[@id="title"]'
        mytxt = '林志玲捐款记录被翻出 单笔高达千万'
        browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
        browser.find_element_by_xpath(xp_newpage).send_keys(mytxt)

  

 

 

 

 

<div class='article' id='artibody'>
    <div class='img_wrapper'>
        <img alt='林志玲' src='http://n.sinaimg.cn/ent/transform/703/w253h450/20180416/77p2-fzcyxmv1344655.jpg'>
        <span class='img_descr'>林志玲</span>
    </div>
    <div class='img_wrapper'>
        <img alt='林志玲捐款记录' src='http://n.sinaimg.cn/ent/transform/250/w600h450/20180416/CVWm-fzcyxmv1342897.jpg'>
        <span class='img_descr'>林志玲捐款记录</span>
    </div>
    <div class='img_wrapper'>
        <img alt='林志玲捐款记录' src='http://n.sinaimg.cn/ent/transform/250/w600h450/20180416/hXMn-fzcyxmv1342914.jpg'>
        <span class='img_descr'>林志玲捐款记录</span>
    </div>            <!--video-list-->
    <div class='video-2017' id='videoList0'></div>

    <!--/video-list-->
    <p>  新浪娱乐讯 据台湾媒体报道,林志玲
        自出道以来,热心公益,甚至创立了自己的基金会,每年固定发行公益年历。近日明星从事公益的话题发烧,她也被网友挖出,几乎每个月都在转帐捐款,且其中一笔高达1000万人民币,更让网友惊呼连连。
    </p>
    <div id='ad_44124' class='otherContent_01'
         style='display: block; margin: 10px 20px 10px 0px; float: left; overflow: hidden; clear: both; padding: 4px; width: 300px; height: 250px;'>

    </div>
    <p>
          林志玲被网友翻出,2016年至2018年间的捐款纪录,几乎每个月都有记录,且最低都是人民币万元起跳,其中甚至有一笔高达1000万人民币,捐款项目是“筑巢行动”,不少人看到明细,都惊讶表示,原来志玲姐姐私下默默捐了这么多善款,还有人笑称:“她是不是拿着手机,无聊就转帐的那种人?”、“真的人美心也美”、“太圈粉了”、“志玲姐姐真的太低调了”。</p>
    <p>
          43岁的林志玲1998年出道,从伸展台转战影视圈,尚未出名前就热心公益,更在2011年,主动以个人名义,成立“志玲姐姐慈善基金会”。她也固定每年拍摄公益写真年历,所得全数捐给儿福机构,或是帮助弱势孩童急难救助等,多年从不间断,且义卖期间,她从不公开做宣传,低调行善,受到不少人赞赏。</p>
    <p>
          林志玲2016年受访曾透露,投入公益的契机,是因为身边罹癌友人的一句话,才让她下定决心。当时这位好友问她:“你希望离开后,怎样被大家记得?”她想了一想,认为既然是公众人物,就应该让大家记得自己微笑的样子,要用这样的身分,做些有影响力的事,从此将公益当做自我赋予的使命,一做就是好多年。ETtoday/文</p>
    <p class='article-editor'>(责编:kita)</p>
    <div style='font-size: 0px; height: 0px; clear: both;'></div>

</div>
<!-- 非定向300*250按钮  end -->
</div>

  

 

from selenium import webdriver
from  time import sleep
import time
from selenium.webdriver.common.keys import Keys
import os

import requests
import time
import threading
import logging
import random

# Timestamp taken once at start-up, formatted as YYYYmmdd_HHMMSS (local time).
start_time = time.strftime('%Y%m%d_%H%M%S')
os_sep = os.sep
# Directory and base name of this script; the log file is named after the script.
this_file_abspath, this_file_name = os.path.split(os.path.abspath(__file__))
logf = this_file_name + '.log'
try:
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s[thread:%(thread)d][process:%(process)d]',
                        datefmt='%a, %d %b %Y %H:%M:%S',
                        filename=logf,
                        filemode='a')
except Exception as e:
    s = '%s%s%s' % ('logging.basicConfig EXCEPTION ', time.strftime('%Y%m%d_%H%M%S'), e)
    # Close the file *before* os._exit(): os._exit() terminates the process
    # without flushing buffered file objects, so exiting inside the ``with``
    # block could lose the message that was just written.
    with open(logf, 'a') as fo:
        fo.write(s)
    print(s)
    os._exit(4002)

logging.info('START')

# Sample image URL and the local directory used for downloaded images.
img_url = 'https://s3.pstatp.com/toutiao/static/img/logo.201f80d.png'
img_dir = 'C:\\Users\\sas\\PycharmProjects\\py_win_to_unix\\crontab_chk_url\\personas\\trunk\\plugins\\spider\\dl_img_tmp\\'


def spider_webimg_dl_return_local_img_path(img_dir, img_url, local_default='default.DONOT_REMOVE.png'):
    """Download ``img_url`` into ``img_dir`` and return the path of the saved file.

    The file name is built from the current local time, the calling thread's
    identifier and the URL with ``/ : ? = &`` replaced by marker strings, with a
    ``.png`` suffix appended.

    :param img_dir: target directory; it is concatenated as-is, so it must end
        with a path separator.
    :param img_url: URL of the image to fetch.
    :param local_default: file name (inside ``img_dir``) returned when the
        download or the write fails.
    :return: path of the freshly written file, or ``img_dir + local_default``
        when anything goes wrong (the error is printed, never raised).
    """
    fallback = '%s%s' % (img_dir, local_default)
    try:
        # A timeout keeps a stalled server from blocking the caller forever.
        response = requests.get(img_url, timeout=30)
        # Treat HTTP error statuses as failures instead of saving an error page as an image.
        response.raise_for_status()
        payload = response.content
        if not payload:
            return fallback
        url_part = (img_url.replace('/', '_xl_')
                    .replace(':', '_fxl_')
                    .replace('?', '_fxlquestion_')
                    .replace('=', '_fxlequal_')
                    .replace('&', '_fxland_'))
        target = '%s%s%s%s%s' % (
            img_dir, time.strftime('%Y%m%d%H%M%S'), str(threading.get_ident()), url_part, '.png')
        with open(target, 'wb') as f:
            f.write(payload)
    except Exception as e:
        # Best effort: report the problem and hand back the default image so
        # the caller never receives a path to a file that was not written.
        print(e)
        return fallback
    return target


import pymysql

# MySQL connection settings (host, port, user, password, database name) that
# are passed to pymysql.connect() further down in this file.
# NOTE(review): the credentials are hard-coded in source — consider loading
# them from configuration or the environment instead.
h, pt, u, p, db = '192.168.2.210', 3306, 'root', 'joke', 'star_media_joke'


def mysql_fetch(sql, res_type='tuple', args=None):
    """Run a query and return every row of its result set.

    A new connection is opened from the module-level settings ``h``, ``pt``,
    ``u``, ``p`` and ``db`` for each call and closed before returning.

    :param sql: SQL statement to execute.
    :param res_type: ``'dic'`` returns rows as dictionaries (``DictCursor``);
        any other value uses the connection's default cursor.
    :param args: optional parameters forwarded to ``cursor.execute`` so that
        callers can use placeholders instead of building SQL by string
        formatting.
    :return: the fetched rows, or ``()`` when the connection cannot be opened
        (the error is printed). Errors raised while executing the statement
        propagate to the caller.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8')
    except Exception as e:
        print(e)
        return ()
    try:
        if res_type == 'dic':
            cursor = conn.cursor(pymysql.cursors.DictCursor)
        else:
            cursor = conn.cursor()
        try:
            cursor.execute(sql, args)
            # Read the rows while the cursor and connection are still open.
            rows = cursor.fetchall()
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the statement fails.
        conn.close()
    return rows


def mysql_write(sql, args=None):
    """Execute a data-modifying statement and commit it.

    A new connection is opened from the module-level settings ``h``, ``pt``,
    ``u``, ``p`` and ``db`` for each call and closed before returning.

    :param sql: SQL statement to execute.
    :param args: optional parameters forwarded to ``cursor.execute`` so that
        callers can use placeholders instead of building SQL by string
        formatting.
    :return: ``0`` after a successful commit, ``1`` when the connection cannot
        be opened (the error is printed). Errors raised while executing or
        committing propagate to the caller.
    """
    try:
        conn = pymysql.connect(host=h, port=pt, user=u, passwd=p, db=db, charset='utf8')
    except Exception as e:
        print(e)
        return 1
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql, args)
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the statement fails.
        conn.close()
    return 0


import random

while True:
    logging.info('LOOP----')
    sql = 'SELECT username,password,toutiaoid  FROM joke_star_joke_joke_namepwd WHERE status=1 AND category=1 AND id>236 AND NOT  (toutiaoid IS NULL OR toutiaoid="" )'
    sql = 'SELECT username,password,toutiaoid  FROM joke_star_joke_namepwd WHERE status=1 AND category=1 AND id=7856582 AND NOT  (toutiaoid IS NULL OR toutiaoid="" )'
    res = mysql_fetch(sql)
    ac_l = [{'u': i[0], 'p': i[1], 'toutiao_uid': i[2]} for i in res]
    for ac in ac_l:
        myid, mypwd, toutiao_uid = ac['u'], ac['p'], ac['toutiao_uid']
        # 发布限制条件逻辑
        sql = "SELECT * FROM joke_star_joke_relation_wukong_question  WHERE  INSTR(CONCAT(',',id_toutiao_uid_list,','),CONCAT(',','{}',',')) AND time_effective<={}  ORDER BY id DESC; ".format(
            toutiao_uid, int(time.time()));
        sql = "SELECT * FROM joke_joke_article_publish  WHERE  INSTR(CONCAT(',',id_toutiao_uid_list,','),CONCAT(',','{}',',')) AND time_effective<={}  ORDER BY id DESC; ".format(
            toutiao_uid, int(time.time()));
        print(sql)
        logging.info(sql)
        res_content = mysql_fetch(sql, 'dic')
        if len(res_content) == 0:
            continue
        id_article_list = [i['id_article_list'] for i in res_content]

        sql = 'SELECT * FROM joke_joke_article WHERE id IN ({}) AND id  NOT IN (SELECT article_id FROM  joke_joke_article_publish_result WHERE 1 AND toutiao_uid="{}" ) LIMIT 2; '.format(
            ','.join([i['id_article_list'] for i in res_content]), toutiao_uid)
        # sql = 'SELECT * FROM joke_star_joke_wukong_question WHERE id  NOT IN (SELECT toutiao_uid FROM  joke_star_joke_toutiaouser_wukong_question) LIMIT 1'
        logging.info(sql)
        res_content = mysql_fetch(sql, 'dic')
        if len(res_content) == 0:
            continue

        browser = webdriver.Chrome()
        f_url_l = ['https://www.toutiao.com/group/1589657566362638/',
                   'https://www.wukong.com/question/6388670742287876353/',
                   'https://www.wukong.com/tag/6215497898671475202/']
        f_url_l += ['https://www.wukong.com/question/6512777037948649741/',
                    'https://www.wukong.com/question/6469247721038414093/',
                    'https://www.wukong.com/question/6481502080249889037/']
        # f_url_l = []
        f_url_l = ['https://www.toutiao.com/a6514526304476332552/', 'https://www.toutiao.com/a6514778729951003150/']
        f_url_l += ['https://www.toutiao.com/a6514216125151052291/', 'https://www.toutiao.com/a6512315164463727111/',
                    'https://www.toutiao.com/a6513334304318161411/']
        f_url_l_a = f_url_l[int(time.time()) % len(f_url_l)]
        # browser.get(random.choice(f_url_l))
        browser.get(f_url_l_a)
        time.sleep(random.randint(10, 20))

        js = 'window.location.href="https://sso.toutiao.com/login/";'
        js = 'window.location.href="https://sso.toutiao.com/login/?service=https://mp.toutiao.com/sso_confirm/?redirect_url=/";'
        browser.execute_script(js)
        time.sleep(random.randint(10, 20))

        #  js = 'window.location.href="https://sso.toutiao.com/login/?service=https%3A%2F%2Fwww.wukong.com%2Fwenda%2Fwelcome%2F#type=0";'
        browser.execute_script(js)

        ac_type = 'qq'
        if ac_type == 'qq':
            myid, mypwd = ac['u'], ac['p']
            xp = '/html/body/div/div/div[2]/div/div/div/ul/li[3]'
            browser.find_element_by_xpath(xp).click()
            time.sleep(10)
            js = '%s%s%s' % ('document.getElementById("u").value="', myid, '"')
            browser.execute_script(js)
            js = '%s%s%s' % ('document.getElementById("p").value="', mypwd, '"')
            browser.execute_script(js)
            time.sleep(random.randint(5, 15))
            xp_newpage = '//*[@id="go"]'
            browser.find_element_by_xpath(xp_newpage).click()
            time.sleep(random.randint(10, 20))
        elif ac_type == 'mail_qq':
            continue

        time.sleep(5)

        browser.refresh()
        js = 'window.location.href="https://www.toutiao.com/";'
        browser.execute_script(js)
        browser.refresh()

        time.sleep(6)

        js = 'window.location.href="https://www.wukong.com/";'
        js = 'window.location.href="https://mp.toutiao.com/profile_v2/publish/";'
        js = 'window.location.href="https://mp.toutiao.com/profile_v3/graphic/publish";'
        browser.execute_script(js)

        time.sleep(6)

        # title
        # js = '%s%s%s' % ('document.getElementById("title").value="', '林志玲捐款记录被翻出 单笔高达千万', '"')
        # js = 'document.getElementById("title").value="{}"'.format('林志玲捐款记录被翻出 单笔高达千万')
        # browser.execute_script(js)

        xp_newpage = '//*[@id="title"]'
        mytxt = '林志玲捐款记录被翻出 单笔高达千万'
        browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
        browser.find_element_by_xpath(xp_newpage).send_keys(mytxt)

        time.sleep(2)

        fhtml, dbhtml_str = 'toutaio.db.html', ''
        with open(fhtml, 'r', encoding='utf-8') as fr:
            for hi in fr:
                dbhtml_str = '{}{}'.format(dbhtml_str, hi.replace('\n', ''))

        xp = '//*[@id="edui18_body"]/div[1]'
        # //*[@id="edui18_body"]/div[1]
        browser.find_element_by_xpath(xp).click()
        time.sleep(2)
        # //*[@id="images"]/div[1]/div
        xp = '//*[@id="images"]/div[1]/div'
        xp = '//*[@id="images"]/div[1]/div/span'
        browser.find_element_by_xpath(xp).click()
        time.sleep(1)

        db_html = dbhtml_str
        #  db_html = '<li ><img src="https://www.baidu.com/img/baidu_jgylogo3.gif" alt="qq_383" title="qq_3503"></li><li >qq_3203</li><li >2017年04月13日 10:02</li><li ><span>3074</span></li>'
        js = 'it=document.getElementById("ueditor_0").contentWindow.document.getElementsByTagName("body")[0];it.innerHTML="{}"'.format(
            db_html)
        browser.execute_script(js)
        xp = '//*[@id="edui18_body"]/div[1]'
        # //*[@id="edui18_body"]/div[1]
        browser.find_element_by_xpath(xp).click()
        time.sleep(2)
        # //*[@id="images"]/div[1]/div
        xp = '//*[@id="images"]/div[1]/div'
        xp = '//*[@id="images"]/div[1]/div/span'
        browser.find_element_by_xpath(xp).click()
        time.sleep(1)
        # xp='//*[@id="graphic"]/div/div/div[2]/div[2]/div[1]/div[2]/div/div/div[2]/div[1]/div[2]/i'
        # browser.find_element_by_xpath(xp)
        # xp='//*[@id="pgc-text-img"]/div/div[1]/div[1]'
        # browser.find_element_by_xpath(xp)
        xp = '//*[@id="pgc-text-img"]/div/div[2]/div/button[1]'
        browser.find_element_by_xpath(xp)

        xp = '//*[@id="graphic"]/div/div/div[2]/div[2]/div[1]/div[2]/div/div/div/div/label[3]/div/input'
        browser.find_element_by_xpath(xp)

        time.sleep(2)
        time.sleep(2)
        xp = '//*[@id="graphic"]/div/div/div[2]/div[3]/div[2]/div[1]'
        xp = '//*[@id="graphic"]/div/div/div[2]/div[3]/div[2]/div[1]'
        browser.find_element_by_xpath(xp).click()
        dd = 9
        # js = 'document.getElementsByClassName("ask")[0].click();'
        # browser.execute_script(js)
        # time.sleep(12)

        # time.sleep(random.randint(10, 20))
        # # 需要键盘事件 反爬虫
        # tmp_target = browser.find_element_by_class_name('input-box').find_element_by_tag_name('input')
        # tmp_target.send_keys(Keys.SPACE)
        # tmp_target.send_keys(Keys.CONTROL, 'a')
        # tmp_target.send_keys(Keys.CONTROL, 'x')
        # tmp_target.send_keys(Keys.CONTROL, 'v')
        # tmp_target.send_keys(Keys.BACK_SPACE)
        # time.sleep(random.randint(10, 20))


        # res_content = []
        for i in res_content[0:1]:
            dbid, content, img_list = i['id'], i['content'], i['img_list']

            tmp_l = ['口红', '指甲油', '护发素', '沐浴露', '洗手液', '洗发水', '牙膏']
            tmp_l_1 = ['老人', '小孩', '白领', '前台妹子', '行政妹子', '大学生', '高中生']
            tmp_l_2 = ['类型', '特质', '种类', '价位', '原材料', '主要成分', '价格']

            s = '{}{}{}{}{}{}{}'.format(str(random.randint(1, 12)), '月份,', random.choice(tmp_l_1), '适合使用什么',
                                        random.choice(tmp_l_2), '的', random.choice(tmp_l))
            js = 'document.getElementsByClassName("input-box")[0].childNodes[0].value="{}";'.format(s)
            browser.execute_script(js)
            time.sleep(12)
            #
            # tmp_target.send_keys(Keys.SPACE)

            js = 'document.getElementsByClassName("step-btn next")[0].click();'
            browser.execute_script(js)

            # step-btn submit

            js = 'document.getElementsByClassName("step-btn submit")[0].click();'
            browser.execute_script(js)
            time.sleep(12)

            #
            js = 'window.location.href="https://www.wukong.com/user/?uid={}&type=1";'.format(toutiao_uid)
            browser.execute_script(js)
            time.sleep(12)
            res_url = browser.find_element_by_class_name('question-title').find_elements_by_tag_name('a')[
                0].get_attribute('href')

            # print(i)
            # xp_newpage = '/html/body/div/div[2]/div[2]/div[1]/div/div/div/div/textarea'
            # try:
            #     browser.find_element_by_xpath(xp_newpage)
            # except Exception as e:
            #     print(e)
            #     break
            # browser.find_element_by_xpath(xp_newpage).click()
            # words = content
            # # Message: SyntaxError: unterminated string literal
            # mytxt = words.replace('\n', ' ').replace('\r', ' ').replace('\\br', ' ').replace('"', '“').replace("'", '‘')
            # # Message: SyntaxError: missing ; before statement
            # mytxt = mytxt.replace("'", '‘')
            # # 2000 头条
            # mytxt = mytxt[0:2000]
            # mytxt = '好消息' if len(mytxt.replace(' ', '')) == 0 else mytxt
            #
            # # 需要键盘事件 反爬虫
            # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
            # # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.CONTROL, 'a')
            # # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.CONTROL, 'x')
            # # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.CONTROL, 'v')
            # # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.BACK_SPACE)
            # time.sleep(random.randint(2, 5))
            #
            # try:
            #     # js = '%s%s%s' % ('document.getElementsByTagName("textarea")[0].value="', '', '"')
            #     # browser.execute_script(js)
            #     js = '%s%s%s' % ('document.getElementsByTagName("textarea")[0].value="', mytxt, '"')
            #     browser.execute_script(js)
            #     time.sleep(3)
            # except Exception as jse:
            #     print('.getElementsByTagName("textarea")--log-', jse)
            #     continue
            #
            # browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
            # xp_newpage = '/html/body/div/div[2]/div[2]/div[1]/div/div/div/div/div[1]/div[1]/span[1]/span'
            # browser.find_element_by_xpath(xp_newpage).click()
            # time.sleep(3)
            # try:
            #     upload = browser.find_element_by_id('fileElem')
            #
            #     logs_img = ''
            #     img_url_list = img_list.split(',')
            #
            #     for imgid in img_url_list:
            #         img_url = 'http://192.168.2.212:83/file/get?type=star_joke&id=199'.replace('199', str(imgid))
            #         local_img_path = spider_webimg_dl_return_local_img_path(img_dir, img_url,
            #                                                                 local_default='default.DONOT_REMOVE.png')
            #         print(local_img_path)
            #         time.sleep(random.randint(2, 4))
            #         logs_img += img_url
            #         logs_img += local_img_path
            #         upload.send_keys(local_img_path)
            #         time.sleep(random.randint(3, 7))
            # except Exception as ee:
            #     img_url_default = ''
            #     img_url = img_url_default
            #     local_img_path = spider_webimg_dl_return_local_img_path(img_dir, img_url,
            #                                                             local_default='default.DONOT_REMOVE.png')
            #     sleep(2)
            #     logs_img += img_url
            #     logs_img += local_img_path
            #     # upload.send_keys(local_img_path)
            #     logging.exception(ee)
            #
            # try:
            #     xp_newpage = '/html/body/div/div[2]/div[2]/div[1]/div/ul'
            #     browser.find_element_by_xpath(xp_newpage).click()
            #     xp_newpage = '/html/body/div/div[2]/div[2]/div[1]/div/div/div/div/div[1]/div[2]/a'
            #     browser.find_element_by_xpath(xp_newpage).click()
            #
            #     time.sleep(random.randint(8, 20))
            #     js = 'document.getElementsByClassName("ugc-mode-content")[0].getElementsByTagName("a")[0].target="_self"'
            #     browser.execute_script(js)
            #
            #     time.sleep(random.randint(2, 5))
            #     xp_newpage = '/html/body/div/div[2]/div[2]/div[2]/ul/li[1]/div/div[2]/div/div[2]/a'
            #     browser.find_element_by_xpath(xp_newpage).click()
            #     time.sleep(random.randint(3, 6))
            #     url_curr = browser.current_url
            #
            #     with open('toutiao_success.log', 'a', encoding='utf-8') as f:
            #         logs = '%s%s%s%s%s\n' % (
            #             time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())), ac_type, myid[0:4], mytxt,
            #             logs_img)
            #         print(logs)
            #         f.write(logs)

            sql = 'INSERT INTO  joke_joke_article_publish_result (article_id,article_url,time_script,toutiao_uid) VALUE("%s","%s","%s","%s");' % (
                dbid, res_url, int(time.time()), toutiao_uid)
            mysql_write(sql)
            print(sql)
            time.sleep(random.randint(20, 30))
            js = 'window.location.href="https://www.wukong.com/"'
            js = 'window.location.href="https://www.toutiao.com/"'
            browser.execute_script(js)
            # except Exception as e_url_jump:
            #     print('e_url_jump', e_url_jump)
    try:
        browser.quit()
    except Exception as e1:
        print(e1)
        logging.exception(e1)

time.sleep(random.randint(120, 300))

  

<img onload="editor.fireEvent('contentchange')" src="https://p1.pstatp.com/large/pgc-image/152385934210854ceb909ec" alt="pgc-image/152385934210854ceb909ec" _src="https://p1.pstatp.com/large/pgc-image/152385934210854ceb909ec" buttonadded="true">

  

 

 

 

            '''
           <img onload="editor.fireEvent('contentchange')" src="https://p1.pstatp.com/large/pgc-image/15238623686755f9e3c409a" _src="https://p1.pstatp.com/large/pgc-image/15238623686755f9e3c409a" alt="pgc-image/15238623686755f9e3c409a" buttonadded="true"> 
           '''
            # Purpose: rewrite every <img ...> tag in dbhtml_str to the markup
            # template below, then open the publish page and type in the title.
            # Names used but not defined in this fragment: dbhtml_str,
            # pgc_img_url_l_toutiao, browser, d.

            # Count the tags on a snapshot taken before dbhtml_str is rewritten.
            dbhtml_str_ = dbhtml_str
            img_n = dbhtml_str_.count('<img')
            # Tag template; every 'TTimgCode' placeholder is replaced with an
            # image code below. (The same value is assigned twice.)
            s = '<img onload="editor.fireEvent(\'contentchange\')" src="https://p1.pstatp.com/large/pgc-image/TTimgCode" _src="https://p1.pstatp.com/large/pgc-image/TTimgCode" alt="pgc-image/TTimgCode" buttonadded="true">'
            s = '<img onload="editor.fireEvent(\'contentchange\')" src="https://p1.pstatp.com/large/pgc-image/TTimgCode" _src="https://p1.pstatp.com/large/pgc-image/TTimgCode" alt="pgc-image/TTimgCode" buttonadded="true">'
            #     s = "<img onload='editor.fireEvent(\'contentchange\')' src='https://p1.pstatp.com/large/pgc-image/TTimgCode' _src='https://p1.pstatp.com/large/pgc-image/TTimgCode' alt='pgc-image/TTimgCode' buttonadded='true'>"
            ss = ''
            # l is an alias, not a copy: the `del l[0]` below also removes the
            # entries from pgc_img_url_l_toutiao.
            l = pgc_img_url_l_toutiao
            # NOTE(review): img_n counts '<img' but the searches below look for
            # '<img ' (with a trailing space); a tag written as '<img\n...' or
            # '<img/>' would be counted yet make .index() raise ValueError.
            # NOTE(review): l[0] raises IndexError if the list holds fewer
            # entries than there are tags -- confirm the caller guarantees this.
            for i in range(img_n):
                if i == 0:
                    p1 = dbhtml_str.index('<img ', 0)
                else:
                    # Resume just past the start of the tag inserted in the
                    # previous iteration (which begins at the old p1).
                    p1 = dbhtml_str.index('<img ', p1 + 3)

                # Mask every '>' before p1 so that index('>') returns the first
                # '>' at or after p1, i.e. the end of the current tag; lengths
                # are unchanged, so p2 is also valid for dbhtml_str.
                tmp = '{}{}'.format(dbhtml_str[0:p1].replace('>', 'X'), dbhtml_str[p1:])
                p2 = tmp.index('>')
                # The image code is the last '/'-separated segment of the next entry.
                ss = s.replace('TTimgCode', l[0].split('/')[-1])
                # Splice the new tag over the old one (positions p1..p2 inclusive).
                dbhtml_str = '{}{}{}'.format(dbhtml_str[0:p1], ss, dbhtml_str[p2 + 1:])
                del l[0]
            print('-----------------')
            print(dbhtml_str)
            time.sleep(2)
            # Navigate to the article publish page via JavaScript, then wait
            # a fixed 6 seconds before touching the page.
            js = 'window.location.href="https://mp.toutiao.com/profile_v3/graphic/publish";'
            browser.execute_script(js)
            time.sleep(6)
            xp_newpage = '//*[@id="title"]'
            mytxt = d['title']
            # Send a SPACE key first, then the title text, to the title field.
            # presumably the extra key press is to trigger real keyboard events
            # -- TODO confirm
            browser.find_element_by_xpath(xp_newpage).send_keys(Keys.SPACE)
            browser.find_element_by_xpath(xp_newpage).send_keys(mytxt)
            time.sleep(2)

            # SAVE NOT DEL
            """
            xp = '//*[@id="edui18_body"]/div[1]'
            # //*[@id="edui18_body"]/div[1]
            browser.find_element_by_xpath(xp).click()
            time.sleep(2)
            xp = '//*[@id="images"]/div[1]/div/span'
            browser.find_element_by_xpath(xp).click()
            time.sleep(3)
            '''
            'it=document.getElementById("ueditor_0").contentWindow.document.getElementsByTagName("body")[0];it.innerHTML="{}"'.format(dbhtml_str.replace('onload="editor.fireEvent(\'contentchange\')"','').replace('"',"'").replace('\n',''))

           '''
            # 结合浏览器控制台,拼接符合语法的js字符串
            r_d = {'onload="editor.fireEvent(\'contentchange\')"': '', '"': "'", '\n': ''}
            dbhtml_str_py_js = dbhtml_str
            for k in r_d:
                dbhtml_str_py_js = dbhtml_str_py_js.replace(k, r_d[k])
            dbhtml_str_py_js = dbhtml_str_py_js.replace('nbsp;', ' ')
            js = 'document.getElementById("ueditor_0").contentWindow.document.getElementsByTagName("body")[0].innerHTML="{}"'.format(
                dbhtml_str_py_js)
            browser.execute_script(js)
           """
            # Purpose: click through the editor's image-upload UI, then set the
            # editor's innerHTML to dbhtml_str via JavaScript.

            # Activate the editing area
            browser.find_element_by_class_name('ql-container').click()
            # Click the image-upload tool icon
            browser.find_element_by_class_name('icon-pic_tool').click()
            # Activate the target upload tab (the last tab in the tab list)
            browser.find_element_by_class_name('tui-tab-list').find_elements_by_class_name('tui-tab')[-1].click()
            # Close the upload dialog (the last button in the active tab panel)
            browser.find_element_by_class_name('tui-tab-panel-active').find_elements_by_class_name('tui-btn')[
                -1].click()

            # Build a syntactically valid JS string (worked out with the browser console)
            # The replacements must run in this order: the onload attribute is
            # removed first because its key contains double quotes, which the
            # second rule turns into single quotes; newlines are dropped last.
            # NOTE(review): this relies on dict iteration following insertion
            # order (guaranteed from Python 3.7) -- confirm the target runtime.
            r_d = {'onload="editor.fireEvent(\'contentchange\')"': '', '"': "'", '\n': ''}
            dbhtml_str_py_js = dbhtml_str
            for k in r_d:
                dbhtml_str_py_js = dbhtml_str_py_js.replace(k, r_d[k])
            # NOTE(review): only 'nbsp;' is matched, so the leading '&' of an
            # '&nbsp;' entity stays in the text -- confirm this is intended.
            dbhtml_str_py_js = dbhtml_str_py_js.replace('nbsp;', ' ')
            # Pass in the "keyboard-ized" HTML
            # document.getElementsByClassName("ql-editor ql-blank")[0].innerHTML = "44"
            # The HTML is embedded in a double-quoted JS string literal, hence
            # the double-to-single quote conversion above.
            # NOTE(review): backslashes and '\r' in the HTML are not escaped and
            # could still break the generated script -- verify against real data.
            js = 'document.getElementsByClassName("ql-editor ql-blank")[0].innerHTML ="{}"'.format(
                dbhtml_str_py_js)
            browser.execute_script(js)

  

 

 

 

 

 

 

 

 

  

posted @ 2018-03-06 16:09  papering  阅读(874)  评论(0)  编辑  收藏  举报