台湾通
import time
def xiamen():
    import pymysql
    import requests
    import time
    import re
    import datetime
    import hashlib
    def get_time(strs):
        # Convert the various time formats the sites return into a unix timestamp.
        if len(strs) > 25:
            # e.g. "Mon Jan 18 10:05:38 +0800 2021"
            s = strs.split(" ")
            new_s = s[5] + "-" + s[1] + "-" + s[2] + " " + s[3]
            new_strs = new_s.replace('Jan', '1').replace('Feb', '2').replace('Mar', '3').replace('Apr', '4') \
                .replace('May', '5').replace('Jun', '6').replace('Jul', '7').replace('Aug', '8') \
                .replace('Sep', '9').replace('Oct', '10').replace('Nov', '11').replace('Dec', '12')
            t = time.strptime(new_strs, "%Y-%m-%d %H:%M:%S")
            t = time.mktime(t)
            return int(t)
        num = int(re.search(r"\d+", strs).group())
        if "秒前" in strs:
            times = (datetime.datetime.now() - datetime.timedelta(seconds=num)).strftime("%Y-%m-%d %H:%M")
        elif "分钟前" in strs:
            times = (datetime.datetime.now() - datetime.timedelta(minutes=num)).strftime("%Y-%m-%d %H:%M")
        elif "小时前" in strs:
            times = (datetime.datetime.now() - datetime.timedelta(hours=num)).strftime("%Y-%m-%d %H:%M")
        elif "昨天" in strs:
            times = (datetime.datetime.now() - datetime.timedelta(days=1)).strftime("%Y-%m-%d") + strs.split("昨天")[1]
        elif "日" in strs:
            t = strs.split("日")[1]
            ret = re.findall("(.*?)月(.*?)日.*?", strs)[0]
            times = "2020-" + ret[0] + "-" + ret[1] + t
        else:
            # strs = '2020-06-07 13:09:21'
            # strs = "06-09"
            q = strs.split("-")
            w = strs.split(":")
            if len(q) == 2:
                t = time.strptime("2020-" + strs, "%Y-%m-%d")
                t = time.mktime(t)
                return int(t)
            if len(q) == 3 and len(w) == 1:
                t = time.strptime(strs, "%Y-%m-%d")
                t = time.mktime(t)
                return int(t)
            if len(q) == 3 and len(w) == 2:
                t = time.strptime(strs, "%Y-%m-%d %H:%M")
                t = time.mktime(t)
                return int(t)
            else:
                t = time.strptime(strs, "%Y-%m-%d %H:%M:%S")
                # Convert the time tuple to a timestamp
                t = time.mktime(t)
                return int(t)
        # `times` is always built above as "%Y-%m-%d %H:%M"
        data_sj = time.strptime(str(times), "%Y-%m-%d %H:%M")
        return int(time.mktime(data_sj))
    def get_md5(parmStr):
        # In Python 3 every str is unicode, so encode to utf-8 bytes before hashing.
        if isinstance(parmStr, str):
            parmStr = parmStr.encode("utf-8")
        m = hashlib.md5()
        m.update(parmStr)
        return m.hexdigest()
    def save_mysql(timeStamp, biaoti, web_name, imgurl, videourl):
        dic = {}
        zq_time = datetime.datetime.now().strftime("%Y-%m-%d %X")  # crawl time
        try:
            dic['ir_title'] = str(biaoti)
            # dic["ir_authors"] = 'null'  # user name / author
            # dic["ir_serviceid"] = 'null'  # user id
            dic["ir_urltime"] = timeStamp  # publish time, as a unix timestamp
            dic["ir_urldate"] = get_time(zq_time)  # crawl time, as a unix timestamp
            dic["ir_content"] = ''  # article body
            # dic["ir_nresrved1"] = 'null'  # repost count
            # dic["ir_nresrved2"] = 'null'  # like count
            # dic["ir_nresrved3"] = 'null'  # comment count
            dic["ir_librariytype"] = 2  # fixed: 2
            dic["ir_score"] = 2  # fixed: 2
            dic["if_vcj"] = 2  # 1 = video downloaded, 2 = external link, 0 = no video
            dic["ir_mediasourceid"] = 8  # 1 台湾网, 2 人民网, 3 新华网, 4 央视网, 5 上海台, 6 东南台, 7 深圳台, 8 厦门台, 9 海峡台
            dic["ir_mediatype"] = 0  # fixed; 2 for cross-strait
            dic["ir_mediasource"] = web_name  # source name
            dic["ir_indexsource"] = "mapi1.kxm.xmtv.cn"  # fixed source domain
            dic["ir_url"] = videourl  # video url
            # dic["ir_keyword"] = 'null'  # search keyword; null when crawling a fixed account
            dic["ir_area"] = 2  # fixed: 2
            dic["ir_trade"] = -1  # fixed: -1
            dic["ir_md5"] = hashlib.md5(videourl.encode(encoding='UTF-8')).hexdigest()  # md5 of the link, lower-case
            print(dic["ir_md5"])
            dic["ir_istrand"] = 0
            dic["ir_isv"] = 1  # 1 ordinary user, 2 personal verified, 3 organisation verified, 4 influencer
            dic["ir_imgbin"] = ''  # image link
            dic["ir_imgurl"] = imgurl  # original image link
            dic["ir_videourl"] = videourl  # uploaded video link, if any
            # Database write
            connection = pymysql.connect(
                host='140.210.4.73',
                port=3306,
                user='twipad_cj',
                passwd='bj@#twipad_cj',
                db='tw_ipaddb',
                charset='utf8mb4'
            )
            try:
                # Get a cursor
                with connection.cursor() as cursor:
                    sheet_name = 'tw_webhistory_abroaddataall'
                    sql = """INSERT INTO {}(ir_isv,ir_urltime,ir_urldate,ir_md5,\
ir_content,ir_librariytype,ir_score,if_vcj,ir_mediasourceid,ir_mediatype,ir_mediasource,ir_indexsource,\
ir_url,ir_title,ir_area,ir_trade,ir_istrand,ir_imgbin,ir_imgurl,ir_videourl)\
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""".format(sheet_name)
                    try:
                        cursor.execute(sql, (
                            dic["ir_isv"], dic["ir_urltime"], dic["ir_urldate"], dic['ir_md5'],
                            dic["ir_content"], dic["ir_librariytype"], dic["ir_score"], dic["if_vcj"],
                            dic["ir_mediasourceid"], dic["ir_mediatype"], dic["ir_mediasource"],
                            dic["ir_indexsource"], dic["ir_url"], dic["ir_title"], dic["ir_area"],
                            dic["ir_trade"], dic["ir_istrand"], dic["ir_imgbin"], dic["ir_imgurl"],
                            dic["ir_videourl"]))
                        ir_idd = int(connection.insert_id())
                        print('数据库自增id', ir_idd, '数据')
                        connection.commit()  # commit the insert
                        print("tw_webhistory_abroaddataall表数据存储成功!")
                        # Notify the downstream API of the new row id
                        urrl = 'http://twipad.hnxinxiudata.top/api/data/web_data?ir_id={}'.format(ir_idd)
                        r = requests.get(urrl)
                        print(r)
                    except Exception as pymysqlErr:
                        print('=' * 50)
                        print(pymysqlErr)
                        print("tw_webhistory_abroaddataall表数据已存在,")
                        return 1
            except Exception as e:
                raise e
            finally:
                connection.close()
        except Exception as e:
            print(e)
    tc = 0        # consecutive duplicate counter
    zong_sum = 0  # total items processed
    web_name = '厦门台'
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4557.4 Safari/537.36'
    }
    url = 'https://mapi1.kxm.xmtv.cn/api/v1/contents.php?column_id=247&with_child=1&offset=0&count=9'
    html = requests.get(url=url, headers=headers).json()
    for item in html:
        author = item['author']      # reporter
        biaoti = item['title']       # title
        imgurl = item['index_pic']   # image url
        id = item['id']
        shijian = item['created_at']
        timeArray = time.strptime(shijian, "%Y-%m-%d %H:%M:%S")
        timeStamp = int(time.mktime(timeArray))  # convert to a unix timestamp
        print(shijian, timeStamp)
        # Video page url
        videourl = f'https://2020.xmtv.cn/folder182/?lmdetail_id={id}'
        tc_sum = save_mysql(timeStamp, biaoti, web_name, imgurl, videourl)
        time.sleep(2)
        if tc_sum == 1:
            tc += 1
            print("数据重复进入,2次退出", tc_sum, tc)
            if tc == 2:
                # return "获取完毕!"
                print("获取完毕!")
        zong_sum += 1
        print('***************第', zong_sum, '条***************')
def haixia():
    import pymysql
    import requests
    from lxml import etree
    import time
    import re
    import datetime
    import hashlib
    def get_time(strs):
        # Convert the various time formats the sites return into a unix timestamp.
        if len(strs) > 25:
            # e.g. "Mon Jan 18 10:05:38 +0800 2021"
            s = strs.split(" ")
            new_s = s[5] + "-" + s[1] + "-" + s[2] + " " + s[3]
            new_strs = new_s.replace('Jan', '1').replace('Feb', '2').replace('Mar', '3').replace('Apr', '4') \
                .replace('May', '5').replace('Jun', '6').replace('Jul', '7').replace('Aug', '8') \
                .replace('Sep', '9').replace('Oct', '10').replace('Nov', '11').replace('Dec', '12')
            t = time.strptime(new_strs, "%Y-%m-%d %H:%M:%S")
            t = time.mktime(t)
            return int(t)
        num = int(re.search(r"\d+", strs).group())
        if "秒前" in strs:
            times = (datetime.datetime.now() - datetime.timedelta(seconds=num)).strftime("%Y-%m-%d %H:%M")
        elif "分钟前" in strs:
            times = (datetime.datetime.now() - datetime.timedelta(minutes=num)).strftime("%Y-%m-%d %H:%M")
        elif "小时前" in strs:
            times = (datetime.datetime.now() - datetime.timedelta(hours=num)).strftime("%Y-%m-%d %H:%M")
        elif "昨天" in strs:
            times = (datetime.datetime.now() - datetime.timedelta(days=1)).strftime("%Y-%m-%d") + strs.split("昨天")[1]
        elif "日" in strs:
            t = strs.split("日")[1]
            ret = re.findall("(.*?)月(.*?)日.*?", strs)[0]
            times = "2020-" + ret[0] + "-" + ret[1] + t
        else:
            # strs = '2020-06-07 13:09:21'
            # strs = "06-09"
            q = strs.split("-")
            w = strs.split(":")
            if len(q) == 2:
                t = time.strptime("2020-" + strs, "%Y-%m-%d")
                t = time.mktime(t)
                return int(t)
            if len(q) == 3 and len(w) == 1:
                t = time.strptime(strs, "%Y-%m-%d")
                t = time.mktime(t)
                return int(t)
            if len(q) == 3 and len(w) == 2:
                t = time.strptime(strs, "%Y-%m-%d %H:%M")
                t = time.mktime(t)
                return int(t)
            else:
                t = time.strptime(strs, "%Y-%m-%d %H:%M:%S")
                # Convert the time tuple to a timestamp
                t = time.mktime(t)
                return int(t)
        # `times` is always built above as "%Y-%m-%d %H:%M"
        data_sj = time.strptime(str(times), "%Y-%m-%d %H:%M")
        return int(time.mktime(data_sj))
    def get_md5(parmStr):
        # In Python 3 every str is unicode, so encode to utf-8 bytes before hashing.
        if isinstance(parmStr, str):
            parmStr = parmStr.encode("utf-8")
        m = hashlib.md5()
        m.update(parmStr)
        return m.hexdigest()
    def save_mysql(timeStamp, biaoti, web_name, imgurl, videourl):
        dic = {}
        zq_time = datetime.datetime.now().strftime("%Y-%m-%d %X")  # crawl time
        try:
            dic['ir_title'] = str(biaoti)
            # dic["ir_authors"] = 'null'  # user name / author
            # dic["ir_serviceid"] = 'null'  # user id
            dic["ir_urltime"] = timeStamp  # publish time, as a unix timestamp
            dic["ir_urldate"] = get_time(zq_time)  # crawl time, as a unix timestamp
            dic["ir_content"] = ''  # article body
            # dic["ir_nresrved1"] = 'null'  # repost count
            # dic["ir_nresrved2"] = 'null'  # like count
            # dic["ir_nresrved3"] = 'null'  # comment count
            dic["ir_librariytype"] = 2  # fixed: 2
            dic["ir_score"] = 2  # fixed: 2
            dic["if_vcj"] = 2  # 1 = video downloaded, 2 = external link, 0 = no video
            dic["ir_mediasourceid"] = 9  # 1 台湾网, 2 人民网, 3 新华网, 4 央视网, 5 上海台, 6 东南台, 7 深圳台, 8 厦门台, 9 海峡台
            dic["ir_mediatype"] = 0  # fixed; 2 for cross-strait
            dic["ir_mediasource"] = web_name  # source name
            dic["ir_indexsource"] = "fjtv.net"  # fixed source domain
            dic["ir_url"] = videourl  # video url
            # dic["ir_keyword"] = 'null'  # search keyword; null when crawling a fixed account
            dic["ir_area"] = 2  # fixed: 2
            dic["ir_trade"] = -1  # fixed: -1
            dic["ir_md5"] = hashlib.md5(videourl.encode(encoding='UTF-8')).hexdigest()  # md5 of the link, lower-case
            print(dic["ir_md5"])
            dic["ir_istrand"] = 0
            dic["ir_isv"] = 1  # 1 ordinary user, 2 personal verified, 3 organisation verified, 4 influencer
            dic["ir_imgbin"] = ''  # image link
            dic["ir_imgurl"] = imgurl  # original image link
            dic["ir_videourl"] = videourl  # uploaded video link, if any
            # Database write
            connection = pymysql.connect(
                host='140.210.4.73',
                port=3306,
                user='twipad_cj',
                passwd='bj@#twipad_cj',
                db='tw_ipaddb',
                charset='utf8mb4'
            )
            try:
                # Get a cursor
                with connection.cursor() as cursor:
                    sheet_name = 'tw_webhistory_abroaddataall'
                    sql = """INSERT INTO {}(ir_isv,ir_urltime,ir_urldate,ir_md5,\
ir_content,ir_librariytype,ir_score,if_vcj,ir_mediasourceid,ir_mediatype,ir_mediasource,ir_indexsource,\
ir_url,ir_title,ir_area,ir_trade,ir_istrand,ir_imgbin,ir_imgurl,ir_videourl)\
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""".format(sheet_name)
                    try:
                        cursor.execute(sql, (
                            dic["ir_isv"], dic["ir_urltime"], dic["ir_urldate"], dic['ir_md5'],
                            dic["ir_content"], dic["ir_librariytype"], dic["ir_score"], dic["if_vcj"],
                            dic["ir_mediasourceid"], dic["ir_mediatype"], dic["ir_mediasource"],
                            dic["ir_indexsource"], dic["ir_url"], dic["ir_title"], dic["ir_area"],
                            dic["ir_trade"], dic["ir_istrand"], dic["ir_imgbin"], dic["ir_imgurl"],
                            dic["ir_videourl"]))
                        ir_idd = int(connection.insert_id())
                        print('数据库自增id', ir_idd, '数据')
                        connection.commit()  # commit the insert
                        print("tw_webhistory_abroaddataall表数据存储成功!")
                        # Notify the downstream API of the new row id
                        urrl = 'http://twipad.hnxinxiudata.top/api/data/web_data?ir_id={}'.format(ir_idd)
                        r = requests.get(urrl)
                        print(r)
                    except Exception as pymysqlErr:
                        print('=' * 50)
                        print(pymysqlErr)
                        print("tw_webhistory_abroaddataall表数据已存在,")
                        return 1
            except Exception as e:
                raise e
            finally:
                connection.close()
        except Exception as e:
            print(e)
    tc = 0        # consecutive duplicate counter
    zong_sum = 0  # total items processed
    web_name = '海峡台'
    url = 'http://www.fjtv.net/folder526/folder536/folder691/?pp=0'
    headers = {
        'cookie': 'user_visit=1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4557.4 Safari/537.36',
    }
    html = requests.get(url=url, headers=headers).content.decode()
    tree = etree.HTML(html)
    all_list = tree.xpath('//div[@class="templet templet_4"]')
    for data in all_list:
        biaoti = data.xpath('./div[@class="jieshao"]/p/a//text()')[0]
        videourl = data.xpath('./a//@href')[0]
        imgurl = data.xpath('./a/img//@src')[0]
        # The broadcast date is embedded in the title itself, in a "…月…日《今日海峡》" form
        # (a standalone sketch of this extraction follows the function).
        shijian = re.findall('(.*?)《今日海峡》', biaoti)[0]
        shijian = (''.join(shijian)).replace('月', '-').replace('年', '-').replace('日', ' ')
        shijian = shijian + '00:00:00'
        timeArray = time.strptime(shijian, "%Y-%m-%d %H:%M:%S")
        timeStamp = int(time.mktime(timeArray))  # convert to a unix timestamp
        print(timeStamp, biaoti, web_name, imgurl, videourl)
        tc_sum = save_mysql(timeStamp, biaoti, web_name, imgurl, videourl)
        time.sleep(2)
        if tc_sum == 1:
            tc += 1
            print("数据重复进入,2次退出", tc_sum, tc)
            if tc == 2:
                # return "获取完毕!"
                print("获取完毕!")
        zong_sum += 1
        print('***************第', zong_sum, '条***************')
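# For reference, a minimal standalone sketch of the title-to-timestamp extraction used in haixia()
# above. The title string here is made up for illustration; the real page titles are only assumed
# to start with a date in the "YYYY年MM月DD日《今日海峡》" form implied by the regex and replace calls.
def _haixia_title_time_example():
    import re
    import time
    biaoti = "2021年11月20日《今日海峡》"  # hypothetical title in the assumed format
    shijian = re.findall('(.*?)《今日海峡》', biaoti)[0]  # -> "2021年11月20日"
    shijian = shijian.replace('年', '-').replace('月', '-').replace('日', ' ') + '00:00:00'
    # time.mktime interprets the struct in the local timezone, so the exact value depends on where this runs
    return int(time.mktime(time.strptime(shijian, "%Y-%m-%d %H:%M:%S")))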
def shenzhen():
    import pymysql
    import requests
    import time
    import random
    import re
    import datetime
    import hashlib
    def get_time(strs):
        # Convert the various time formats the sites return into a unix timestamp.
        if len(strs) > 25:
            # e.g. "Mon Jan 18 10:05:38 +0800 2021"
            s = strs.split(" ")
            new_s = s[5] + "-" + s[1] + "-" + s[2] + " " + s[3]
            new_strs = new_s.replace('Jan', '1').replace('Feb', '2').replace('Mar', '3').replace('Apr', '4') \
                .replace('May', '5').replace('Jun', '6').replace('Jul', '7').replace('Aug', '8') \
                .replace('Sep', '9').replace('Oct', '10').replace('Nov', '11').replace('Dec', '12')
            t = time.strptime(new_strs, "%Y-%m-%d %H:%M:%S")
            t = time.mktime(t)
            return int(t)
        num = int(re.search(r"\d+", strs).group())
        if "秒前" in strs:
            times = (datetime.datetime.now() - datetime.timedelta(seconds=num)).strftime("%Y-%m-%d %H:%M")
        elif "分钟前" in strs:
            times = (datetime.datetime.now() - datetime.timedelta(minutes=num)).strftime("%Y-%m-%d %H:%M")
        elif "小时前" in strs:
            times = (datetime.datetime.now() - datetime.timedelta(hours=num)).strftime("%Y-%m-%d %H:%M")
        elif "昨天" in strs:
            times = (datetime.datetime.now() - datetime.timedelta(days=1)).strftime("%Y-%m-%d") + strs.split("昨天")[1]
        elif "日" in strs:
            t = strs.split("日")[1]
            ret = re.findall("(.*?)月(.*?)日.*?", strs)[0]
            times = "2020-" + ret[0] + "-" + ret[1] + t
        else:
            # strs = '2020-06-07 13:09:21'
            # strs = "06-09"
            q = strs.split("-")
            w = strs.split(":")
            if len(q) == 2:
                t = time.strptime("2020-" + strs, "%Y-%m-%d")
                t = time.mktime(t)
                return int(t)
            if len(q) == 3 and len(w) == 1:
                t = time.strptime(strs, "%Y-%m-%d")
                t = time.mktime(t)
                return int(t)
            if len(q) == 3 and len(w) == 2:
                t = time.strptime(strs, "%Y-%m-%d %H:%M")
                t = time.mktime(t)
                return int(t)
            else:
                t = time.strptime(strs, "%Y-%m-%d %H:%M:%S")
                # Convert the time tuple to a timestamp
                t = time.mktime(t)
                return int(t)
        # `times` is always built above as "%Y-%m-%d %H:%M"
        data_sj = time.strptime(str(times), "%Y-%m-%d %H:%M")
        return int(time.mktime(data_sj))
    def get_md5(parmStr):
        # In Python 3 every str is unicode, so encode to utf-8 bytes before hashing.
        if isinstance(parmStr, str):
            parmStr = parmStr.encode("utf-8")
        m = hashlib.md5()
        m.update(parmStr)
        return m.hexdigest()
    def save_mysql(timeStamp, biaoti, web_name, imgurl, videourl):
        dic = {}
        zq_time = datetime.datetime.now().strftime("%Y-%m-%d %X")  # crawl time
        try:
            dic['ir_title'] = str(biaoti)
            # dic["ir_authors"] = 'null'  # user name / author
            # dic["ir_serviceid"] = 'null'  # user id
            dic["ir_urltime"] = timeStamp  # publish time, as a unix timestamp
            dic["ir_urldate"] = get_time(zq_time)  # crawl time, as a unix timestamp
            dic["ir_content"] = ''  # article body
            # dic["ir_nresrved1"] = 'null'  # repost count
            # dic["ir_nresrved2"] = 'null'  # like count
            # dic["ir_nresrved3"] = 'null'  # comment count
            dic["ir_librariytype"] = 2  # fixed: 2
            dic["ir_score"] = 2  # fixed: 2
            dic["if_vcj"] = 2  # 1 = video downloaded, 2 = external link, 0 = no video
            dic["ir_mediasourceid"] = 7  # 1 台湾网, 2 人民网, 3 新华网, 4 央视网, 5 上海台, 6 东南台, 7 深圳台, 8 厦门台, 9 海峡台
            dic["ir_mediatype"] = 0  # fixed; 2 for cross-strait
            dic["ir_mediasource"] = web_name  # source name
            dic["ir_indexsource"] = "mapi1.kxm.xmtv.cn"  # fixed source domain
            dic["ir_url"] = videourl  # video url
            # dic["ir_keyword"] = 'null'  # search keyword; null when crawling a fixed account
            dic["ir_area"] = 2  # fixed: 2
            dic["ir_trade"] = -1  # fixed: -1
            dic["ir_md5"] = hashlib.md5(videourl.encode(encoding='UTF-8')).hexdigest()  # md5 of the link, lower-case
            print(dic["ir_md5"])
            dic["ir_istrand"] = 0
            dic["ir_isv"] = 1  # 1 ordinary user, 2 personal verified, 3 organisation verified, 4 influencer
            dic["ir_imgbin"] = ''  # image link
            dic["ir_imgurl"] = imgurl  # original image link
            dic["ir_videourl"] = videourl  # uploaded video link, if any
            # Database write
            connection = pymysql.connect(
                host='140.210.4.73',
                port=3306,
                user='twipad_cj',
                passwd='bj@#twipad_cj',
                db='tw_ipaddb',
                charset='utf8mb4'
            )
            try:
                # Get a cursor
                with connection.cursor() as cursor:
                    sheet_name = 'tw_webhistory_abroaddataall'
                    sql = """INSERT INTO {}(ir_isv,ir_urltime,ir_urldate,ir_md5,\
ir_content,ir_librariytype,ir_score,if_vcj,ir_mediasourceid,ir_mediatype,ir_mediasource,ir_indexsource,\
ir_url,ir_title,ir_area,ir_trade,ir_istrand,ir_imgbin,ir_imgurl,ir_videourl)\
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""".format(sheet_name)
                    try:
                        cursor.execute(sql, (
                            dic["ir_isv"], dic["ir_urltime"], dic["ir_urldate"], dic['ir_md5'],
                            dic["ir_content"], dic["ir_librariytype"], dic["ir_score"], dic["if_vcj"],
                            dic["ir_mediasourceid"], dic["ir_mediatype"], dic["ir_mediasource"],
                            dic["ir_indexsource"], dic["ir_url"], dic["ir_title"], dic["ir_area"],
                            dic["ir_trade"], dic["ir_istrand"], dic["ir_imgbin"], dic["ir_imgurl"],
                            dic["ir_videourl"]))
                        ir_idd = int(connection.insert_id())
                        print('数据库自增id', ir_idd, '数据')
                        connection.commit()  # commit the insert
                        print("tw_webhistory_abroaddataall表数据存储成功!")
                        # Notify the downstream API of the new row id
                        urrl = 'http://twipad.hnxinxiudata.top/api/data/web_data?ir_id={}'.format(ir_idd)
                        r = requests.get(urrl)
                        print(r)
                    except Exception as pymysqlErr:
                        print('=' * 50)
                        print(pymysqlErr)
                        print("tw_webhistory_abroaddataall表数据已存在,")
                        return 1
            except Exception as e:
                raise e
            finally:
                connection.close()
        except Exception as e:
            print(e)
    tc = 0        # consecutive duplicate counter
    zong_sum = 0  # total items processed
    web_name = '深圳台'
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4557.4 Safari/537.36'
    }
    url = 'https://api.scms.sztv.com.cn/api/com/article/getArticleList?tenantId=ysz&specialtype=1&banner=1&catalogId=7900&page=1'
    print(url)
    html = requests.get(url=url, headers=headers).json()
    for item in html['returnData']['news']:
        time.sleep(random.randint(1, 3))
        author = item['author']  # reporter
        biaoti = item['title']   # title
        imgurl = item['logo']    # image url
        # Video page url
        id = item['id']
        videourl = f'https://www.sztv.com.cn/ysz/dsdb/szws/zbgat/{id}.shtml'
        shijian = item['publishDate']
        timeArray = time.strptime(shijian, "%Y-%m-%d %H:%M:%S")
        timeStamp = int(time.mktime(timeArray))  # convert to a unix timestamp
        print(shijian, timeStamp)
        tc_sum = save_mysql(timeStamp, biaoti, web_name, imgurl, videourl)
        time.sleep(2)
        if tc_sum == 1:
            tc += 1
            print("数据重复进入,2次退出", tc_sum, tc)
            if tc == 2:
                # return "获取完毕!"
                print("获取完毕!")
        zong_sum += 1
        print('***************第', zong_sum, '条***************')
if __name__ == '__main__':
    # Run all three scrapers once a day.
    while True:
        xiamen()
        haixia()
        shenzhen()
        time.sleep(24 * 3600)
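# save_mysql() above treats any failed INSERT as "the row already exists" and returns 1, which is
# what drives the duplicate counter in each scraper. That behaviour presumably relies on a unique
# key on ir_md5 in tw_webhistory_abroaddataall (the post does not show the table schema). A minimal,
# hypothetical sketch of making that duplicate case explicit instead of catching every exception:
def _insert_once(connection, sql, params):
    import pymysql
    try:
        with connection.cursor() as cursor:
            cursor.execute(sql, params)
        new_id = connection.insert_id()
        connection.commit()
        return new_id  # auto-increment id of the newly stored row
    except pymysql.err.IntegrityError:
        # Duplicate key (same ir_md5, i.e. same video URL already stored): roll back and signal it.
        connection.rollback()
        return None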
Author: 布都御魂
Link: https://www.cnblogs.com/wolvies/p/15584204.html
License: This work is licensed under the Creative Commons Attribution-NonCommercial-NoDerivs 2.5 China Mainland License.