台湾通
import time
def xiamen():
    import pymysql
    import requests
    import time
    import re
    import datetime
    import hashlib
    def get_time(strs):
        # Convert the various time formats the sites return into a unix timestamp.
        if len(strs) > 25:
            # e.g. "Mon Jan 18 10:05:38 +0800 2021"
            s = strs.split(" ")
            new_s = s[5] + "-" + s[1] + "-" + s[2] + " " + s[3]
            new_strs = new_s.replace('Jan', '1').replace('Feb', '2').replace('Mar', '3').replace('Apr', '4') \
                .replace('May', '5').replace('Jun', '6').replace('Jul', '7').replace('Aug', '8') \
                .replace('Sep', '9').replace('Oct', '10').replace('Nov', '11').replace('Dec', '12')
            t = time.strptime(new_strs, "%Y-%m-%d %H:%M:%S")
            t = time.mktime(t)
            return int(t)
        num = int(re.search(r"\d+", strs).group())
        if "秒前" in strs:
            times = (datetime.datetime.now() - datetime.timedelta(seconds=num)).strftime("%Y-%m-%d %H:%M")
        elif "分钟前" in strs:
            times = (datetime.datetime.now() - datetime.timedelta(minutes=num)).strftime("%Y-%m-%d %H:%M")
        elif "小时前" in strs:
            times = (datetime.datetime.now() - datetime.timedelta(hours=num)).strftime("%Y-%m-%d %H:%M")
        elif "昨天" in strs:
            times = (datetime.datetime.now() - datetime.timedelta(days=1)).strftime("%Y-%m-%d") + strs.split("昨天")[1]
        elif "日" in strs:
            t = strs.split("日")[1]
            ret = re.findall("(.*?)月(.*?)日.*?", strs)[0]
            times = "2020-" + ret[0] + "-" + ret[1] + t
        else:
            # strs = '2020-06-07 13:09:21'
            # strs = "06-09"
            q = strs.split("-")
            w = strs.split(":")
            if len(q) == 2:
                t = time.strptime("2020-" + strs, "%Y-%m-%d")
                t = time.mktime(t)
                return int(t)
            if len(q) == 3 and len(w) == 1:
                t = time.strptime(strs, "%Y-%m-%d")
                t = time.mktime(t)
                return int(t)
            if len(q) == 3 and len(w) == 2:
                t = time.strptime(strs, "%Y-%m-%d %H:%M")
                t = time.mktime(t)
                return int(t)
            else:
                t = time.strptime(strs, "%Y-%m-%d %H:%M:%S")
                # Convert the time tuple to a timestamp
                t = time.mktime(t)
                return int(t)
        # `times` is always built above as "%Y-%m-%d %H:%M"
        data_sj = time.strptime(str(times), "%Y-%m-%d %H:%M")
        return int(time.mktime(data_sj))
    def get_md5(parmStr):
        # In Python 3 every str is unicode, so encode to utf-8 bytes before hashing.
        if isinstance(parmStr, str):
            parmStr = parmStr.encode("utf-8")
        m = hashlib.md5()
        m.update(parmStr)
        return m.hexdigest()
    def save_mysql(timeStamp, biaoti, web_name, imgurl, videourl):
        dic = {}
        zq_time = datetime.datetime.now().strftime("%Y-%m-%d %X")  # crawl time
        try:
            dic['ir_title'] = str(biaoti)
            # dic["ir_authors"] = 'null'  # user name / author
            # dic["ir_serviceid"] = 'null'  # user id
            dic["ir_urltime"] = timeStamp  # publish time, as a unix timestamp
            dic["ir_urldate"] = get_time(zq_time)  # crawl time, as a unix timestamp
            dic["ir_content"] = ''  # article body
            # dic["ir_nresrved1"] = 'null'  # repost count
            # dic["ir_nresrved2"] = 'null'  # like count
            # dic["ir_nresrved3"] = 'null'  # comment count
            dic["ir_librariytype"] = 2  # fixed: 2
            dic["ir_score"] = 2  # fixed: 2
            dic["if_vcj"] = 2  # 1 = video downloaded, 2 = external link, 0 = no video
            dic["ir_mediasourceid"] = 8  # 1 台湾网, 2 人民网, 3 新华网, 4 央视网, 5 上海台, 6 东南台, 7 深圳台, 8 厦门台, 9 海峡台
            dic["ir_mediatype"] = 0  # fixed; 2 for cross-strait
            dic["ir_mediasource"] = web_name  # source name
            dic["ir_indexsource"] = "mapi1.kxm.xmtv.cn"  # fixed source domain
            dic["ir_url"] = videourl  # video url
            # dic["ir_keyword"] = 'null'  # search keyword; null when crawling a fixed account
            dic["ir_area"] = 2  # fixed: 2
            dic["ir_trade"] = -1  # fixed: -1
            dic["ir_md5"] = hashlib.md5(videourl.encode(encoding='UTF-8')).hexdigest()  # md5 of the link, lower-case
            print(dic["ir_md5"])
            dic["ir_istrand"] = 0
            dic["ir_isv"] = 1  # 1 ordinary user, 2 personal verified, 3 organisation verified, 4 influencer
            dic["ir_imgbin"] = ''  # image link
            dic["ir_imgurl"] = imgurl  # original image link
            dic["ir_videourl"] = videourl  # uploaded video link, if any
            # Database write
            connection = pymysql.connect(
                host='140.210.4.73',
                port=3306,
                user='twipad_cj',
                passwd='bj@#twipad_cj',
                db='tw_ipaddb',
                charset='utf8mb4'
            )
            try:
                # Get a cursor
                with connection.cursor() as cursor:
                    sheet_name = 'tw_webhistory_abroaddataall'
                    sql = """INSERT INTO {}(ir_isv,ir_urltime,ir_urldate,ir_md5,\
ir_content,ir_librariytype,ir_score,if_vcj,ir_mediasourceid,ir_mediatype,ir_mediasource,ir_indexsource,\
ir_url,ir_title,ir_area,ir_trade,ir_istrand,ir_imgbin,ir_imgurl,ir_videourl)\
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""".format(sheet_name)
                    try:
                        cursor.execute(sql, (
                            dic["ir_isv"], dic["ir_urltime"], dic["ir_urldate"], dic['ir_md5'],
                            dic["ir_content"], dic["ir_librariytype"], dic["ir_score"], dic["if_vcj"],
                            dic["ir_mediasourceid"], dic["ir_mediatype"], dic["ir_mediasource"],
                            dic["ir_indexsource"], dic["ir_url"], dic["ir_title"], dic["ir_area"],
                            dic["ir_trade"], dic["ir_istrand"], dic["ir_imgbin"], dic["ir_imgurl"],
                            dic["ir_videourl"]))
                        ir_idd = int(connection.insert_id())
                        print('数据库自增id', ir_idd, '数据')
                        connection.commit()  # commit the insert
                        print("tw_webhistory_abroaddataall表数据存储成功!")
                        # Notify the downstream API of the new row id
                        urrl = 'http://twipad.hnxinxiudata.top/api/data/web_data?ir_id={}'.format(ir_idd)
                        r = requests.get(urrl)
                        print(r)
                    except Exception as pymysqlErr:
                        print('=' * 50)
                        print(pymysqlErr)
                        print("tw_webhistory_abroaddataall表数据已存在,")
                        return 1
            except Exception as e:
                raise e
            finally:
                connection.close()
        except Exception as e:
            print(e)
    tc = 0        # consecutive duplicate counter
    zong_sum = 0  # total items processed
    web_name = '厦门台'
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4557.4 Safari/537.36'
    }
    url = 'https://mapi1.kxm.xmtv.cn/api/v1/contents.php?column_id=247&with_child=1&offset=0&count=9'
    html = requests.get(url=url, headers=headers).json()
    for item in html:
        author = item['author']      # reporter
        biaoti = item['title']       # title
        imgurl = item['index_pic']   # image url
        id = item['id']
        shijian = item['created_at']
        timeArray = time.strptime(shijian, "%Y-%m-%d %H:%M:%S")
        timeStamp = int(time.mktime(timeArray))  # convert to a unix timestamp
        print(shijian, timeStamp)
        # Video page url
        videourl = f'https://2020.xmtv.cn/folder182/?lmdetail_id={id}'
        tc_sum = save_mysql(timeStamp, biaoti, web_name, imgurl, videourl)
        time.sleep(2)
        if tc_sum == 1:
            tc += 1
            print("数据重复进入,2次退出", tc_sum, tc)
            if tc == 2:
                # return "获取完毕!"
                print("获取完毕!")
        zong_sum += 1
        print('***************第', zong_sum, '条***************')
def haixia():
    import pymysql
    import requests
    from lxml import etree
    import time
    import re
    import datetime
    import hashlib
    def get_time(strs):
        # Convert the various time formats the sites return into a unix timestamp.
        if len(strs) > 25:
            # e.g. "Mon Jan 18 10:05:38 +0800 2021"
            s = strs.split(" ")
            new_s = s[5] + "-" + s[1] + "-" + s[2] + " " + s[3]
            new_strs = new_s.replace('Jan', '1').replace('Feb', '2').replace('Mar', '3').replace('Apr', '4') \
                .replace('May', '5').replace('Jun', '6').replace('Jul', '7').replace('Aug', '8') \
                .replace('Sep', '9').replace('Oct', '10').replace('Nov', '11').replace('Dec', '12')
            t = time.strptime(new_strs, "%Y-%m-%d %H:%M:%S")
            t = time.mktime(t)
            return int(t)
        num = int(re.search(r"\d+", strs).group())
        if "秒前" in strs:
            times = (datetime.datetime.now() - datetime.timedelta(seconds=num)).strftime("%Y-%m-%d %H:%M")
        elif "分钟前" in strs:
            times = (datetime.datetime.now() - datetime.timedelta(minutes=num)).strftime("%Y-%m-%d %H:%M")
        elif "小时前" in strs:
            times = (datetime.datetime.now() - datetime.timedelta(hours=num)).strftime("%Y-%m-%d %H:%M")
        elif "昨天" in strs:
            times = (datetime.datetime.now() - datetime.timedelta(days=1)).strftime("%Y-%m-%d") + strs.split("昨天")[1]
        elif "日" in strs:
            t = strs.split("日")[1]
            ret = re.findall("(.*?)月(.*?)日.*?", strs)[0]
            times = "2020-" + ret[0] + "-" + ret[1] + t
        else:
            # strs = '2020-06-07 13:09:21'
            # strs = "06-09"
            q = strs.split("-")
            w = strs.split(":")
            if len(q) == 2:
                t = time.strptime("2020-" + strs, "%Y-%m-%d")
                t = time.mktime(t)
                return int(t)
            if len(q) == 3 and len(w) == 1:
                t = time.strptime(strs, "%Y-%m-%d")
                t = time.mktime(t)
                return int(t)
            if len(q) == 3 and len(w) == 2:
                t = time.strptime(strs, "%Y-%m-%d %H:%M")
                t = time.mktime(t)
                return int(t)
            else:
                t = time.strptime(strs, "%Y-%m-%d %H:%M:%S")
                # Convert the time tuple to a timestamp
                t = time.mktime(t)
                return int(t)
        # `times` is always built above as "%Y-%m-%d %H:%M"
        data_sj = time.strptime(str(times), "%Y-%m-%d %H:%M")
        return int(time.mktime(data_sj))
    def get_md5(parmStr):
        # In Python 3 every str is unicode, so encode to utf-8 bytes before hashing.
        if isinstance(parmStr, str):
            parmStr = parmStr.encode("utf-8")
        m = hashlib.md5()
        m.update(parmStr)
        return m.hexdigest()
    def save_mysql(timeStamp, biaoti, web_name, imgurl, videourl):
        dic = {}
        zq_time = datetime.datetime.now().strftime("%Y-%m-%d %X")  # crawl time
        try:
            dic['ir_title'] = str(biaoti)
            # dic["ir_authors"] = 'null'  # user name / author
            # dic["ir_serviceid"] = 'null'  # user id
            dic["ir_urltime"] = timeStamp  # publish time, as a unix timestamp
            dic["ir_urldate"] = get_time(zq_time)  # crawl time, as a unix timestamp
            dic["ir_content"] = ''  # article body
            # dic["ir_nresrved1"] = 'null'  # repost count
            # dic["ir_nresrved2"] = 'null'  # like count
            # dic["ir_nresrved3"] = 'null'  # comment count
            dic["ir_librariytype"] = 2  # fixed: 2
            dic["ir_score"] = 2  # fixed: 2
            dic["if_vcj"] = 2  # 1 = video downloaded, 2 = external link, 0 = no video
            dic["ir_mediasourceid"] = 9  # 1 台湾网, 2 人民网, 3 新华网, 4 央视网, 5 上海台, 6 东南台, 7 深圳台, 8 厦门台, 9 海峡台
            dic["ir_mediatype"] = 0  # fixed; 2 for cross-strait
            dic["ir_mediasource"] = web_name  # source name
            dic["ir_indexsource"] = "fjtv.net"  # fixed source domain
            dic["ir_url"] = videourl  # video url
            # dic["ir_keyword"] = 'null'  # search keyword; null when crawling a fixed account
            dic["ir_area"] = 2  # fixed: 2
            dic["ir_trade"] = -1  # fixed: -1
            dic["ir_md5"] = hashlib.md5(videourl.encode(encoding='UTF-8')).hexdigest()  # md5 of the link, lower-case
            print(dic["ir_md5"])
            dic["ir_istrand"] = 0
            dic["ir_isv"] = 1  # 1 ordinary user, 2 personal verified, 3 organisation verified, 4 influencer
            dic["ir_imgbin"] = ''  # image link
            dic["ir_imgurl"] = imgurl  # original image link
            dic["ir_videourl"] = videourl  # uploaded video link, if any
            # Database write
            connection = pymysql.connect(
                host='140.210.4.73',
                port=3306,
                user='twipad_cj',
                passwd='bj@#twipad_cj',
                db='tw_ipaddb',
                charset='utf8mb4'
            )
            try:
                # Get a cursor
                with connection.cursor() as cursor:
                    sheet_name = 'tw_webhistory_abroaddataall'
                    sql = """INSERT INTO {}(ir_isv,ir_urltime,ir_urldate,ir_md5,\
ir_content,ir_librariytype,ir_score,if_vcj,ir_mediasourceid,ir_mediatype,ir_mediasource,ir_indexsource,\
ir_url,ir_title,ir_area,ir_trade,ir_istrand,ir_imgbin,ir_imgurl,ir_videourl)\
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""".format(sheet_name)
                    try:
                        cursor.execute(sql, (
                            dic["ir_isv"], dic["ir_urltime"], dic["ir_urldate"], dic['ir_md5'],
                            dic["ir_content"], dic["ir_librariytype"], dic["ir_score"], dic["if_vcj"],
                            dic["ir_mediasourceid"], dic["ir_mediatype"], dic["ir_mediasource"],
                            dic["ir_indexsource"], dic["ir_url"], dic["ir_title"], dic["ir_area"],
                            dic["ir_trade"], dic["ir_istrand"], dic["ir_imgbin"], dic["ir_imgurl"],
                            dic["ir_videourl"]))
                        ir_idd = int(connection.insert_id())
                        print('数据库自增id', ir_idd, '数据')
                        connection.commit()  # commit the insert
                        print("tw_webhistory_abroaddataall表数据存储成功!")
                        # Notify the downstream API of the new row id
                        urrl = 'http://twipad.hnxinxiudata.top/api/data/web_data?ir_id={}'.format(ir_idd)
                        r = requests.get(urrl)
                        print(r)
                    except Exception as pymysqlErr:
                        print('=' * 50)
                        print(pymysqlErr)
                        print("tw_webhistory_abroaddataall表数据已存在,")
                        return 1
            except Exception as e:
                raise e
            finally:
                connection.close()
        except Exception as e:
            print(e)
    tc = 0        # consecutive duplicate counter
    zong_sum = 0  # total items processed
    web_name = '海峡台'
    url = 'http://www.fjtv.net/folder526/folder536/folder691/?pp=0'
    headers = {
        'cookie': 'user_visit=1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4557.4 Safari/537.36',
    }
    html = requests.get(url=url, headers=headers).content.decode()
    tree = etree.HTML(html)
    all_list = tree.xpath('//div[@class="templet templet_4"]')
    for data in all_list:
        biaoti = data.xpath('./div[@class="jieshao"]/p/a//text()')[0]
        videourl = data.xpath('./a//@href')[0]
        imgurl = data.xpath('./a/img//@src')[0]
        # The broadcast date is embedded in the title itself, in a "…月…日《今日海峡》" form
        # (a standalone sketch of this extraction follows the function).
        shijian = re.findall('(.*?)《今日海峡》', biaoti)[0]
        shijian = (''.join(shijian)).replace('月', '-').replace('年', '-').replace('日', ' ')
        shijian = shijian + '00:00:00'
        timeArray = time.strptime(shijian, "%Y-%m-%d %H:%M:%S")
        timeStamp = int(time.mktime(timeArray))  # convert to a unix timestamp
        print(timeStamp, biaoti, web_name, imgurl, videourl)
        tc_sum = save_mysql(timeStamp, biaoti, web_name, imgurl, videourl)
        time.sleep(2)
        if tc_sum == 1:
            tc += 1
            print("数据重复进入,2次退出", tc_sum, tc)
            if tc == 2:
                # return "获取完毕!"
                print("获取完毕!")
        zong_sum += 1
        print('***************第', zong_sum, '条***************')
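# For reference, a minimal standalone sketch of the title-to-timestamp extraction used in haixia()
# above. The title string here is made up for illustration; the real page titles are only assumed
# to start with a date in the "YYYY年MM月DD日《今日海峡》" form implied by the regex and replace calls.
def _haixia_title_time_example():
    import re
    import time
    biaoti = "2021年11月20日《今日海峡》"  # hypothetical title in the assumed format
    shijian = re.findall('(.*?)《今日海峡》', biaoti)[0]  # -> "2021年11月20日"
    shijian = shijian.replace('年', '-').replace('月', '-').replace('日', ' ') + '00:00:00'
    # time.mktime interprets the struct in the local timezone, so the exact value depends on where this runs
    return int(time.mktime(time.strptime(shijian, "%Y-%m-%d %H:%M:%S")))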
def shenzhen():
    import pymysql
    import requests
    import time
    import random
    import re
    import datetime
    import hashlib
    def get_time(strs):
        # Convert the various time formats the sites return into a unix timestamp.
        if len(strs) > 25:
            # e.g. "Mon Jan 18 10:05:38 +0800 2021"
            s = strs.split(" ")
            new_s = s[5] + "-" + s[1] + "-" + s[2] + " " + s[3]
            new_strs = new_s.replace('Jan', '1').replace('Feb', '2').replace('Mar', '3').replace('Apr', '4') \
                .replace('May', '5').replace('Jun', '6').replace('Jul', '7').replace('Aug', '8') \
                .replace('Sep', '9').replace('Oct', '10').replace('Nov', '11').replace('Dec', '12')
            t = time.strptime(new_strs, "%Y-%m-%d %H:%M:%S")
            t = time.mktime(t)
            return int(t)
        num = int(re.search(r"\d+", strs).group())
        if "秒前" in strs:
            times = (datetime.datetime.now() - datetime.timedelta(seconds=num)).strftime("%Y-%m-%d %H:%M")
        elif "分钟前" in strs:
            times = (datetime.datetime.now() - datetime.timedelta(minutes=num)).strftime("%Y-%m-%d %H:%M")
        elif "小时前" in strs:
            times = (datetime.datetime.now() - datetime.timedelta(hours=num)).strftime("%Y-%m-%d %H:%M")
        elif "昨天" in strs:
            times = (datetime.datetime.now() - datetime.timedelta(days=1)).strftime("%Y-%m-%d") + strs.split("昨天")[1]
        elif "日" in strs:
            t = strs.split("日")[1]
            ret = re.findall("(.*?)月(.*?)日.*?", strs)[0]
            times = "2020-" + ret[0] + "-" + ret[1] + t
        else:
            # strs = '2020-06-07 13:09:21'
            # strs = "06-09"
            q = strs.split("-")
            w = strs.split(":")
            if len(q) == 2:
                t = time.strptime("2020-" + strs, "%Y-%m-%d")
                t = time.mktime(t)
                return int(t)
            if len(q) == 3 and len(w) == 1:
                t = time.strptime(strs, "%Y-%m-%d")
                t = time.mktime(t)
                return int(t)
            if len(q) == 3 and len(w) == 2:
                t = time.strptime(strs, "%Y-%m-%d %H:%M")
                t = time.mktime(t)
                return int(t)
            else:
                t = time.strptime(strs, "%Y-%m-%d %H:%M:%S")
                # Convert the time tuple to a timestamp
                t = time.mktime(t)
                return int(t)
        # `times` is always built above as "%Y-%m-%d %H:%M"
        data_sj = time.strptime(str(times), "%Y-%m-%d %H:%M")
        return int(time.mktime(data_sj))
    def get_md5(parmStr):
        # In Python 3 every str is unicode, so encode to utf-8 bytes before hashing.
        if isinstance(parmStr, str):
            parmStr = parmStr.encode("utf-8")
        m = hashlib.md5()
        m.update(parmStr)
        return m.hexdigest()
    def save_mysql(timeStamp, biaoti, web_name, imgurl, videourl):
        dic = {}
        zq_time = datetime.datetime.now().strftime("%Y-%m-%d %X")  # crawl time
        try:
            dic['ir_title'] = str(biaoti)
            # dic["ir_authors"] = 'null'  # user name / author
            # dic["ir_serviceid"] = 'null'  # user id
            dic["ir_urltime"] = timeStamp  # publish time, as a unix timestamp
            dic["ir_urldate"] = get_time(zq_time)  # crawl time, as a unix timestamp
            dic["ir_content"] = ''  # article body
            # dic["ir_nresrved1"] = 'null'  # repost count
            # dic["ir_nresrved2"] = 'null'  # like count
            # dic["ir_nresrved3"] = 'null'  # comment count
            dic["ir_librariytype"] = 2  # fixed: 2
            dic["ir_score"] = 2  # fixed: 2
            dic["if_vcj"] = 2  # 1 = video downloaded, 2 = external link, 0 = no video
            dic["ir_mediasourceid"] = 7  # 1 台湾网, 2 人民网, 3 新华网, 4 央视网, 5 上海台, 6 东南台, 7 深圳台, 8 厦门台, 9 海峡台
            dic["ir_mediatype"] = 0  # fixed; 2 for cross-strait
            dic["ir_mediasource"] = web_name  # source name
            dic["ir_indexsource"] = "mapi1.kxm.xmtv.cn"  # fixed source domain
            dic["ir_url"] = videourl  # video url
            # dic["ir_keyword"] = 'null'  # search keyword; null when crawling a fixed account
            dic["ir_area"] = 2  # fixed: 2
            dic["ir_trade"] = -1  # fixed: -1
            dic["ir_md5"] = hashlib.md5(videourl.encode(encoding='UTF-8')).hexdigest()  # md5 of the link, lower-case
            print(dic["ir_md5"])
            dic["ir_istrand"] = 0
            dic["ir_isv"] = 1  # 1 ordinary user, 2 personal verified, 3 organisation verified, 4 influencer
            dic["ir_imgbin"] = ''  # image link
            dic["ir_imgurl"] = imgurl  # original image link
            dic["ir_videourl"] = videourl  # uploaded video link, if any
            # Database write
            connection = pymysql.connect(
                host='140.210.4.73',
                port=3306,
                user='twipad_cj',
                passwd='bj@#twipad_cj',
                db='tw_ipaddb',
                charset='utf8mb4'
            )
            try:
                # Get a cursor
                with connection.cursor() as cursor:
                    sheet_name = 'tw_webhistory_abroaddataall'
                    sql = """INSERT INTO {}(ir_isv,ir_urltime,ir_urldate,ir_md5,\
ir_content,ir_librariytype,ir_score,if_vcj,ir_mediasourceid,ir_mediatype,ir_mediasource,ir_indexsource,\
ir_url,ir_title,ir_area,ir_trade,ir_istrand,ir_imgbin,ir_imgurl,ir_videourl)\
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""".format(sheet_name)
                    try:
                        cursor.execute(sql, (
                            dic["ir_isv"], dic["ir_urltime"], dic["ir_urldate"], dic['ir_md5'],
                            dic["ir_content"], dic["ir_librariytype"], dic["ir_score"], dic["if_vcj"],
                            dic["ir_mediasourceid"], dic["ir_mediatype"], dic["ir_mediasource"],
                            dic["ir_indexsource"], dic["ir_url"], dic["ir_title"], dic["ir_area"],
                            dic["ir_trade"], dic["ir_istrand"], dic["ir_imgbin"], dic["ir_imgurl"],
                            dic["ir_videourl"]))
                        ir_idd = int(connection.insert_id())
                        print('数据库自增id', ir_idd, '数据')
                        connection.commit()  # commit the insert
                        print("tw_webhistory_abroaddataall表数据存储成功!")
                        # Notify the downstream API of the new row id
                        urrl = 'http://twipad.hnxinxiudata.top/api/data/web_data?ir_id={}'.format(ir_idd)
                        r = requests.get(urrl)
                        print(r)
                    except Exception as pymysqlErr:
                        print('=' * 50)
                        print(pymysqlErr)
                        print("tw_webhistory_abroaddataall表数据已存在,")
                        return 1
            except Exception as e:
                raise e
            finally:
                connection.close()
        except Exception as e:
            print(e)
    tc = 0        # consecutive duplicate counter
    zong_sum = 0  # total items processed
    web_name = '深圳台'
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4557.4 Safari/537.36'
    }
    url = 'https://api.scms.sztv.com.cn/api/com/article/getArticleList?tenantId=ysz&specialtype=1&banner=1&catalogId=7900&page=1'
    print(url)
    html = requests.get(url=url, headers=headers).json()
    for item in html['returnData']['news']:
        time.sleep(random.randint(1, 3))
        author = item['author']  # reporter
        biaoti = item['title']   # title
        imgurl = item['logo']    # image url
        # Video page url
        id = item['id']
        videourl = f'https://www.sztv.com.cn/ysz/dsdb/szws/zbgat/{id}.shtml'
        shijian = item['publishDate']
        timeArray = time.strptime(shijian, "%Y-%m-%d %H:%M:%S")
        timeStamp = int(time.mktime(timeArray))  # convert to a unix timestamp
        print(shijian, timeStamp)
        tc_sum = save_mysql(timeStamp, biaoti, web_name, imgurl, videourl)
        time.sleep(2)
        if tc_sum == 1:
            tc += 1
            print("数据重复进入,2次退出", tc_sum, tc)
            if tc == 2:
                # return "获取完毕!"
                print("获取完毕!")
        zong_sum += 1
        print('***************第', zong_sum, '条***************')
if __name__ == '__main__':
    # Run all three scrapers once a day.
    while True:
        xiamen()
        haixia()
        shenzhen()
        time.sleep(24 * 3600)
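# save_mysql() above treats any failed INSERT as "the row already exists" and returns 1, which is
# what drives the duplicate counter in each scraper. That behaviour presumably relies on a unique
# key on ir_md5 in tw_webhistory_abroaddataall (the post does not show the table schema). A minimal,
# hypothetical sketch of making that duplicate case explicit instead of catching every exception:
def _insert_once(connection, sql, params):
    import pymysql
    try:
        with connection.cursor() as cursor:
            cursor.execute(sql, params)
        new_id = connection.insert_id()
        connection.commit()
        return new_id  # auto-increment id of the newly stored row
    except pymysql.err.IntegrityError:
        # Duplicate key (same ir_md5, i.e. same video URL already stored): roll back and signal it.
        connection.rollback()
        return None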
Author: 布都御魂
Link: https://www.cnblogs.com/wolvies/p/15584204.html
License: This work is licensed under the Creative Commons Attribution-NonCommercial-NoDerivs 2.5 China Mainland License.