定时任务中,多进程/多线程对数据库同一条数据进行操作时的冲突避免
import os, sys
import time
import logging
import requests
import threading
import random
from random import choice
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
"""
全局约定,便于后期做日志分析
os._exit(INT)
4001 4002 4003 4004
"""
# Start-up timestamp; it is embedded in every per-URL log line further down.
start_time = time.strftime('%Y%m%d_%H%M%S', time.localtime())
os_sep = os.sep

# Absolute location of this script, split into its path components once.
_script_path = os.path.abspath(__file__)
_script_parts = _script_path.split(os_sep)

this_file_abspath = os.path.dirname(_script_path)
this_file_name = _script_parts[-1]

# Drop the file name and its directory, then take the parent of what is left.
base_dir = os.path.dirname(os_sep.join(_script_parts[:-2]))

# Log files are written to "<base_dir>/log".
log_abspath = os_sep.join((base_dir, 'log'))
"""
日志的记录不能依赖于日志类
"""
# Bootstrap log record, written with plain file I/O before the logging module is
# configured, so that start-up is recorded even if logging setup fails later.
now_, e = time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time())), '启动脚本'
# One log file per run: "<log dir>/<script name><timestamp>".
logf, s = '%s%s%s%s' % (log_abspath, os_sep, this_file_name, now_), '%s%s%s%s' % (__file__, now_, os.getcwd(), e)
# Separate file handed to the PhantomJS driver as its service log.
logf_selenium = '%s%s' % (logf, 'seleniumlog')
with open(logf, 'a') as fo:
    # Terminate the record: logging later appends to this same file, and without the
    # newline its first record would be glued onto the start-up line.
    fo.write('%s\n' % s)
print(s)
# Import the project's DB helper; on failure record the reason with plain file
# I/O (logging is not configured yet) and terminate with exit code 4001.
try:
    # Extend the module search path with base_dir before importing core.utils
    # (presumably the package lives under base_dir -- confirm against the repo layout).
    sys.path.append(base_dir)
    from core.utils import MysqlHelper
except Exception as e:
    stamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    s = ''.join(('from core.utils import MysqlHelper EXCEPTION ', stamp, str(e)))
    with open(logf, 'a') as fo:
        fo.write(s)
    print(s)
    os._exit(4001)
# Route the logging module to the per-run log file (append mode, INFO and above).
# If configuration fails, note it in the file directly and terminate with 4002.
try:
    _log_format = (
        '%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s'
        '[thread:%(thread)d][process:%(process)d]'
    )
    logging.basicConfig(
        filename=logf,
        filemode='a',
        level=logging.INFO,
        format=_log_format,
        datefmt='%a, %d %b %Y %H:%M:%S',
    )
except Exception as e:
    stamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    s = ''.join(('logging.basicConfig EXCEPTION ', stamp, str(e)))
    with open(logf, 'a') as fo:
        fo.write(s)
    print(s)
    os._exit(4002)
# Load the user-agent pool from "ua_list.txt" next to this script, one entry per
# line with the newline removed. Any failure is logged and ends the run with 4003.
try:
    fua = os_sep.join((this_file_abspath, 'ua_list.txt'))
    lua = []
    with open(fua, 'r') as fo:
        lua.extend(line.replace('\n', '') for line in fo)
except Exception as e:
    s = ''.join(('打开文件 EXCEPTION ua文件路径: ', fua))
    logging.error(s)
    print(s)
    os._exit(4003)
"""
对异常无限重启
"""
# Fetch the URLs that still need checking. Any failure here restarts the script.
try:
    # Scope of this script: it only touches test_error_temp and only updates the per-URL
    # status counters: row deleted, row kept (cannot be opened / none of our ads present;
    # switch or do not update), normal.
    mysql_obj = MysqlHelper()
    # Scan direction is picked at random on every run
    # (presumably so overlapping runs do not all start on the same rows -- confirm).
    desc = 'DESC' if random.randint(1, 2) == 2 else 'ASC'
    q = 'SELECT DISTINCT url FROM test_error_temp WHERE no_ad_times+no_open_times+ok_times<script_need_run_times ORDER BY id %s ;' % (
        desc)
    s = '%s%s' % (' DB SQL ', q)
    logging.info(s)
    tuple_l = mysql_obj.select(q)
    del mysql_obj
    if len(tuple_l) == 0:
        # Nothing to do: the worker loop below is sized from len(tuple_l), so no thread starts.
        s = '无待检测url,程序退出'
        print(s)
        logging.info(s)
except Exception as e:
    s = '%s%s%s' % ('初始数据,查询数据库异常,无限次重启该脚本', e, time.strftime('%Y%m%d %H:%M:%S', time.localtime(time.time())))
    print(s)
    logging.warning(s)
    # Restart by replacing the current process image instead of spawning a child through
    # os.system: a nested child keeps this process (and a shell) alive for every failed
    # attempt, so a long outage would pile up an unbounded chain of waiting processes.
    # sys.executable also guarantees the restart uses the interpreter that is running now.
    interpreter = sys.executable or 'python'
    # Buffered output is not flushed by exec, so flush it explicitly first.
    logging.shutdown()
    sys.stdout.flush()
    try:
        os.execvp(interpreter, [interpreter, __file__])
    except OSError as exec_err:
        print('%s%s' % ('restart failed ', exec_err))
    # Only reached when the exec itself failed.
    os._exit(1024)
# --- Run parameters shared by the worker functions -------------------------
# Wall-clock start and the maximum number of seconds the script may keep running.
ctrl_start = time.time()
max_script_time = 3600 * 6
# Marker strings searched for in a fetched page.
mycode_l = ['g3user', '51g3.com.cn']
# Attempts per URL, and the value passed as the delay (seconds) before each attempt.
repeat_times = 2
repeat_sleep_times = 2
# Progress counters, the list of worker threads, and the number of URLs per thread.
c_done = 0
c_all = len(tuple_l)
tl = []
tstep = 1000
# Not referenced anywhere else in the visible code; only logged below.
drop_ = 0

s = ''.join(str(part) for part in (
    ' drop_,ctrl_start, max_script_time,mycode_l, repeat_times, repeat_sleep_times ',
    drop_,
    ctrl_start,
    max_script_time,
    mycode_l,
    repeat_times,
    repeat_sleep_times,
))
print(s)
logging.info(s)
def _escape_sql_text(value):
    """Return str(value) made safe for use inside a double-quoted SQL string literal.

    Backslashes and double quotes are backslash-escaped. The checked text contains
    data controlled by remote servers (reason phrases, exception messages, URLs),
    so it must not be able to terminate the literal.
    NOTE(review): assumes the server's default sql_mode (backslash escapes enabled);
    prefer bound parameters if MysqlHelper.execute supports them -- confirm.
    """
    return str(value).replace('\\', '\\\\').replace('"', '\\"')


def main():
    """Check every pending URL in worker threads and record each outcome in test_error_temp.

    The URL list (tuple_l) is cut into slices of tstep entries; one thread is started
    per slice and main() returns after all of them have finished.
    """

    def ctrl_runtime(exit_type=''):
        """Stop the program once it has been running for max_script_time seconds.

        exit_type selects how to stop: '' calls exit(), 'sys' calls sys.exit() and
        'os' calls os._exit(4004). Any other value only writes the log entry.
        """
        if time.time() - ctrl_start < max_script_time:
            return
        s = '%s%s%s%s%s%s%s%s%s' % (
            '程序开始执行时间', ctrl_start, '执行时间阈值', max_script_time, '终止执行', ' exit_type =', exit_type, ' threadID ',
            threading.get_ident())
        logging.info(s)
        if exit_type == '':
            exit(s)
        elif exit_type == 'sys':
            sys.exit(s)
        elif exit_type == 'os':
            # os._exit requires an integer status argument.
            os._exit(4004)

    def chk_exception_url(url, sleep_seconds=0, http_tag='http://', timeout=30):
        """Fetch url and decide whether the page carries one of the markers in mycode_l.

        Args:
            url: address to check; http_tag is prepended when it has no http(s) scheme.
            sleep_seconds: delay before the request is sent.
            http_tag: scheme prefix used for scheme-less URLs.
            timeout: seconds allowed for the HTTP request, so a stalled server cannot
                block the worker thread forever.

        Returns:
            dict with
              'ok': 1 marker found, 0 page failed the check (no marker, or status >= 400),
                    -1 inconclusive (request/browser error or unhandled status); the
                    caller writes nothing to the database for -1.
              'status_code': HTTP status, or -1 when the request itself failed.
              'info': the log text describing this check.
        """
        time.sleep(sleep_seconds)
        # Start from "inconclusive": an error inside this function must not be counted
        # against the URL.
        ret = {'ok': -1}
        s = '%s%s%s%s%s%s%s%s%s%s' % (
            time.strftime('%Y%m%d %H:%M:%S', time.localtime(time.time())), ' os.getppid() ', os.getppid(),
            ' os.getpid() ', os.getpid(), ' threading.get_ident ', threading.get_ident(), ' start_time ', start_time,
            url)
        try:
            # Only a leading scheme counts; "http" appearing later in the URL
            # (e.g. in a query string) must not suppress the prefix.
            if not url.lstrip().lower().startswith(('http://', 'https://')):
                url = '%s%s' % (http_tag, url)
            r = requests.get(url, timeout=timeout)
            ret['status_code'] = int(r.status_code)
            s = '%s,%s,%s,%s,%s' % (s, ret['status_code'], url, r, r.reason)
        except Exception as e:
            s = '%s %s %s' % (s, ' SPIDER ', e)
            logging.error(s)
            print(e, url)
            ret['status_code'], ret['info'] = -1, s
            return ret

        if ret['status_code'] == 200:
            # Cheap check first: look for a marker in the raw response text.
            if any(r.text.find(code) > -1 for code in mycode_l):
                s = '%s%s' % (s, ' OK ')
                logging.info(s)
                ret['ok'], ret['info'] = 1, s
                return ret
            # Not in the raw text: load the page in PhantomJS and inspect its page source.
            try:
                dcap = dict(DesiredCapabilities.PHANTOMJS)
                dcap["phantomjs.page.settings.userAgent"] = choice(lua)
                dcap['browserName'], dcap['platform'] = 'Mozilla', 'Win'
                driver = webdriver.PhantomJS(desired_capabilities=dcap,
                                             executable_path='/usr/local/phantomjs/bin/phantomjs',
                                             service_log_path=logf_selenium)
                try:
                    driver.get(url)
                    page_source = driver.page_source
                finally:
                    # Always shut the browser down, even when loading the page failed.
                    driver.quit()
                if any(page_source.find(code) > -1 for code in mycode_l):
                    s = '%s%s' % (s, ' OK ')
                    logging.info(s)
                    ret['ok'], ret['info'] = 1, s
                    return ret
                s = '%s%s' % (s, '返回200,但是在html中未检测到我公司代码。')
                ret['ok'], ret['info'] = 0, s
                return ret
            except Exception as e:
                s = '%s%s%s%s' % (s, ' SPIDER ', e, ' 返回200,但是在检查是否我司代码环节,程序执行异常')
                logging.error(s)
                ret['info'] = s
                return ret
        elif ret['status_code'] >= 400:
            logging.warning(s)
            ret['ok'], ret['info'] = 0, s
            return ret

        # Any other status (non-200 below 400): report it as inconclusive instead of
        # falling off the end and returning None.
        s = '%s%s' % (s, ' SPIDER unhandled status code, result not recorded ')
        logging.warning(s)
        ret['info'] = s
        return ret

    def tf(ts):
        """Worker: check tuple_l[ts:ts + tstep] and store each result in test_error_temp."""
        # NOTE(review): c_done is incremented from several threads without a lock; it is
        # only used for the progress text in the log line.
        global c_done
        te = min(ts + tstep, c_all)
        for i in tuple_l[ts:te]:
            ctrl_runtime(exit_type='os')
            url = i[0]
            s = '%s%s%s%s%s%s%s%s%s%s' % (
                time.strftime('%Y%m%d %H:%M:%S', time.localtime(time.time())), ' os.getppid() ', os.getppid(),
                ' os.getpid() ', os.getpid(), ' threading.get_ident ', threading.get_ident(), ' start_time ',
                start_time,
                url)
            # Retry up to repeat_times; stop at the first successful check.
            ret = None
            for t in range(0, repeat_times, 1):
                ret = chk_exception_url(url, repeat_sleep_times)
                if ret['ok'] == 1:
                    c_done += 1
                    s = '%s/%s%s%s' % (c_done, c_all, 'chk-ret', s)
                    print(s)
                    logging.info(s)
                    break
            if ret is None:
                # repeat_times <= 0: nothing was checked, so there is nothing to record.
                continue

            # Pick the counter to increment; inconclusive results (-1) write nothing.
            column = None
            if ret['ok'] == 1:
                column = 'ok_times'
            elif ret['ok'] == 0:
                column = 'no_ad_times' if ret['status_code'] == 200 else 'no_open_times'
            if column is None:
                continue

            ctime = int(time.time())
            # The WHERE clause repeats the read-side condition so that concurrent
            # processes/threads cannot push a row past script_need_run_times.
            q = (
                'UPDATE test_error_temp SET %s=%s+1,remarks=CONCAT("%s",remarks),update_time="%s" '
                'WHERE url="%s" AND no_ad_times+no_open_times+ok_times<script_need_run_times '
            ) % (column, column, _escape_sql_text(ret['info']), ctime, _escape_sql_text(url))
            try:
                mysql_obj = MysqlHelper()
                mysql_obj.execute(q)
                mysql_obj.commit()
                del mysql_obj
                s = '%s%s%s' % (s, ' DB SQL ok ', q)
                logging.info(s)
                print(s)
            except Exception as e:
                s = '%s%s%s%s' % (s, ' DB Exception- ', q, e)
                logging.error(s)
                print(s)

    class MyThread(threading.Thread):
        """Thread that calls func(args) with args passed as one positional argument."""

        def __init__(self, func, args, name):
            threading.Thread.__init__(self)
            self.func, self.args, self.name = func, args, name

        def run(self):
            self.func(self.args)

    # One worker per slice of tstep URLs; the slice start index is the thread's argument.
    for i in range(0, c_all, tstep):
        tl.append(MyThread(tf, (i), tf.__name__))
    for t in tl:
        # Set the flag itself; assigning to setDaemon would only overwrite the method.
        t.daemon = False
        t.start()
    for t in tl:
        t.join()


if __name__ == '__main__':
    main()
虽然在数据源头(读环节)已经有限制条件 no_ad_times+no_open_times+ok_times<script_need_run_times,
但是在更新环节(写环节),同样需要把该条件加上:
no_ad_times+no_open_times+ok_times<script_need_run_times
-- 历史有效检测次数统计
SELECT COUNT(1) ,no_open_times+no_ad_times+ok_times as a FROM test_error_temp GROUP BY a ORDER BY a DESC;
COUNT(1) a
1 75
15 74
53 73
114 72
51 71
46 70
61 69
81 68
86 67
73 66
80 65
121 64
118 63
125 62
136 61
137 60
154 59
197 58
200 57
186 56
214 55
在写环节加上该条件,是为了避免出现 75/74/73 这样的异常数据