下面这个文件作为任务调配(分发)端:
taskMaster.py
import queue
import random
import time
from multiprocessing.managers import BaseManager
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

from todos.test import get_href
# Module-level queues handed out by the get_task_queue / get_result_queue
# accessors that are registered on QueueManager.
task_queue, result_queue = queue.Queue(), queue.Queue()
class QueueManager(BaseManager):
    """BaseManager subclass on which the queue accessors are registered.

    It adds no behaviour of its own; it only provides a dedicated class to
    call ``register`` on.
    """
class SaceMnage():
    """Task master: scrape listing pages and publish item links on a queue.

    On construction a ``QueueManager`` is started on 127.0.0.1:5000 that
    exposes a task queue and a result queue.  ``main`` then walks the
    paginated listing that starts at ``self.url`` and puts the ``href`` of
    every listed item on the task queue.  The queues are presumably consumed
    by worker processes that connect to the manager (not shown here).
    """

    # Crawling pauses while more than this many tasks are still queued.
    MAX_PENDING_TASKS = 60
    # Seconds to pause between two listing-page requests.
    PAGE_DELAY_SECONDS = 10
    # Seconds between two queue-size checks while crawling is paused.
    POLL_INTERVAL_SECONDS = 1
    # Seconds after which a hanging HTTP request is abandoned.
    REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self):
        """Start the queue manager and fetch proxies for both queues."""
        # NOTE(review): 'If-Modified-Since' and 'Referer' look truncated, and
        # the conditional headers (If-Modified-Since / If-None-Match) may make
        # the server answer 304 with an empty body -- confirm they are wanted.
        self.headers = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Cache-Control': 'max-age=0',
            'Connection': 'keep-alive',
            'Host': 'www.27270.com',
            'If-Modified-Since': 'Sat, 22 Dec 2018 19',
            'If-None-Match': 'W/"5c1e8fff-b918"',
            'Referer': 'https',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
        }
        QueueManager.register('get_task_queue', callable=self.get_task_queue)
        QueueManager.register('get_result_queue', callable=self.get_result_queue)
        self.manager = QueueManager(address=('127.0.0.1', 5000), authkey='abc'.encode('UTF-8'))
        self.manager.start()
        self.task = self.manager.get_task_queue()
        self.result = self.manager.get_result_queue()
        # Page counter: starts at 1 and is incremented for every page fetched.
        self.num = 1
        self.url = "https://www.27270.com/word/dongwushijie/"

    def get_task_queue(self):
        """Return the module-level task queue (registered on the manager)."""
        return task_queue

    def get_result_queue(self):
        """Return the module-level result queue (registered on the manager)."""
        return result_queue

    def main(self):
        """Crawl from ``self.url`` and always shut the manager down afterwards."""
        try:
            self.distributed_task(self.url)
        finally:
            # Runs even if crawling raises, so the manager is never left
            # running after a failure.
            self.close()

    def distributed_task(self, url):
        """Queue the item links of *url* and of every following listing page.

        Pages are walked iteratively instead of one recursive call per page.
        Between pages the method sleeps ``PAGE_DELAY_SECONDS`` and then waits
        until at most ``MAX_PENDING_TASKS`` tasks remain queued.  When no
        further page can be found, or a request fails, it calls ``wait()``.

        Args:
            url: Address of the first listing page to crawl.
        """
        while url is not None:
            self.num += 1
            try:
                res = requests.get(url, headers=self.headers,
                                   timeout=self.REQUEST_TIMEOUT_SECONDS)
            except requests.RequestException as exc:
                # Best effort: stop paging but keep the already queued tasks
                # available (see wait() below).
                print('Request for %s failed: %s' % (url, exc))
                break
            res.encoding = 'gb18030'
            soup = BeautifulSoup(res.text, 'lxml')

            queued = self._queue_item_links(soup)
            next_url = self._find_next_url(soup, url)
            if next_url is None and queued:
                # No explicit link: try the numeric successor, but only after
                # a page that had items, so a missing page ends the crawl.
                next_url = self._guess_next_url(url)
            if next_url is None or next_url == url:
                break

            print(next_url)
            time.sleep(self.PAGE_DELAY_SECONDS)
            # Sleep between checks rather than spinning on qsize().
            while self.task.qsize() > self.MAX_PENDING_TASKS:
                time.sleep(self.POLL_INTERVAL_SECONDS)
            url = next_url
        self.wait()

    def _queue_item_links(self, soup):
        """Put the first link of every listed item on the task queue.

        Returns:
            The number of links that were queued.
        """
        items = soup.select("div.MeinvTuPianBox ul li")
        print(len(items))
        queued = 0
        for item in items:
            anchors = item.select("a")
            href = anchors[0].get("href") if anchors else None
            if not href:
                # Entry without a usable link: skip it instead of raising.
                continue
            print('Put task %s ...' % href)
            self.task.put(href)
            queued += 1
        return queued

    @staticmethod
    def _find_next_url(soup, page_url):
        """Return the absolute URL of the "下一页" pager link, or None."""
        for link in soup.select(".NewPages ul li a"):
            if link.text == "下一页":
                href = link.get("href")
                if href:
                    # urljoin accepts relative and absolute hrefs alike.
                    return urljoin(page_url, href)
        return None

    @staticmethod
    def _guess_next_url(url):
        """Return *url* with its trailing number incremented, or None.

        The whole run of trailing digits is treated as one number, so
        "..._19" becomes "..._20".  None is returned when *url* does not end
        in a digit.
        """
        stem = url.rstrip("0123456789")
        digits = url[len(stem):]
        if not digits:
            return None
        return stem + str(int(digits) + 1)

    def wait(self, seconds=6000 * 3):
        """Sleep for *seconds* (default 18000) and then shut the manager down."""
        time.sleep(seconds)
        self.manager.shutdown()

    def close(self):
        """Shut the queue manager down."""
        self.manager.shutdown()

    def ss(self):
        """Queue one fixed listing URL, sleep three hours, then shut down."""
        zurl = "https://www.27270.com/ent/meinvtupian/list_11_212.html"
        self.task.put(zurl)
        time.sleep(3600*3)
        self.close()
if __name__ == '__main__':
    # Only start the manager and the crawl when run as a script.
    sace = SaceMnage()
    sace.main()
说好的动物呢!!!!!

地址:https://github.com/libaibuaidufu/queue_pacong
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· DeepSeek 解答了困扰我五年的技术问题
· 为什么说在企业级应用开发中,后端往往是效率杀手?
· 用 C# 插值字符串处理器写一个 sscanf
· Java 中堆内存和栈内存上的数据分布和特点
· 开发中对象命名的一点思考
· 为什么说在企业级应用开发中,后端往往是效率杀手?
· DeepSeek 解答了困扰我五年的技术问题。时代确实变了!
· 本地部署DeepSeek后,没有好看的交互界面怎么行!
· 趁着过年的时候手搓了一个低代码框架
· 推荐一个DeepSeek 大模型的免费 API 项目!兼容OpenAI接口!