# -*- coding: utf-8 -*-
"""
Created on 2024-05-31 10:21:56
---------
@summary:
---------
@author: me
"""
import json
from feapder.db.mysqldb import MysqlDB
import feapder
"""
# MYSQL
MYSQL_IP = "127.0.0.1"
MYSQL_PORT = 3306
MYSQL_DB = "spider"
MYSQL_USER_NAME = "root"
MYSQL_USER_PASS = "123456"
"""
"""
在自己的数据库要设置对应的表,建好表后才可以把数据导入数据库
"""
class FirstSpider(feapder.AirSpider):
    """Spider that crawls video listings from 18je.life and stores the
    resulting m3u8 playlist URLs in MySQL.

    Crawl flow: home page -> category tabs (``parse``) -> listing pages
    (``parse_one``) -> per-video detail page (``_parse_next``) ->
    m3u8 master playlist (``_parse_last``) -> MySQL insert.
    """

    # Shared DB handle; connection settings come from feapder's settings
    # (see the MYSQL_* example block at the top of the file).
    db = MysqlDB()

    def start_requests(self):
        # Entry point: request the site's home page.
        yield feapder.Request("https://18je.life/")

    # def download_midware(self, request):
    #     # Set a proxy here if needed, e.g. 117.184.37.22
    #     request.proxies = {"http": "http://113.121.22.221:9999"}
    #     # request.proxies = {"http": "http://182.34.103.249:9999","https": "https://182.34.103.249:9999"}
    #     return request

    def parse(self, request, response):
        # Collect the category tab links (skipping the first tab) and
        # queue one listing-page request per category. The tab index
        # (num) is carried on the Request so downstream items keep it.
        divs = response.xpath('//div[@class="tab-head"]/a[position()>1]/@href').extract()
        for num,url in enumerate(divs):
            print(url)
            yield feapder.Request(url = url,callback=self.parse_one,num=num)

    def parse_one(self, request, response):
        # Listing page of one category: build a partial item per video
        # entry and follow its link to the detail page.
        print(request.num)
        next_page = response.xpath('//ul[@class="pagelist"]/li[last()]/a/@href').extract_first()
        li_list = response.xpath('//ul[@class="list"]/li')
        for i in li_list:
            item= {}
            item["num"] =request.num
            item["title"] = i.xpath('.//div[@class="title"]/text()').extract_first()
            item["cover"] = i.xpath('.//div[@class="vodlist_img"]/img/@data-original').extract_first()
            video_url = i.xpath('./a/@href').extract_first()
            yield feapder.Request(url=video_url, callback=self._parse_next, item=item)
        # The commented-out code below crawls deeper page by page via the
        # pagination link, stopping when the "next" link loops back.
        # if next_page == response.url:
        #     return 0
        # yield feapder.Request(url=next_page, callback=self.parse_one)

    def _parse_next(self, request, response):
        # Video detail page: pull the intermediate "url" field out of the
        # embedded player JSON, then request it to obtain the m3u8 playlist.
        # request.proxies = {"http": "http://117.69.236.252:8089"}
        m3u8_ = response.text
        # NOTE(review): index [1] takes the second "url" match — presumably
        # the first is something else on the page; verify against the site.
        m3u8_ = response.re('"url":"(.*?)",', m3u8_)[1]
        # The JSON value has escaped slashes ("\/"); strip the backslashes.
        m3u8_ = m3u8_.replace('\\', '')
        item = request.item
        # print("1:",m3u8_)
        headers = {
            # Referer/Origin must match the site or the video host rejects
            # the request; user-agent mimics a desktop Chrome browser.
            "referer": "https://18je.life/",
            "origin": "https://18je.life",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"
        }
        # print(item)
        yield feapder.Request(url=m3u8_, headers=headers,callback=self._parse_last,item=item)

    def _parse_last(self, request, response):
        # The item must be re-read from the request in every parse callback
        # so the accumulated fields survive the callback chain.
        item = request.item
        url_ = "https://vodvip888.com"
        m3u8_ = response.text
        # Take the first variant URI after #EXT-X-STREAM-INF in the master
        # playlist and rebuild its absolute .m3u8 URL on the video host.
        m3u8_ = response.re('#EXT-X-STREAM-INF.*?\\n(.*?).m3u8', m3u8_)[0] + ".m3u8"
        m3u8_ = url_ + m3u8_.replace('\\', '')
        item['m3u8'] = m3u8_
        print(item)
        # The target table must already exist in the database before
        # inserting; "db_3" is the destination table name.
        self.db.add_smart("db_3",item)
        # Alternative sink: append each item to a JSON file instead.
        # item_ = json.dumps(item, ensure_ascii=False)
        # with open('1.json','a',encoding='utf-8') as f:
        #     f.write(item_)
        #     f.write(',')

    def start_callback(self):
        # Hook run once before the crawl starts (currently unused; the
        # commented code opened the JSON-array output file).
        pass
        # with open('1.json','w') as f:
        #     f.write('[')

    def end_callback(self):
        # Hook run once after the crawl finishes (currently unused; the
        # commented code closed the JSON array).
        pass
        # with open('1.json', 'a') as f:
        #     f.write(']')
if __name__ == "__main__":
    # Run the crawler with 16 worker threads.
    spider = FirstSpider(thread_count=16)
    spider.start()