随笔 - 57  文章 - 3 评论 - 0 阅读 - 2317
< 2025年3月 >
23 24 25 26 27 28 1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28 29
30 31 1 2 3 4 5

# -*- coding: utf-8 -*-
"""
Created on 2024-05-31 10:21:56
---------
@summary:
---------
@author: me
"""
import json
from feapder.db.mysqldb import MysqlDB
import feapder

"""
# MYSQL
MYSQL_IP = "127.0.0.1"
MYSQL_PORT = 3306
MYSQL_DB = "spider"
MYSQL_USER_NAME = "root"
MYSQL_USER_PASS = "123456"
"""

"""
Create the matching table in your database first; data can only be
inserted once the table exists.
"""

class FirstSpider(feapder.AirSpider):
    """Crawl video listings from 18je.life and store each item's m3u8 URL in MySQL.

    Flow: start page -> category tabs (parse) -> listing pages (parse_one)
    -> detail page (_parse_next) -> playlist (_parse_last) -> MySQL insert.
    """

    # Shared DB handle; connection settings come from feapder's MYSQL_* config.
    db = MysqlDB()

    def start_requests(self):
        """Seed the crawl with the site's front page."""
        yield feapder.Request("https://18je.life/")

    # def download_midware(self, request):
    #     # Attach a proxy here if needed, e.g.:
    #     # request.proxies = {"http": "http://113.121.22.221:9999"}
    #     return request

    def parse(self, request, response):
        """Extract every category tab link (skipping the first tab) and follow it."""
        tab_links = response.xpath('//div[@class="tab-head"]/a[position()>1]/@href').extract()
        for num, url in enumerate(tab_links):
            print(url)
            # num tags each downstream item with its category index.
            yield feapder.Request(url=url, callback=self.parse_one, num=num)

    def parse_one(self, request, response):
        """Parse one listing page: collect title/cover per entry, follow detail links."""
        print(request.num)

        # Only used by the disabled pagination block below.
        next_page = response.xpath('//ul[@class="pagelist"]/li[last()]/a/@href').extract_first()
        li_list = response.xpath('//ul[@class="list"]/li')
        for li in li_list:
            item = {
                "num": request.num,
                "title": li.xpath('.//div[@class="title"]/text()').extract_first(),
                "cover": li.xpath('.//div[@class="vodlist_img"]/img/@data-original').extract_first(),
            }
            video_url = li.xpath('./a/@href').extract_first()
            # item travels with the request so later callbacks can enrich it.
            yield feapder.Request(url=video_url, callback=self._parse_next, item=item)

        # Pagination (disabled): follow "next" until it loops back to itself.
        # if next_page == response.url:
        #     return
        # yield feapder.Request(url=next_page, callback=self.parse_one)

    def _parse_next(self, request, response):
        """Extract the intermediate playlist URL from the detail page and fetch it."""
        # request.proxies = {"http": "http://117.69.236.252:8089"}
        page_text = response.text
        # NOTE(review): second positional arg to response.re may not be the
        # haystack in feapder's API — confirm; [1] deliberately takes the
        # second "url" match on the page.
        playlist_url = response.re('"url":"(.*?)",', page_text)[1]
        # The URL is JSON-escaped ("https:\/\/..."): strip the backslashes.
        playlist_url = playlist_url.replace('\\', '')
        item = request.item
        headers = {
            "referer": "https://18je.life/",
            "origin": "https://18je.life",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"
        }
        yield feapder.Request(url=playlist_url, headers=headers, callback=self._parse_last, item=item)

    def _parse_last(self, request, response):
        """Resolve the final variant m3u8 URL and persist the item to MySQL."""
        # item must be re-read from the request in every callback.
        item = request.item
        base_url = "https://vodvip888.com"
        playlist_text = response.text
        # Take the first variant stream after #EXT-X-STREAM-INF.
        # Fixed: escape the dot before "m3u8" so it can't match any character.
        variant_path = response.re(r'#EXT-X-STREAM-INF.*?\n(.*?)\.m3u8', playlist_text)[0] + ".m3u8"
        item['m3u8'] = base_url + variant_path.replace('\\', '')
        print(item)
        # The target table (here "db_3") must exist before inserting.
        self.db.add_smart("db_3", item)

        # Alternative: append each item to a JSON file instead of MySQL.
        # item_ = json.dumps(item, ensure_ascii=False)
        # with open('1.json', 'a', encoding='utf-8') as f:
        #     f.write(item_)
        #     f.write(',')

    def start_callback(self):
        """Hook run once before the crawl starts (JSON-array opener, disabled)."""
        pass
        # with open('1.json', 'w') as f:
        #     f.write('[')

    def end_callback(self):
        """Hook run once after the crawl ends (JSON-array closer, disabled)."""
        pass
        # with open('1.json', 'a') as f:
        #     f.write(']')




if __name__ == "__main__":
    # Launch the spider with 16 worker threads.
    FirstSpider(thread_count=16).start()
posted on   我爱你的  阅读(147)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 周边上新:园子的第一款马克杯温暖上架
· 分享 3 个 .NET 开源的文件压缩处理库,助力快速实现文件压缩解压功能!
· Ollama——大语言模型本地部署的极速利器
· DeepSeek如何颠覆传统软件测试?测试工程师会被淘汰吗?
· 使用C#创建一个MCP客户端
点击右上角即可分享
微信分享提示