yhdm动漫爬虫项目

复制代码
# -*- coding: utf-8 -*-
"""
Created on Fri Aug 28 17:21:10 2020

@author: Mto
"""
"""
网址:http://www.yhdm.tv/
目的
获取视频文件
8月28日,代码功能基本实现
"""
import requests
import re
from bs4 import BeautifulSoup

def getHTML(url, timeout=10):
    """Fetch a page from www.yhdm.tv and return the HTTP response.

    Args:
        url: Address of the page to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The ``requests`` response object, with its text encoding forced to
        UTF-8.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    headers = {
        # The Host header is pinned to the site this scraper targets.
        'Host': 'www.yhdm.tv',
        'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0',
    }
    r = requests.get(url, headers=headers, timeout=timeout)
    # Force UTF-8 so r.text is decoded the same way regardless of what the
    # server declares.
    r.encoding = 'utf-8'
    return r


def GetMp4HTML(url, timeout=10):
    """Request a video file URL and return the HTTP response.

    Unlike ``getHTML`` no ``Host`` header is sent, so the request can go to
    whichever server hosts the video file.

    Args:
        url: Address of the resource to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The ``requests`` response object, with its text encoding set to
        UTF-8. The HTTP status code is printed as a side effect.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    headers = {
        'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0',
    }
    r = requests.get(url, headers=headers, timeout=timeout)
    r.encoding = 'utf-8'
    # Echo the status code so the operator can see whether the request worked.
    print(r.status_code)
    return r

      
def processHTML(r):
    """Parse the text of response ``r`` with bs4 and return the soup.

    The markup is read from ``r.text`` and parsed with the built-in
    'html.parser' backend.
    """
    markup = r.text
    return BeautifulSoup(markup, features='html.parser')


def GetLinksAndTitle(url1,title,links):
    """Collect episode titles and episode page URLs from an anime page.

    Downloads ``url1``, prints the anime name taken from the page's first
    ``<h1>``, then appends one entry per anchor found inside the first
    element whose style attribute is ``display:block``.

    Args:
        url1: URL of the anime overview page.
        title: List that receives each anchor's text (mutated in place).
        links: List that receives each anchor's absolute URL (mutated in
            place, kept index-aligned with ``title``).

    Raises:
        ValueError: If the page has no ``<h1>`` heading or no element styled
            ``display:block`` (for example when the layout changed or an
            error page was returned).
    """
    # Imported locally so this function carries its own dependency.
    from urllib.parse import urljoin

    soup = processHTML(getHTML(url1))

    heading = soup.h1
    if heading is None:
        raise ValueError('no <h1> heading found on ' + url1)
    # get_text() always yields a string, whereas .string is None when the
    # heading contains several child nodes.
    print('要下载的动漫为:' + heading.get_text())

    containers = soup.find_all(style="display:block")
    if not containers:
        raise ValueError('no episode list found on ' + url1)

    # href=True skips anchors without a target, keeping both lists aligned.
    for anchor in containers[0].find_all('a', href=True):
        title.append(anchor.text)
        # urljoin handles root-relative, relative and absolute hrefs alike.
        links.append(urljoin('http://www.yhdm.tv', anchor['href']))
    
        
    
def getmp4(link):
    """Extract the video file link from an episode page.

    The page is expected to contain ``<div id="playbox">`` whose
    ``data-vid`` attribute starts with an ``https`` URL ending in ``.mp4``.

    Args:
        link: URL of the episode page.

    Returns:
        The matched ``https...mp4`` URL as a string.

    Raises:
        ValueError: If the page has no ``div#playbox`` element, or its
            ``data-vid`` attribute does not start with an https .mp4 URL.
    """
    soup = processHTML(getHTML(link))

    players = soup.select('div#playbox')
    if not players:
        raise ValueError('no div#playbox element found on ' + link)

    data_vid = players[0].attrs.get('data-vid', '')
    # Raw string: '\.' is an invalid escape sequence in a normal literal.
    # The greedy '.*' extends the match to the last '.mp4' in the attribute.
    match = re.search(r'^https.*\.mp4', data_vid)
    if match is None:
        raise ValueError('no https .mp4 URL in data-vid on ' + link)
    return match.group()


def download(mp4link,title):
    """Report the video link for an episode without fetching the file.

    Prints ``mp4link`` on one line, then ``title`` followed by a fixed
    notice on the next line.

    NOTE: the actual download (request the link and write the response
    content to ``<title>.mp4``) is intentionally switched off to spare the
    remote server; this function only simulates it.
    """
    notice = '模拟访问成功,不下了,放过那个可怜的服务器吧'
    print(mp4link)
    report_line = title + notice
    print(report_line)
    
            
def main(url='http://www.yhdm.tv/show/4790.html', limit=3):
    """Scrape an anime page and process its first few episodes.

    Args:
        url: Anime overview page to scrape.
        limit: Maximum number of episodes to process; ``None`` processes
            every episode found.

    For each selected episode the video link is resolved with ``getmp4``
    and handed to ``download`` together with the episode title.
    """
    title = []
    links = []
    GetLinksAndTitle(url, title, links)
    # Slicing instead of indexing a fixed range avoids an IndexError when
    # the show has fewer episodes than ``limit``.
    for episode_title, episode_link in zip(title[:limit], links[:limit]):
        mp4link = getmp4(episode_link)
        download(mp4link, episode_title)
# Run the scraper only when this file is executed as a script, so importing
# the module to reuse its helpers does not trigger network requests.
if __name__ == '__main__':
    main()
复制代码

 

posted @ JKding233  阅读(9517)  评论(0)  编辑  收藏  举报
相关博文:
阅读排行:
· winform 绘制太阳,地球,月球 运作规律
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人
· 上周热点回顾(3.3-3.9)
点击右上角即可分享
微信分享提示