import requests
from lxml import etree
# Request headers sent with every HTTP call made by this module; the
# User-Agent value presents the client as a desktop browser.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/11.1.111.111 Safari"
    ),
}
def file_save(video_info, path='video_info.txt'):
    """Append one record to a UTF-8 text file, terminated by a newline.

    Args:
        video_info: Value to store. It is rendered with ``str.format``, so
            any object with a string representation is accepted.
        path: Destination file. Defaults to ``video_info.txt`` relative to
            the current working directory.
    """
    # Mode "a" creates the file on first use and keeps earlier records.
    with open(path, mode='a', encoding='utf-8') as f:
        f.write("{}\n".format(video_info))
def get_link(video_url):
    """Fetch a movie detail page, then save and print its title and first link.

    The title is read from the page's ``<h1><font>`` header element and the
    link is the first ``href`` of an anchor with ``target="_blank"``. When
    either is missing a message is printed and nothing is saved.

    Args:
        video_url: Site-relative path of the detail page; it is appended
            verbatim to ``https://www.ygdy8.net``.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    url = '{}{}'.format("https://www.ygdy8.net", video_url)
    # A timeout keeps the interactive session from hanging on a dead server.
    response = requests.get(url, headers=HEADERS, timeout=10)
    # Decode the body as gb2312 instead of relying on requests' own guess.
    response.encoding = 'gb2312'
    html = etree.HTML(response.text)

    # Guard against a parse result without a root element.
    if html is None:
        print("未找到电影名称或下载链接: {}".format(url))
        return

    names = html.xpath('//*[@id="header"]/div/div[3]/div[3]/div[1]/div[2]/div[1]/h1/font/text()')
    links = html.xpath('//a[@target="_blank"]/@href')
    # The XPath above is tied to one specific page layout; an empty result
    # means the page did not match, so report it instead of raising IndexError.
    if not names or not links:
        print("未找到电影名称或下载链接: {}".format(url))
        return

    video_info = "{}\n{}".format(names[0], links[0])
    file_save(video_info)
    print(video_info)
def _parse_positive_int(raw):
    """Return ``raw`` as an int when it is a whole number >= 1, else ``None``."""
    try:
        value = int(raw)
    except ValueError:
        return None
    return value if value >= 1 else None


def _fetch_video_index(page_type, page_code, page):
    """Download one listing page and return the ``[name, url]`` pairs on it."""
    url = 'https://www.ygdy8.net/html/gndy/{}/list_{}_{}.html'.format(page_type, page_code, page)
    # A timeout keeps the interactive session from hanging on a dead server.
    response = requests.get(url, headers=HEADERS, timeout=10)
    response.encoding = 'gb2312'
    html = etree.HTML(response.text)
    # Guard against a parse result without a root element.
    if html is None:
        return []
    names = html.xpath('//td/b/a[2]/text()')
    links = html.xpath('//td/b/a[2]/@href')
    # zip() stops at the shorter list, so a name without a matching href
    # cannot cause an IndexError.
    return [[name, link] for name, link in zip(names, links)]


def _download_from_menu(video_index):
    """Prompt for entries of ``video_index`` to download until the user enters q."""
    while True:
        choice = input("请选择需要下载的电影(按Q/q返回上级页面): ")
        if choice.lower() == 'q':
            break
        selected = _parse_positive_int(choice)
        # Reject empty, non-numeric, zero, negative and out-of-range input.
        if selected is None or selected > len(video_index):
            print("输入错误!")
            continue
        try:
            get_link(video_index[selected - 1][1])
        except requests.RequestException as exc:
            print("获取电影信息失败: {}".format(exc))


def _browse_pages(page_type, page_code):
    """Prompt for listing pages of one category until the user enters q."""
    while True:
        page = input("请输入要浏览的页数(按q返回电影类型): ")
        if page.lower() == 'q':
            break
        page_number = _parse_positive_int(page)
        if page_number is None:
            print("输入错误!")
            continue
        try:
            # A fresh list per page keeps the menu numbers aligned with the
            # page that was just requested.
            video_index = _fetch_video_index(page_type, page_code, page_number)
        except requests.RequestException as exc:
            print("页面获取失败: {}".format(exc))
            continue
        if not video_index:
            print("该页没有找到电影!")
            continue
        for idx, item in enumerate(video_index, 1):
            print("{}.{}".format(idx, item[0]))
        # Choose the movies to download.
        _download_from_menu(video_index)


def get_page():
    """Run the interactive menu: choose a category, a page, then movies.

    The user picks a category, then a listing page number, then any number
    of movies from that page; each chosen movie is passed to ``get_link``.
    Entering ``q`` (any case) at a prompt returns to the previous level, and
    at the category prompt it ends the function. Invalid input at any prompt
    prints an error message and asks again.
    """
    # Each entry: [display name, URL path segment, list code in the file name].
    categories = [["欧美电影", "oumei", "7"],
                  ["国内电影", "china", "4"]]
    while True:
        # Choose the category.
        for idx, item in enumerate(categories, 1):
            print("{}.{}".format(idx, item[0]))
        choice = input("请输入要浏览的分区(按q退出): ")
        if choice.lower() == 'q':
            print("bye bey~")
            break
        selected = _parse_positive_int(choice)
        # Reject empty, non-numeric, zero, negative and out-of-range input.
        if selected is None or selected > len(categories):
            print("输入错误!")
            continue
        _, page_type, page_code = categories[selected - 1]
        # Choose the page number.
        _browse_pages(page_type, page_code)
def run():
    """Program entry point: start the interactive browsing session."""
    get_page()
# Start the session only when executed as a script, not when imported.
if __name__ == '__main__':
    run()