A Complete Python Scraper and File Handler with Matplotlib

import os
import requests
import requests
from bs4 import BeautifulSoup
from typing import Dict

# Module-level registry filled by the_big_div(): maps the text of a div's
# 'sortid' link to the div that contains that link.
total_div: Dict[str, BeautifulSoup] = {}

def the_big_div(text: str):
    """Index the labelled blocks of a listing page into ``total_div``.

    Parses ``text`` as HTML with the lxml parser. For every ``<div>`` matched
    by ``class_='code-content content'`` that contains an ``<a class="sortid">``
    link, the div is stored in the module-level ``total_div`` under that
    link's text. Divs without such a link are skipped, and an existing key is
    overwritten by a later div with the same label.
    """
    page = BeautifulSoup(text, 'lxml')
    for block in page.find_all('div', class_='code-content content'):
        label_link = block.find('a', class_='sortid')
        if not label_link:
            # No label link in this div, so there is no key to file it under.
            continue
        total_div[label_link.text] = block
    

def download_file(url, local_file_path, timeout=30):
    """Download ``url`` and save the response body to ``local_file_path``.

    Args:
        url: Address of the file to fetch.
        local_file_path: Destination path. Missing parent directories are
            created before the file is written.
        timeout: Seconds to wait for the server before giving up, passed
            straight to ``requests.get``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the destination cannot be created or written.
    """
    # Fetch first: a failed request must not leave an empty file behind.
    response = requests.get(url, timeout=timeout)
    # Refuse to save an error page under the target file name.
    response.raise_for_status()

    # Ensure the directory exists (a bare file name has no parent to create).
    parent_dir = os.path.dirname(local_file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(local_file_path, 'wb') as f:
        f.write(response.content)

# Example usage:
# download_file('https://example.com/file.torrent', '/torrent/file.torrent')

def parser_the_sub_html(url: str) -> str:
    """Fetch a sub-item page and return the ``href`` of its 'xfcomment' link.

    Args:
        url: Address of the sub-item page to download and parse.

    Returns:
        The ``href`` attribute of the first ``<a class="xfcomment">`` element.

    Raises:
        ValueError: If the page has no such link, or the link has no ``href``.
        requests.RequestException: If fetching the page fails.
    """
    soup = BeautifulSoup(requests.get(url).text, 'lxml')
    link = soup.find('a', class_='xfcomment')
    href = link.get('href') if link is not None else None
    if href is None:
        # find() returns None when nothing matches; fail with the page URL
        # instead of an opaque AttributeError on NoneType.
        raise ValueError(f"no <a class='xfcomment'> link with an href found at {url}")
    return href
    
def clean_the_file_name(name: str) -> str:
    """Turn ``name`` into a safer file name of at most 100 characters.

    Spaces and ``/`` become ``_``; ``:``, ``?``, ``*`` and newlines are
    removed. When the cleaned name is longer than 100 characters it is
    shortened. A short trailing extension (such as ``.torrent``) is kept by
    trimming the stem instead of the end of the name, so truncated files
    keep their type.

    Args:
        name: Raw file name, typically ``"<title>.torrent"``.

    Returns:
        The cleaned name, never longer than 100 characters.
    """
    max_len = 100
    # Anything longer than this after the last dot is treated as part of the
    # title (e.g. "v1.2 some long text"), not as a file extension.
    max_ext_len = 10

    # Single pass over the string instead of six chained replace() calls.
    table = str.maketrans({
        ' ': '_',
        '/': '_',
        ':': None,
        '?': None,
        '*': None,
        '\n': None,
    })
    cleaned = name.translate(table)

    if len(cleaned) <= max_len:
        return cleaned

    stem, ext = os.path.splitext(cleaned)
    if 0 < len(ext) <= max_ext_len:
        return stem[:max_len - len(ext)] + ext
    return cleaned[:max_len]


if __name__ == '__main__':
    # Crawl listing pages 101..201 and download every torrent linked from
    # each page's '动画' block into the local 'torrent' folder.
    url = 'https://share.xfsub.org'

    # Create the output folder up front so the first download cannot fail
    # just because the folder is missing.
    os.makedirs('torrent', exist_ok=True)

    for i in range(101, 202):
        sub_urls = f'https://share.xfsub.org/sort-1-{i}.html'

        # total_div is module-level and the_big_div() only ever adds to it;
        # clear it so a page without a '动画' block does not silently reuse
        # (and re-download) the previous page's block.
        total_div.clear()
        the_big_div(requests.get(sub_urls).text)
        print(total_div.keys())

        anime_block = total_div.get('动画')
        if anime_block is None:
            print(f'page {i}: no 动画 block found, skipping')
            continue

        # the address of the subitem page
        all_subitem = anime_block.find_all('a', class_='name-text')
        # iterate over the subitems
        for sub_item in all_subitem:
            try:
                download_file(
                    parser_the_sub_html(
                        url + sub_item.get('href')
                    ),
                    'torrent/' + clean_the_file_name(f'{sub_item.text}.torrent')
                )
            except requests.RequestException as exc:
                # One unreachable item should not abort the whole crawl.
                print(sub_item.text.strip(), '\t' * 5, f'________________ -> failed: {exc}')
                continue
            print(sub_item.text.strip(), '\t' * 5, '________________ -> done')
import os
import re
from collections import Counter
import matplotlib.pyplot as plt
import pandas as pd

def get_file_list(path: str) -> list:
    """Return the names of the entries in directory ``path``.

    The names come straight from ``os.listdir``: they are not joined with
    ``path`` and are in whatever order the OS reports them.
    """
    return os.listdir(path)

if __name__ == '__main__':
    # Count how many downloaded files carry each leading "[tag]" / "【tag】"
    # prefix and show the counts as a bar chart.

    # Presumably chosen so the Chinese chart labels render; confirm SimHei is
    # installed on the target machine.
    plt.rcParams['font.sans-serif'] = ['SimHei']

    torrent_names = get_file_list(r'C:\Users\123\Desktop\Project_Try\demo\src\test\java\scripts\torrent')

    # re.match anchors at the start of the name, so only a leading bracketed
    # tag is captured; names without one are dropped.
    bracket_matches = (
        re.match(r'[\[\【](.*?)[\]\】]', file_name) for file_name in torrent_names
    )
    tags = [found.group(1) for found in bracket_matches if found]

    # most_common() orders the rows from the most frequent tag to the least.
    tally = Counter(tags)
    frame = pd.DataFrame(tally.most_common(), columns=['资源', '数量'])

    labels = frame['资源'].to_list()
    heights = frame['数量'].to_list()
    plt.bar(labels, heights)
    plt.title('资源统计')

    plt.show()
posted @ 2024-06-23 23:27  Y&Qter  阅读(11)  评论(0)  编辑  收藏  举报