下载8000首儿歌的python代码
下载8000首儿歌的 Python 代码如下:
# -*- coding: UTF-8 -*-
"""Download children's-song MP3 files from www.youban.com.

Each numbered page ``http://www.youban.com/mp3-d<N>.html`` is fetched, a
title and a link are scraped out of its ``.mp3downloadbox`` element, and the
linked file is saved as ``<N>_<title>.mp3``.

The code runs on both Python 2 and Python 3.
"""
from contextlib import closing
import logging
import os
import re
import sys
import urllib

from lxml import etree
from pyquery import PyQuery as py

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

try:
    text_type = unicode  # Python 2
except NameError:
    text_type = str  # Python 3

# Characters replaced by "_" in file names: space, right single quotation
# mark (U+2019, written as an escape so it is a text character on Python 2
# as well) and the ASCII apostrophe.
_UNSAFE_FILENAME_CHARS = (u' ', u'\u2019', u"'")

# Group 1: text of the <h1> element.  Group 2: the first double-quoted value
# after the first "<a" that follows "downloadboxlist" (presumably the href of
# the download link -- verify against the live page markup).
# Compiled once here instead of on every loop iteration.
_SONG_PATTERN = re.compile(
    u'.*<h1>(.*)</h1>.*downloadboxlist.*?<a.*?"(.*?)"',
    re.UNICODE | re.S
)


def format(filename):
    """Return *filename* with spaces and apostrophes replaced by "_".

    *filename* is expected to be text (``unicode`` on Python 2).  Comparing
    it against text constants avoids the implicit ASCII decode that a
    non-ASCII byte-string literal would trigger on Python 2.
    """
    for char in _UNSAFE_FILENAME_CHARS:
        filename = filename.replace(char, u"_")
    return filename


def download_mp3(mp3_url, filename, dir, logger=None):
    """Fetch *mp3_url* and store it as *filename* inside directory *dir*.

    Nothing is downloaded when the target file already exists.  Failures are
    logged and swallowed so that one bad link does not stop a batch run.

    Args:
        mp3_url: URL of the file to fetch.
        filename: Base name of the file to create.
        dir: Existing directory that receives the file.
        logger: Logger for progress messages.  Defaults to the logger named
            "simple", so the function no longer depends on a module-level
            ``logger`` variable being defined.
    """
    if logger is None:
        logger = logging.getLogger("simple")

    path = os.path.join(dir, filename)
    if os.path.exists(path):
        logger.debug("%s is existed.", path)
        return

    try:
        # Fetch first, create the file second: if the request fails, no empty
        # file is left behind to be mistaken for a finished download later.
        with closing(urlopen(mp3_url)) as response:
            data = response.read()
        with open(path, 'wb') as out:
            out.write(data)
    except Exception as err:
        # Remove a partially written file so that a rerun retries it.
        try:
            os.remove(path)
        except OSError:
            pass  # nothing was written
        logger.debug("%s is not downloaded. (%r)", filename, err)
    else:
        logger.debug("%s is downloaded.", filename)


def download_all_mp3(start, end, dir, logger):
    """Download the songs of pages *start* .. *end* - 1 into *dir*.

    Args:
        start: First page number (inclusive).
        end: Last page number (exclusive, as in ``range``).
        dir: Existing directory that receives the MP3 files.
        logger: Logger for progress and error messages.

    A page that cannot be fetched or parsed is logged and skipped; the loop
    always continues with the next page number.
    """
    for x in range(start, end):
        url = "http://www.youban.com/mp3-d" + str(x) + ".html"
        try:
            logger.debug("%s: %s", x, url)
            doc = py(url=url)
            box = doc('.mp3downloadbox')
            # An empty selection is falsy.  Skip only this page: page numbers
            # may have gaps, so one missing page must not end the whole run.
            if not box:
                logger.debug("%s is not existed.", url)
                continue

            match = _SONG_PATTERN.search(text_type(box))
            if match is None:
                logger.debug("%s is not useful", url)
                continue

            title = match.group(1).strip()
            link = match.group(2)
            if link == '' or title == '':
                logger.debug("%s is not useful", url)
                continue

            target_name = format(text_type(x) + u"_" + title + u".mp3")
            logger.debug("%s: %s", x, link)
            download_mp3(link, target_name, dir, logger)
        except Exception as err:
            # "Exception" rather than a bare "except" so that Ctrl-C
            # (KeyboardInterrupt) can still stop a long run.
            logger.debug("%s met exception. (%r)", url, err)
if __name__ == "__main__":
    # Usage: <script> [start end [dir_root]]
    #   start, end  page-number range handed to download_all_mp3
    #               (default 1 and 8000)
    #   dir_root    directory under which "<start>-<end>" is created
    #               (default e:\song)
    # Every argument is optional; a missing one falls back to its default
    # instead of raising IndexError.
    args = sys.argv[1:]

    dir_root = "e:\\song"
    if len(args) > 2 and args[2] != '':
        dir_root = args[2]

    start, end = 1, 8000
    if len(args) > 1:
        # Convert before comparing: the raw arguments are strings, and
        # comparing a string with 0 is meaningless (always true on Python 2,
        # TypeError on Python 3).
        try:
            first, last = int(args[0]), int(args[1])
        except ValueError:
            sys.exit("start and end must be integers, got %r and %r"
                     % (args[0], args[1]))
        if first >= 0 and last >= 0:
            start, end = first, last
    print("Download from %s to %s.\n" % (start, end))

    # Plain concatenation rather than os.path.join: on Windows,
    # os.path.join("e:", "1-8000") would give the drive-relative "e:1-8000".
    target_dir = dir_root + os.sep + str(start) + "-" + str(end)
    if not os.path.isdir(target_dir):
        # makedirs also creates dir_root itself when it does not exist yet.
        os.makedirs(target_dir)
    print("Download to " + target_dir + ".\n")

    # "logger" stays a module-level name: helper functions in this file may
    # look it up as a global.
    logger = logging.getLogger("simple")
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter("%(message)s")
    # Every message goes both to the console and to download.log.
    fh = logging.FileHandler(target_dir + os.sep + "download.log")
    ch = logging.StreamHandler()
    ch.setFormatter(formatter)
    fh.setFormatter(formatter)
    logger.addHandler(ch)
    logger.addHandler(fh)

    download_all_mp3(start, end, target_dir, logger)
有需要的读者可以参考并继续修改。
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· winform 绘制太阳,地球,月球 运作规律
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· AI与.NET技术实操系列(五):向量存储与相似性搜索在 .NET 中的实现
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理