Python爬取全站壁纸代码
# Wallpaper crawler for https://www.ivsky.com/bizhi/
# Required third-party packages: requests, bs4
# The bare print() calls only insert blank lines to make the console output
# easier to read; they can be removed without changing what gets downloaded.
import os
import re
from random import uniform
from time import sleep

import requests
from bs4 import BeautifulSoup

_USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36"

# Without a timeout requests waits forever on a stalled connection.
_REQUEST_TIMEOUT = 30

# A theme is split into listing pages of this many thumbnails each.
_IMAGES_PER_PAGE = 16

# Theme titles look like "<name>(<count>张)"; group 1 is the bare name and
# group 2 the number of images in the theme.
_TITLE_PATTERN = re.compile(r"(.+)[(](\d+?)张[)]")


def _safe_dir_name(name):
    """Return *name* with characters that are illegal in file names replaced."""
    cleaned = re.sub(r'[\\/:*?"<>|]', "_", name).strip()
    return cleaned or "_"


def _parse_theme_title(title):
    """Split a theme title into ``(name, image_count)``.

    ``image_count`` is ``None`` when the title has no "(N张)" suffix, in
    which case the whole title is returned as the name.
    """
    match = _TITLE_PATTERN.search(title)
    if match is None:
        return title, None
    return match.group(1), int(match.group(2))


# Page fetching
def url_open(url):
    """Fetch *url* with browser-like headers and return the body as text."""
    headers = {
        "User-Agent": _USER_AGENT,
        "Referer": "https://www.ivsky.com/bizhi/",
    }
    return requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT).text


# Collect the links of all wallpaper themes
def get_url_all(max_pages=1):
    """Collect theme URLs and titles from the wallpaper index pages.

    Args:
        max_pages: How many index pages to scan. The default of 1 keeps the
            short debugging run of the original script; pass 100 to scan
            every index page.

    Returns:
        A two-element list ``[theme_urls, theme_titles]`` whose entries
        correspond by position.
    """
    print("正在收集整理壁纸主题网址,请稍候.....")
    print()
    theme_url_list = []
    theme_title_list = []
    page_totle = 100  # the wallpaper index has 100 pages in total

    # Walk the index pages one by one and collect every theme link.
    for page in range(1, min(max_pages, page_totle) + 1):
        url = "https://www.ivsky.com/bizhi/index_{}.html".format(page)
        soup = BeautifulSoup(url_open(url), "html.parser")
        for each in soup.find_all("div", class_="il_img"):
            link = each.a
            # Skip malformed entries so URLs and titles stay aligned.
            if link is None or not link.get("title") or not link.get("href"):
                continue
            theme_title_list.append(link["title"])
            theme_url_list.append("https://www.ivsky.com" + link["href"])

    # Pack both lists so they can be returned together.
    data = [theme_url_list, theme_title_list]

    theme_totle = len(data[0])  # number of themes found
    print("壁纸网址收集结束,共收集%d个主题,准备进行图片下载....." % theme_totle)
    sleep(1)  # purely cosmetic pause
    return data


def save_img(img_url_list, theme_name, work_path):
    """Download every image of one theme into its own sub-directory.

    Files are written as ``1.jpg``, ``2.jpg``, ... inside
    ``<work_path>/<theme_name>``. The directory is created when missing and
    the process working directory is left untouched.

    Args:
        img_url_list: Image URLs to download, in order.
        theme_name: Theme name; used for the directory and progress output.
        work_path: Base directory under which the theme directory is created.
    """
    # One directory per theme, built portably instead of with a "\" literal.
    save_path = os.path.join(work_path, _safe_dir_name(theme_name))
    os.makedirs(save_path, exist_ok=True)

    headers = {"User-Agent": _USER_AGENT}
    for num, img_url in enumerate(img_url_list, start=1):
        print("正在下载主题“%s”第%d张图片" % (theme_name, num))
        content = requests.get(
            img_url, headers=headers, timeout=_REQUEST_TIMEOUT
        ).content
        with open(os.path.join(save_path, "%d.jpg" % num), "wb") as f:
            f.write(content)

        # Random short pause to go easy on the server (raise it to be kinder).
        sleep(uniform(0.18, 0.37))


def get_img(data):
    """Resolve the image URLs of every theme and download them.

    Args:
        data: ``[theme_urls, theme_titles]`` as returned by ``get_url_all``.
    """
    img_root_url = "https://img.ivsky.com/img/bizhi/pre/"
    work_path = os.getcwd()
    num_2 = 0  # total number of image links collected

    for theme_url, theme_name_temp in zip(data[0], data[1]):
        # Drop the "(N张)" suffix from the title and read the image count.
        theme_name, img_num = _parse_theme_title(theme_name_temp)
        print()
        print("正在下载主题:%s" % theme_name)
        print()

        # Ceiling division keeps the page count an int; the original used
        # "/" here, which produced a float and broke range() for themes
        # whose image count is an exact multiple of the page size.
        if img_num is None:
            page_totle = 1  # unknown count: assume a single listing page
        else:
            page_totle = -(-img_num // _IMAGES_PER_PAGE)

        # A single-page theme is served from the theme URL itself; larger
        # themes are paginated as index_1.html, index_2.html, ...
        if page_totle == 1:
            page_urls = [theme_url]
        else:
            page_urls = [
                theme_url + "index_{}.html".format(page)
                for page in range(1, page_totle + 1)
            ]

        img_url_list = []  # download links of the current theme
        for page_url in page_urls:
            soup = BeautifulSoup(url_open(page_url), "html.parser")
            for each in soup.find_all("div", class_="il_img"):
                img = each.img
                src = img.get("src", "") if img is not None else ""
                # The part after "/t/" in the thumbnail URL is appended to
                # the full-size image root.
                parts = src.split("/t/")
                if len(parts) < 2:
                    continue
                img_url_list.append(img_root_url + parts[1])
                num_2 += 1

        save_img(img_url_list, theme_name, work_path)  # download and store

    print()
    print("任务完成,共计下载图片%d张" % num_2)


def main(path=r'C:\Users\Administrator\Desktop\test'):
    """Run the crawler, storing all downloads below *path*.

    The directory is created when missing and becomes the working directory,
    because ``get_img`` saves relative to the current working directory.
    """
    os.makedirs(path, exist_ok=True)
    os.chdir(path)
    data = get_url_all()
    get_img(data)


if __name__ == "__main__":
    main()
千行代码,Bug何处藏。 纵使上线又怎样,朝令改,夕断肠。
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· 开源Multi-agent AI智能体框架aevatar.ai,欢迎大家贡献代码
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· AI技术革命,工作效率10个最佳AI工具
2018-10-15 dns-prefetch应用好,网上速度能提高一半!