# Source: https://www.jb51.net/article/119178.htm
# -*- coding: UTF-8 -*-
'''
https://github.com/halibobo/runnerbar-image
https://www.jb51.net/article/69153.html
'''
import re
import urllib.request
# ------ Fetch the raw source of a web page ------
def getHtml(url):
    """Download ``url`` and return the response body as raw bytes.

    Args:
        url: Any URL accepted by ``urllib.request.urlopen``.

    Returns:
        The undecoded response body (``bytes``); the caller picks the
        character encoding.

    Raises:
        urllib.error.URLError: propagated from ``urlopen`` when the request
            fails.
    """
    # The context manager closes the response even when read() raises;
    # the previous version never closed it.
    with urllib.request.urlopen(url) as page:
        return page.read()
# ------ Pass the URL of any thread to getHtml(), then decode the ------
# ------ downloaded bytes into text using UTF-8                   ------
html = getHtml("https://tieba.baidu.com/p/5352556650").decode('UTF-8')
# ------ Collect the address of every .jpg image referenced in a page ------
# The captured value may contain neither a double quote nor whitespace, so a
# match can never run past the closing quote of its own src attribute.
_JPG_SRC_RE = re.compile(r'src="([^"\s]*\.jpg)"')


def getImg(html):
    """Return every ``src="....jpg"`` value found in ``html``.

    Args:
        html: Page source as text (``str``).

    Returns:
        A list of the captured attribute values in document order; empty
        when nothing matches.
    """
    return _JPG_SRC_RE.findall(html)
imgList = getImg(html)
# Download URLs (taken from the URLs found in the page's JSON).
# Kept in a list rather than a set: the output files are numbered by
# position, and a set gives no ordering guarantee, so the byte ranges
# could be saved under the wrong index.
imgList_test = [
    "https://gjnkw6amjgits2kc304.exp.bcedocument.com/target/doc-hivu9jd6p2i4siw/doc-hivu9jd6p2i4siw/0.jpg?x-bce-range=0-191200",
    "https://gjnkw6amjgits2kc304.exp.bcedocument.com/target/doc-hivu9jd6p2i4siw/doc-hivu9jd6p2i4siw/0.jpg?x-bce-range=191200-411580",
    "https://gjnkw6amjgits2kc304.exp.bcedocument.com/target/doc-hivu9jd6p2i4siw/doc-hivu9jd6p2i4siw/0.jpg?x-bce-range=411580-564343",
    "https://gjnkw6amjgits2kc304.exp.bcedocument.com/target/doc-hivu9jd6p2i4siw/doc-hivu9jd6p2i4siw/0.jpg?x-bce-range=564343-660625",
    "https://gjnkw6amjgits2kc304.exp.bcedocument.com/target/doc-hivu9jd6p2i4siw/doc-hivu9jd6p2i4siw/0.jpg?x-bce-range=660625-700778",
]
for imgName, imgPath in enumerate(imgList_test):
    # Best-effort download: one failing URL is reported and skipped so the
    # remaining ones are still fetched. (Threads would speed this up.)
    try:
        # Fetch first, so a failed request does not leave an empty file.
        with urllib.request.urlopen(imgPath) as response:
            data = response.read()
        with open('D:\\temp\\' + "测试技术简介第一章_" + str(imgName) + ".jpg", 'wb') as f:
            f.write(data)
        print(imgPath)
    except Exception as e:
        # Include the exception so the cause of the failure is visible.
        print(imgPath + " error", e)
print("All Done!")
#######################
# -*- coding: UTF-8 -*-
'''
https://github.com/halibobo/runnerbar-image
https://www.jb51.net/article/69153.html
'''
import bs4
import urllib
from urllib import request
from bs4 import BeautifulSoup as bs
from urllib.error import URLError, HTTPError
import xlwt
from xlwt import Workbook
import json
import requests
import time
import datetime
import os
def getRaceInfo(id, page, timeout=30):
    """Fetch one page of an activity's photo album.

    POSTs ``activity_id``, ``page`` and a fixed ``pageSize`` of 100 to the
    album endpoint and repackages the JSON reply.

    Args:
        id: Activity id, sent as the ``activity_id`` form field.
        page: Page number, sent as the ``page`` form field.
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` may block indefinitely on a stalled connection.

    Returns:
        A dict with two keys: ``'items'`` (a list copy of
        ``album.searchResultList``) and ``'count'`` (the value of
        ``album.activity_photo_count`` exactly as the server returned it).

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status.
        ValueError: if the response body is not valid JSON.
        KeyError: if the expected ``album`` fields are missing.
    """
    url = 'http://m.yundong.runnerbar.com/yd_mobile/share/album.json'
    para = {'activity_id': id, 'page': page, 'pageSize': 100}
    r = requests.post(url, data=para, timeout=timeout)
    # Surface 4xx/5xx replies as errors instead of trying to parse them.
    r.raise_for_status()
    album = r.json()['album']
    return {
        'items': list(album['searchResultList']),
        'count': album['activity_photo_count'],
    }
def startRace(id):
    """Download every high-quality photo of one activity.

    Asks ``getRaceInfo`` for the photo count, walks all result pages and
    hands each item's ``url_hq`` to ``save_img``. Files are numbered
    consecutively starting at 1001 and stored in the folder ``book<id>``.
    Errors are printed, never raised: a failing page is skipped and the
    remaining pages are still processed.

    Args:
        id: Activity id; may be a str or an int.

    Returns:
        None.
    """
    page_size = 100  # must match the pageSize that getRaceInfo requests
    row_index = 1001
    target_dir = 'book{}'.format(id)

    try:
        first_page = getRaceInfo(id, 1)
        count = int(first_page['count'])
    except Exception as e:
        print('错误 :', e)
        return

    if count <= 0:
        return
    print(id, count)

    # Ceiling division: exactly as many pages as are needed to hold `count`
    # photos, with no trailing empty page when count is a multiple of 100.
    page_total = (count + page_size - 1) // page_size
    for page in range(1, page_total + 1):
        try:
            # Page 1 was already downloaded to learn the count; reuse it.
            if page == 1:
                items = first_page['items']
            else:
                items = getRaceInfo(id, page)['items']
            for item in items:
                print(item['url_hq'])
                save_img(item['url_hq'], row_index, target_dir)
                row_index = row_index + 1
                print(row_index)
        except Exception as e:
            # getRaceInfo uses requests and save_img reports its own
            # failures, so no urllib-specific handlers are needed here.
            print('错误 :', e)
def save_img(img_url, file_name, file_path='img'):
    """Download ``img_url`` into the folder ``file_path``.

    The folder is created when missing. The file is stored as
    ``<file_path>/<file_name><suffix>``, where the suffix is the extension
    of the URL's path component. Failures are printed, never raised.

    Args:
        img_url: Address of the image to download.
        file_name: Base name (without extension) of the saved file.
        file_path: Destination folder; defaults to ``img`` relative to the
            current working directory.

    Returns:
        None.
    """
    # Function-scope import: keeps the change local to this block.
    from urllib.parse import urlsplit

    try:
        if not os.path.exists(file_path):
            print('文件夹',file_path,'不存在,重新建立')
            # exist_ok guards against the folder appearing between the
            # check above and this call.
            os.makedirs(file_path, exist_ok=True)
        # Take the extension from the path only, so a query string such as
        # "?x-bce-range=0-100" does not end up in the file name.
        file_suffix = os.path.splitext(urlsplit(img_url).path)[1]
        # Build the file name including its folder.
        filename = '{}{}{}{}'.format(file_path, os.sep, file_name, file_suffix)
        # Download the image and store it in the folder.
        urllib.request.urlretrieve(img_url, filename=filename)
    except HTTPError as e:
        # Must precede IOError: HTTPError is a subclass of it and would
        # otherwise never reach this handler.
        print('Error code: ', e.code)
    except IOError as e:
        print('文件操作失败',e)
    except Exception as e:
        print('错误 :',e)
# NOTE(review): author's scratch note "0-100 10712 7376 10765" -- these look
# like other activity ids / ranges that were tried; confirm before relying on it.
for race_id in map(str, range(7835, 7836)):
    startRace(race_id)
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 【自荐】一款简洁、开源的在线白板工具 Drawnix
· 园子的第一款AI主题卫衣上架——"HELLO! HOW CAN I ASSIST YOU TODAY"
· Docker 太简单,K8s 太复杂?w7panel 让容器管理更轻松!