Python crawler to download 156 web mini-game assets from a site
See for yourself which games are included ↓
A batch of web mini-games (perfect for slacking off)
https://www.52pojie.cn/thread-1269936-1-1.html
If you'd rather not read the code, you can download the finished result directly; it's split into two packages:
https://wwi.lanzoui.com/iwGxvgqiwzc
Password: d89r
https://wwi.lanzoui.com/i7WQvgqisqj
Password: dg3j
The Python code is below:
import requests
from bs4 import BeautifulSoup


def get_Url(url):
    # Read the "current/total" page indicator on the first list page and
    # build the URL of every list page in the category.
    str_list = []
    content = requests.get(url).content
    soup = BeautifulSoup(content, 'lxml')
    find = soup.find('span', attrs={'class': 'current'})
    sum = int(find.text.split('/')[1])
    for i in range(sum):
        if i == 0:
            str_list.append('https://www.mycodes.net/166/')
            continue
        str_list.append('https://www.mycodes.net/166/' + str(i + 1) + '.htm')
    return str_list


def get_document(url):
    # Collect the detail-page links on one list page, then print each game's
    # name followed by its download links.
    soup = BeautifulSoup(requests.get(url).content, 'lxml')
    find_all = soup.find_all('a', attrs={'style': 'color:#006BCD;font-size:14px;'})
    a = ''
    for value in find_all:
        # Skip a link identical to the previous one so the same detail page
        # is not fetched twice.
        if a == str(value['href']):
            continue
        a = value['href']
        document = BeautifulSoup(requests.get(value['href']).content, 'lxml')
        text = document.find('td', attrs={'class': 'a0'}).text
        print(text + ":")
        td_s = document.find_all('td', attrs={'class': 'b1'})
        for td in td_s:
            find = td.find('a')
            if find is not None:
                print(find['href'])


if __name__ == '__main__':
    url_list = get_Url('https://www.mycodes.net/166/')
    for url in url_list:
        get_document(url)
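The script above only prints each game's name and its download links; it does not save anything to disk. Below is a minimal sketch of how the printed links could be downloaded, assuming they are direct links to archive files (the download_file helper and the games output directory are illustrative names, not part of the original post):

import os
from urllib.parse import urlparse, unquote

import requests


def download_file(file_url, out_dir='games'):
    # Save one file into out_dir, naming it after the last path segment of the URL.
    os.makedirs(out_dir, exist_ok=True)
    filename = unquote(os.path.basename(urlparse(file_url).path)) or 'download.bin'
    resp = requests.get(file_url, stream=True, timeout=30)
    resp.raise_for_status()
    with open(os.path.join(out_dir, filename), 'wb') as f:
        for chunk in resp.iter_content(chunk_size=8192):
            f.write(chunk)


# Usage: inside get_document(), call download_file(find['href']) instead of,
# or in addition to, print(find['href']).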