从 GitHub 下载单个目录的 Python 实现
网上说 Git 1.7.0 之后加入了 Sparse Checkout 模式,可以指定只检出哪些文件。但实际试了一下,执行 git remote add -f origin <url> 时仍然会把整个仓库的历史数据下载到 .git 目录中,并不是我想要的效果,于是干脆手写一个脚本。
先安装依赖库:
python -m pip install --upgrade pip
pip install requests bs4
#!/usr/bin/env python3
# -*- coding: utf8 -*-
"""Download the files of one GitHub directory without cloning the repository.

Usage:
    python <script> <github-tree-url> [target-dir]

The directory page is fetched and parsed for file links, then each file is
downloaded from raw.githubusercontent.com into ``target-dir`` (default: the
current directory).  Entries whose raw URL cannot be fetched are reported
and skipped.
"""

import os
import sys
from urllib.parse import quote

try:
    import requests
    from bs4 import BeautifulSoup
except ImportError:  # main() reports this with an install hint
    requests = None
    BeautifulSoup = None

# CSS selector for the file links of a directory listing.
# NOTE(review): tied to GitHub's HTML markup; re-check it if no files are found.
FILE_LINK_SELECTOR = 'div[aria-labelledby="files"] div[role="rowheader"] a'

# Seconds to wait for a response, so a stalled connection cannot hang forever.
REQUEST_TIMEOUT = 30


def to_raw_base_url(url):
    """Return the raw.githubusercontent.com base URL for a GitHub tree URL.

    Example:
        https://github.com/docker-library/php/tree/master/8.0/buster/fpm
        -> https://raw.githubusercontent.com/docker-library/php/master/8.0/buster/fpm

    Only the first occurrence of each marker is replaced, so a directory that
    is itself named ``tree`` stays intact.  A trailing slash is dropped to
    avoid ``//`` once a file name is appended.
    """
    raw = url.strip().rstrip('/')
    raw = raw.replace("github.com", "raw.githubusercontent.com", 1)
    return raw.replace("/tree/", "/", 1)


def list_file_names(html):
    """Return the file names linked from a GitHub directory page.

    The first matching link is skipped, exactly as the original script did
    (presumably it is the ".." parent-directory row -- TODO confirm for
    repository-root URLs).  Links without a plain-text name and names that
    are not a single path component are ignored.
    """
    soup = BeautifulSoup(html, 'html.parser')
    links = soup.select(FILE_LINK_SELECTOR)

    names = []
    for link in links[1:]:  # slicing, unlike pop(0), is safe on an empty list
        name = link.string
        if not name:
            continue
        name = name.strip()
        # Names come from downloaded HTML: never let one escape the target dir.
        if name in ('', '.', '..') or os.path.basename(name) != name:
            continue
        names.append(name)
    return names


def download_file(url, destination):
    """Download *url* and save it to *destination*.

    The body is written as raw bytes so binary files and the original line
    endings are stored unchanged.

    Raises:
        requests.RequestException: on network errors or a non-success status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    with open(destination, 'wb') as fp:
        fp.write(response.content)


def main(argv=None):
    """Run the downloader and return a process exit status.

    Args:
        argv: Argument list in ``sys.argv`` layout (program name first);
            defaults to ``sys.argv``.

    Returns:
        0 on success, 1 on a runtime failure, 2 when the URL argument is
        missing.
    """
    argv = sys.argv if argv is None else argv

    if len(argv) < 2:
        print('用法: python <脚本> <GitHub 目录 url> [保存目录]')
        return 2

    # str.isspace() is False for '', so test for emptiness after stripping.
    url = argv[1].strip()
    if not url:
        print('url 地址为空!')
        return 1

    # The target directory is optional; blank or missing means "here".
    path = argv[2] if len(argv) > 2 and argv[2].strip() else '.'

    if requests is None or BeautifulSoup is None:
        print('缺少依赖,请先执行: pip install requests bs4')
        return 1

    if not os.path.exists(path):
        print('新建目录:' + path)
        os.makedirs(path, exist_ok=True)  # also creates missing parents

    try:
        page = requests.get(url, timeout=REQUEST_TIMEOUT)
        page.raise_for_status()
    except requests.RequestException as err:
        print('获取目录页面失败: ' + str(err))
        return 1
    page.encoding = 'utf8'

    names = list_file_names(page.text)
    if not names:
        print('未找到任何文件: ' + url)
        return 1

    raw_base = to_raw_base_url(url)
    failures = 0
    for name in names:
        # Quote the name so spaces, '#', '?' and non-ASCII characters survive.
        file_url = raw_base + '/' + quote(name)
        print("文件" + name + " :" + file_url)
        try:
            download_file(file_url, os.path.join(path, name))
        except requests.RequestException as err:
            # Best effort: report the failure and carry on with the rest.
            print('下载失败: ' + name + ' (' + str(err) + ')')
            failures += 1

    return 1 if failures else 0


if __name__ == '__main__':
    sys.exit(main())