使用python拉取ubuntu软件源
目前为止最中意的 Ubuntu 版本就是 12.04 LTS,但是担心这个版本的软件源在未来几年中被官方下架,所以编写了一个脚本,将指定 Ubuntu 版本的所有软件源同步到本地。
#!/usr/bin/python
# -*- coding: UTF-8 -*-

'''
Mirror every package of the given Ubuntu release(s) into a local directory.

Usage:
    python pull.ubuntu.packages.py <mirror-host/path> <dst-parent-dir> <version-code>...

The script
  1. scrapes the mirror's /dists and /indices listings for entries whose
     name contains one of the version codes,
  2. rsyncs /project, the selected indices and the selected dists,
  3. parses every Sources.gz / Packages.gz found under the synced dists and
     writes the referenced file paths to Packages.list next to this script,
  4. rsyncs exactly those files with --files-from.

@author gaocan 809900210@qq.com
'''

import contextlib
import gzip
import io
import os
import re
import shlex
import shutil
import subprocess
import sys
import tempfile
import urllib

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

# Directory containing this script; Packages.list is written here.
script_dir = os.path.dirname(os.path.realpath(__file__))

# Field matchers for the Debian control-file format used by the index files.
_FILES_HEADER_RE = re.compile(r'Files:\s+.*', re.M | re.I)
_FILE_ENTRY_RE = re.compile(r'\s+.*\s+\d*\s+.*', re.M | re.I)
_FILENAME_RE = re.compile(r'Filename:\s+.*', re.M | re.I)
_DIRECTORY_RE = re.compile(r'Directory:\s+.*', re.M | re.I)

# href value of every anchor in an HTML directory listing.
_ANCHOR_HREF_RE = re.compile(r'<a\s[^>]*?href="([^"]*)"', re.I)


def SystemCall(cmd):
    '''
    Run an external command and terminate the script if it fails.

    cmd may be an argument list (run directly, no shell involved) or a
    command string (run through the shell, as before).  Exits with status 1
    when the command returns a non-zero status.
    '''
    if isinstance(cmd, (list, tuple)):
        status = subprocess.call(cmd)
    else:
        status = os.system(cmd)
    if status != 0:
        sys.exit(1)


def Extract(src_file, dst_file):
    '''
    Decompress the gzip file src_file into dst_file.

    The data is streamed, so large indexes are never held in memory at once,
    and both handles are closed even when decompression fails.
    '''
    with gzip.open(src_file, 'rb') as src:
        with open(dst_file, 'wb') as dst:
            shutil.copyfileobj(src, dst)


#create folder
def CreateFolder(dst_dir):
    '''Create dst_dir (and missing parents); an existing directory is fine.'''
    try:
        os.makedirs(dst_dir)
    except OSError:
        # Only swallow the error when the directory is really there.
        if not os.path.isdir(dst_dir):
            raise


@contextlib.contextmanager
def _OpenIndex(filename):
    '''
    Yield a seekable text handle on the decompressed content of filename.

    The data is extracted to a private temporary file instead of next to the
    archive, so an uncompressed sibling in the mirror tree (e.g. a real
    "Packages" beside "Packages.gz") is never overwritten or deleted.  The
    temporary file is removed even when parsing raises.
    '''
    fd, tmp_path = tempfile.mkstemp(suffix='.index')
    os.close(fd)
    try:
        Extract(filename, tmp_path)
        # Index files are UTF-8; undecodable bytes are replaced, not fatal.
        with io.open(tmp_path, 'r', encoding='utf-8', errors='replace') as f:
            yield f
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def ParsePackageNames(f):
    '''
    Read the file names listed in the next "Files:" field of the current
    stanza.

    f must support readline(), tell() and seek().  Reading starts at the
    current position.  Each entry line is "<checksum> <size> <name>"; the
    third column is collected.  On return the handle is positioned at the
    first line after the list, so the caller can continue parsing there.

    Returns an empty list when the stanza ends (blank line) or the file ends
    before a "Files:" field is found, so files of a following stanza are
    never attributed to the current one.
    '''
    line = f.readline()
    while line and not _FILES_HEADER_RE.match(line):
        if not line.strip():
            # Blank line: the stanza is over and had no "Files:" field.
            return []
        line = f.readline()

    names = []
    if not line:
        return names

    while True:
        pos = f.tell()
        line = f.readline()
        if not line:
            break
        fields = line.split()
        # The length guard protects against whitespace-only or truncated
        # lines that the loose entry pattern would still accept.
        if _FILE_ENTRY_RE.match(line) and len(fields) >= 3:
            names.append(fields[2])
        else:
            # Not part of the list: push the line back for the caller.
            f.seek(pos)
            break
    return names


def ParsePackages(filename):
    '''
    Return the value of every "Filename:" field in the gzip-compressed
    Packages index filename, in file order.
    '''
    with _OpenIndex(filename) as f:
        return [line[len('Filename:'):].strip()
                for line in f if _FILENAME_RE.match(line)]


def ParseSources(filename):
    '''
    Return "<Directory>/<file name>" for every file listed in the
    gzip-compressed Sources index filename, in file order.
    '''
    packages = []
    with _OpenIndex(filename) as f:
        # readline() instead of "for line in f": ParsePackageNames relies on
        # tell()/seek(), which text files disable during iteration.
        line = f.readline()
        while line:
            if _DIRECTORY_RE.match(line):
                package_dir = line[len('Directory:'):].strip()
                for package_name in ParsePackageNames(f):
                    packages.append(package_dir + '/' + package_name)
            line = f.readline()
    return packages


def _Unique(items):
    '''Return items without duplicates, keeping first-seen order.'''
    seen = set()
    result = []
    for item in items:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result


def _ExtractNames(html, version_code):
    '''
    Return the entry names linked from a directory-listing page whose name
    contains version_code (plain substring, not a regular expression).

    Only the last path component of each href is used, so trailing slashes,
    absolute hrefs and extra anchor attributes do not leak into the result.
    '''
    names = []
    for href in _ANCHOR_HREF_RE.findall(html):
        name = href.rstrip('/').rsplit('/', 1)[-1]
        if name and version_code in name:
            names.append(name)
    return _Unique(names)


def _FetchListing(url):
    '''Download url and return its body decoded as UTF-8.'''
    with contextlib.closing(urlopen(url)) as response:
        return response.read().decode('utf-8')


def ParseDists(https_url, version_code):
    '''Return the entries of <https_url>/dists whose name contains version_code.'''
    return _ExtractNames(_FetchListing(https_url + '/dists'), version_code)


def ParseIndices(https_url, version_code):
    '''Return the entries of <https_url>/indices whose name contains version_code.'''
    return _ExtractNames(_FetchListing(https_url + '/indices'), version_code)


#get dists
def GetDists(version_codes, https_url):
    '''Return the dists entries matching any of version_codes (no duplicates).'''
    l = []
    for version_code in version_codes:
        l.extend(ParseDists(https_url, version_code))
    return _Unique(l)


#get indices
def GetIndices(version_codes, https_url):
    '''Return the indices entries matching any of version_codes (no duplicates).'''
    l = []
    for version_code in version_codes:
        l.extend(ParseIndices(https_url, version_code))
    return _Unique(l)


def _RsyncArgs(rsync_cmd, *extra):
    '''
    Build an argument list from the rsync command string plus extra
    arguments.  Passing a list to SystemCall keeps URLs and paths intact even
    when they contain spaces or shell metacharacters.
    '''
    return shlex.split(rsync_cmd) + list(extra)


#rsync project
def RsyncProject(rsync_cmd, rsync_url, dst_dir):
    '''Sync <rsync_url>/project into dst_dir.'''
    SystemCall(_RsyncArgs(rsync_cmd, rsync_url + '/project', dst_dir))


#rsync indices
def RsyncIndices(indices, rsync_cmd, rsync_url, dst_dir):
    '''Sync every <rsync_url>/indices/<name> in indices into dst_dir.'''
    for indice in indices:
        SystemCall(_RsyncArgs(rsync_cmd, rsync_url + '/indices/' + indice, dst_dir))


#rsync dists
def RsyncDists(dists, rsync_cmd, rsync_url, dst_dir):
    '''Sync every <rsync_url>/dists/<name> in dists into dst_dir.'''
    for dist in dists:
        SystemCall(_RsyncArgs(rsync_cmd, rsync_url + '/dists/' + dist, dst_dir))


#make packages list
def MakePackagesList(packages_list, dst_dir, dists):
    '''
    Write to packages_list, one path per line, every file referenced by the
    Sources.gz and Packages.gz indexes found under <dst_dir>/dists/<dist>
    for each dist in dists.
    '''
    parsers = {'Sources.gz': ParseSources, 'Packages.gz': ParsePackages}
    with io.open(packages_list, 'w', encoding='utf-8') as packages_list_file:
        for dist in dists:
            dist_root = os.path.join(dst_dir, 'dists', dist)
            for root, _dirs, files in os.walk(dist_root):
                for filename in files:
                    parser = parsers.get(filename)
                    if parser is None:
                        continue
                    for package in parser(os.path.join(root, filename)):
                        packages_list_file.write(package + u'\n')


#rsync dst from packages.list
def RsyncPackages(packages_list, rsync_cmd, rsync_url, dst_dir):
    '''Sync the files named in packages_list from rsync_url into dst_dir.'''
    SystemCall(_RsyncArgs(rsync_cmd, '--files-from=' + packages_list,
                          rsync_url, dst_dir))


def main():
    '''
    Command-line entry point.

    Expects: <mirror-host/path> <dst-parent-dir> <version-code>...
    Returns 0 on success and 1 on bad usage; a failing rsync terminates the
    process with status 1 via SystemCall.
    '''
    if len(sys.argv) < 4:
        sys.stderr.write(
            'usage: %s <mirror-host/path> <dst-parent-dir> <version-code>...\n'
            % sys.argv[0])
        return 1

    # Mirror location and local target directory.
    web_url = sys.argv[1]
    dst_dir = sys.argv[2] + '/ubuntu'

    # Every remaining argument is a release code name (e.g. precise).
    version_codes = sys.argv[3:]

    # List of package files to fetch, generated from the synced indexes.
    packages_list = script_dir + '/Packages.list'

    # -R keeps the remote relative paths below dst_dir.
    rsync_cmd = 'rsync -R -r -z -a -v'

    rsync_url = 'rsync://rsync.' + web_url
    https_url = 'https://' + web_url

    dists = GetDists(version_codes, https_url)
    # md5sums.gz is always fetched in addition to the per-release indices.
    indices = ['md5sums.gz'] + GetIndices(version_codes, https_url)

    CreateFolder(dst_dir)

    RsyncProject(rsync_cmd, rsync_url, dst_dir)
    RsyncIndices(indices, rsync_cmd, rsync_url, dst_dir)
    RsyncDists(dists, rsync_cmd, rsync_url, dst_dir)
    MakePackagesList(packages_list, dst_dir, dists)
    RsyncPackages(packages_list, rsync_cmd, rsync_url, dst_dir)

    print('rsync finished!')
    return 0


if __name__ == "__main__":
    # Propagate main()'s status so callers and shells can detect bad usage.
    sys.exit(main())
比如同步中科大镜像站上 Ubuntu precise 的所有软件源,只需要在终端中键入:
python pull.ubuntu.packages.py mirrors.ustc.edu.cn/ubuntu /home/username precise