使用 Python 拉取 Ubuntu 软件源

到目前为止,我最中意的 Ubuntu 版本是 12.04 LTS,但担心这个版本的软件源在未来几年内会被官方下架,所以编写了一个脚本,用来将指定 Ubuntu 版本的所有软件源同步到本地。

  1 #!/usr/bin/python
  2 # -*- coding: UTF-8 -*-
  3 
  4 '''
  5 @author gaocan 809900210@qq.com
  6 '''
  7 
  8 import urllib
  9 import gzip
 10 import re
 11 import os
 12 import sys
 13 
 14 # Absolute directory of this script (symlinks resolved); main() writes Packages.list here.
 15 script_dir = os.path.dirname(os.path.realpath(__file__))
 16 
 17 #
 18 def SystemCall(cmd):
 19     if 0 != os.system(cmd):
 20         exit(1)
 21 
 22 #
 23 def Extract(src_file, dst_file):
 24     #
 25     f = gzip.GzipFile(src_file)
 26     #
 27     open(dst_file, "wb+").write(f.read())
 28     f.close()
 29 
 30 #create folder
 31 def CreateFolder(dst_dir):
 32     if not os.path.exists(dst_dir):
 33         os.makedirs(dst_dir)
 34 
 35 #
 36 def ParsePackageNames(f):
 37     #
 38     names = []
 39 
 40     #
 41     line = f.readline()
 42     while line:
 43         if re.match(r'Files:\s+.*', line, re.M|re.I):
 44             pos = f.tell()
 45             line = f.readline()
 46             while line:
 47                 if re.match(r'\s+.*\s+\d*\s+.*', line, re.M|re.I):
 48                     names.append(line.split()[2])
 49                 else:
 50                     f.seek(pos)
 51                     break
 52                 pos = f.tell()
 53                 line = f.readline()
 54             return names
 55             
 56         line = f.readline()
 57 
 58     return names
 59 
 60 #
 61 def ParsePackages(filename):
 62     #
 63     packages = []
 64     
 65     #
 66     dst_file = filename.replace(".gz", "")
 67     Extract(filename, dst_file)
 68     
 69     #
 70     with open(dst_file, 'r') as f:
 71         line = f.readline()
 72         while line:
 73             if re.match(r'Filename:\s+.*', line, re.M|re.I):
 74                 package_path = line[len('Filename:'):].strip()
 75                 packages.append(package_path)
 76             line = f.readline()
 77     
 78     #
 79     if os.path.exists(dst_file):
 80         os.remove(dst_file)
 81     return packages
 82 
 83 #
 84 def ParseSources(filename):
 85     #
 86     packages = []
 87     
 88     #
 89     dst_file = filename.replace(".gz", "")
 90     Extract(filename, dst_file)
 91     
 92     #
 93     with open(dst_file, 'r') as f:
 94         line = f.readline()
 95         while line:
 96             if re.match(r'Directory:\s+.*', line, re.M|re.I):
 97                 package_dir = line[len('Directory:'):].strip()
 98                 package_names = ParsePackageNames(f)
 99                 for package_name in package_names:
100                     package_path = package_dir + '/' + package_name
101                     packages.append(package_path)
102             line = f.readline()
103     
104     #
105     if os.path.exists(dst_file):
106         os.remove(dst_file)
107     return packages
108 
109 #
def _ParseListing(listing_url, version_code):
    """Return entry names from an HTML directory listing that contain *version_code*.

    The page at *listing_url* is downloaded and every <a ... href="...">
    is inspected. The entry name is the last path component of the href
    (query string, fragment and trailing slash removed); it is kept when
    it contains *version_code* as a plain substring.
    """
    import contextlib
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    # closing() releases the connection even if reading fails.
    with contextlib.closing(urlopen(listing_url)) as response:
        html = response.read().decode('utf-8', 'replace')

    # Only the quoted href value is captured, so further attributes on the
    # same tag (e.g. title="...") cannot leak into the name.
    anchor = re.compile(r'''<a\s+(?:[^>]*?\s)?href\s*=\s*(["'])(.*?)\1''', re.I)

    names = []
    for _quote, href in anchor.findall(html):
        path = re.split(r'[?#]', href, maxsplit=1)[0]
        name = path.rstrip('/').rsplit('/', 1)[-1]
        if name and version_code in name:
            names.append(name)
    return names


def ParseDists(https_url, version_code):
    """List the entries under <https_url>/dists whose name contains *version_code*.

    Args:
        https_url: Base URL of the mirror, without a trailing slash.
        version_code: Release codename to look for, matched as a literal
            substring.

    Returns:
        List of entry names (no slashes) in page order.
    """
    return _ParseListing(https_url + '/dists', version_code)


def ParseIndices(https_url, version_code):
    """List the entries under <https_url>/indices whose name contains *version_code*.

    Args:
        https_url: Base URL of the mirror, without a trailing slash.
        version_code: Release codename to look for, matched as a literal
            substring.

    Returns:
        List of entry names (no slashes) in page order.
    """
    return _ParseListing(https_url + '/indices', version_code)
134 
135 
136 #get dists
def GetDists(version_codes, https_url):
    """Collect the dists entries of the mirror for all requested codenames.

    Args:
        version_codes: Iterable of release codenames.
        https_url: Base URL of the mirror, passed on to ParseDists().

    Returns:
        List of entry names in first-seen order, without duplicates.
    """
    dists = []
    for version_code in version_codes:
        for name in ParseDists(https_url, version_code):
            # One entry can match several codenames (or a codename given
            # twice); keep it once so the same tree is not synced twice.
            if name not in dists:
                dists.append(name)
    return dists
142 
143 #get indices
def GetIndices(version_codes, https_url):
    """Collect the indices entries of the mirror for all requested codenames.

    Args:
        version_codes: Iterable of release codenames.
        https_url: Base URL of the mirror, passed on to ParseIndices().

    Returns:
        List of entry names in first-seen order, without duplicates.
    """
    indices = []
    for version_code in version_codes:
        for name in ParseIndices(https_url, version_code):
            # One entry can match several codenames (or a codename given
            # twice); keep it once so the same file is not synced twice.
            if name not in indices:
                indices.append(name)
    return indices
149 
150 #rsync project
def RsyncProject(rsync_cmd, rsync_url, dst_dir):
    """Sync the mirror's "project" directory into *dst_dir*.

    Args:
        rsync_cmd: rsync executable plus its options, as one shell string
            (inserted verbatim).
        rsync_url: Base rsync URL of the mirror, without a trailing slash.
        dst_dir: Local destination directory.

    The script exits (via SystemCall) if rsync fails.
    """
    try:
        from shlex import quote  # Python 3.3+
    except ImportError:
        from pipes import quote  # Python 2

    # Source and destination are quoted so spaces or shell metacharacters
    # in them cannot split or alter the command line.
    SystemCall(' '.join([rsync_cmd, quote(rsync_url + '/project'), quote(dst_dir)]))
153 
154 #rsync indices
def RsyncIndices(indices, rsync_cmd, rsync_url, dst_dir):
    """Sync each listed entry of the mirror's "indices" directory into *dst_dir*.

    Args:
        indices: Iterable of entry names below <rsync_url>/indices/.
        rsync_cmd: rsync executable plus its options, as one shell string
            (inserted verbatim).
        rsync_url: Base rsync URL of the mirror, without a trailing slash.
        dst_dir: Local destination directory.

    One rsync process is started per entry; the script exits (via
    SystemCall) on the first failure.
    """
    try:
        from shlex import quote  # Python 3.3+
    except ImportError:
        from pipes import quote  # Python 2

    for indice in indices:
        # Entry names are scraped from a remote web page, so they must be
        # quoted before they reach the shell.
        source = quote(rsync_url + '/indices/' + indice)
        SystemCall(' '.join([rsync_cmd, source, quote(dst_dir)]))
159 
160 #rsync dists
def RsyncDists(dists, rsync_cmd, rsync_url, dst_dir):
    """Sync each listed entry of the mirror's "dists" directory into *dst_dir*.

    Args:
        dists: Iterable of entry names below <rsync_url>/dists/.
        rsync_cmd: rsync executable plus its options, as one shell string
            (inserted verbatim).
        rsync_url: Base rsync URL of the mirror, without a trailing slash.
        dst_dir: Local destination directory.

    One rsync process is started per entry; the script exits (via
    SystemCall) on the first failure.
    """
    try:
        from shlex import quote  # Python 3.3+
    except ImportError:
        from pipes import quote  # Python 2

    for dist in dists:
        # Entry names are scraped from a remote web page, so they must be
        # quoted before they reach the shell.
        source = quote(rsync_url + '/dists/' + dist)
        SystemCall(' '.join([rsync_cmd, source, quote(dst_dir)]))
165 
166 #make packages list
def MakePackagesList(packages_list, dst_dir, dists):
    """Write the pool paths referenced by the synced index files to *packages_list*.

    Every Sources.gz and Packages.gz found below <dst_dir>/dists/<dist> is
    parsed (ParseSources / ParsePackages) and each resulting path is
    written on its own line.

    Args:
        packages_list: Path of the output file; it is created or truncated.
        dst_dir: Local mirror root that contains the "dists" directory.
        dists: Iterable of entry names below <dst_dir>/dists.

    A path referenced by more than one index file is written only once,
    at its first occurrence.
    """
    already_written = set()

    with open(packages_list, 'w') as packages_list_file:
        for dist in dists:
            for root, _dirs, files in os.walk(dst_dir + '/dists/' + dist):
                for filename in files:
                    index_path = os.path.join(root, filename)
                    if filename == 'Sources.gz':
                        packages = ParseSources(index_path)
                    elif filename == 'Packages.gz':
                        packages = ParsePackages(index_path)
                    else:
                        continue

                    for package in packages:
                        if package in already_written:
                            continue
                        already_written.add(package)
                        packages_list_file.write(package)
                        packages_list_file.write('\n')
185 
186 #rsync dst from packages.list
def RsyncPackages(packages_list, rsync_cmd, rsync_url, dst_dir):
    """Sync the files named in *packages_list* from the mirror into *dst_dir*.

    Args:
        packages_list: Path of a file with one path per line; it is handed
            to rsync through --files-from.
        rsync_cmd: rsync executable plus its options, as one shell string
            (inserted verbatim).
        rsync_url: Base rsync URL of the mirror.
        dst_dir: Local destination directory.

    The script exits (via SystemCall) if rsync fails.
    """
    try:
        from shlex import quote  # Python 3.3+
    except ImportError:
        from pipes import quote  # Python 2

    # Paths are quoted so spaces or shell metacharacters in them cannot
    # split or alter the command line.
    files_from = '--files-from=' + quote(packages_list)
    SystemCall(' '.join([rsync_cmd, files_from, quote(rsync_url), quote(dst_dir)]))
189 
def main():
    """Mirror the requested Ubuntu releases from a remote mirror.

    Command line: <mirror> <dest-dir> <codename> [<codename> ...]

    * <mirror> is the host and path of the mirror without a scheme; it is
      used as "https://<mirror>" for directory listings and as
      "rsync://rsync.<mirror>" for the transfers.
    * The local mirror is created in <dest-dir>/ubuntu.
    * Each <codename> selects the dists/indices entries whose name
      contains it.

    Returns:
        0 on success, 1 when too few arguments were given. A failing rsync
        call ends the script from inside SystemCall.
    """
    if len(sys.argv) < 4:
        sys.stderr.write(
            'usage: %s <mirror> <dest-dir> <codename> [<codename> ...]\n'
            % sys.argv[0])
        return 1

    web_url = sys.argv[1]
    dst_dir = sys.argv[2] + '/ubuntu'
    version_codes = sys.argv[3:]

    # The list of pool files to fetch is kept next to the script.
    packages_list = script_dir + '/Packages.list'

    # Options shared by every rsync invocation.
    rsync_cmd = 'rsync -R -r -z -a -v'

    rsync_url = 'rsync://rsync.' + web_url
    https_url = 'https://' + web_url

    dists = GetDists(version_codes, https_url)
    # md5sums.gz is always fetched, whatever codenames were requested.
    indices = ['md5sums.gz'] + GetIndices(version_codes, https_url)

    CreateFolder(dst_dir)

    # The index files must be local before the package list can be built
    # from them, so the order of these steps matters.
    RsyncProject(rsync_cmd, rsync_url, dst_dir)
    RsyncIndices(indices, rsync_cmd, rsync_url, dst_dir)
    RsyncDists(dists, rsync_cmd, rsync_url, dst_dir)
    MakePackagesList(packages_list, dst_dir, dists)
    RsyncPackages(packages_list, rsync_cmd, rsync_url, dst_dir)

    print('rsync finished!')
    return 0


if __name__ == "__main__":
    # Hand main()'s result to the shell so a usage error yields a non-zero
    # exit status.
    sys.exit(main())

 

例如,要同步中科大镜像站上 Ubuntu precise(12.04)的所有软件源,只需在终端中键入:

python pull.ubuntu.packages.py mirrors.ustc.edu.cn/ubuntu /home/username precise

 

posted @ 2021-05-22 15:11  幻梦雷音  阅读(112)  评论(0)  编辑  收藏  举报