OpenWrt – Blame information for rev 1
?pathlinks?
Rev | Author | Line No. | Line |
---|---|---|---|
1 | office | 1 | #!/usr/bin/env python |
2 | # |
||
3 | # Copyright (c) 2018 Yousong Zhou <yszhou4tech@gmail.com> |
||
4 | # |
||
5 | # This is free software, licensed under the GNU General Public License v2. |
||
6 | # See /LICENSE for more information. |
||
7 | |||
8 | import argparse |
||
9 | import calendar |
||
10 | import datetime |
||
11 | import errno |
||
12 | import fcntl |
||
13 | import hashlib |
||
14 | import json |
||
15 | import os |
||
16 | import os.path |
||
17 | import re |
||
18 | import shutil |
||
19 | import ssl |
||
20 | import subprocess |
||
21 | import sys |
||
22 | import time |
||
23 | import urllib2 |
||
24 | |||
# Scratch locations: honour $TMP_DIR when it is set and non-empty, otherwise
# fall back to /tmp.  Intermediate download artefacts live under "dl".
TMPDIR = os.getenv('TMP_DIR') or '/tmp'
TMPDIR_DL = os.path.join(TMPDIR, 'dl')
||
27 | |||
28 | |||
class PathException(Exception):
    """Error raised by the ``Path`` tar/untar helpers."""
||
class DownloadGitHubError(Exception):
    """Error raised when downloading or repacking a GitHub tarball fails."""
||
31 | |||
32 | |||
class Path(object):
    """Context class for preparing and cleaning up directories.

    If ``preclean`` is true, ``path`` is removed on context enter; otherwise
    it is left untouched.

    If ``isdir`` is true, ``path`` is created as a directory (including
    missing parents) on context enter.

    If ``keep`` is true, ``path`` will NOT be removed on context exit.
    """

    def __init__(self, path, isdir=True, preclean=False, keep=False):
        self.path = path
        self.isdir = isdir
        self.preclean = preclean
        self.keep = keep

    def __enter__(self):
        if self.preclean:
            self.rm_all(self.path)
        if self.isdir:
            self.mkdir_all(self.path)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if not self.keep:
            self.rm_all(self.path)

    @staticmethod
    def mkdir_all(path):
        """Same as mkdir -p.

        Every missing ancestor of ``path`` is created, outermost first.
        Components that already exist are tolerated, so concurrent callers
        creating the same tree do not fail.
        """
        # os.path.split() only returns (head, tail); iterating over it would
        # create at most the last two levels.  Walk up to the first existing
        # directory instead and create everything below it.
        pending = []
        cur = path
        while cur and not os.path.isdir(cur):
            pending.append(cur)
            parent = os.path.dirname(cur)
            if parent == cur:
                break
            cur = parent
        for p in reversed(pending):
            Path._mkdir(p)

    @staticmethod
    def _rmdir_dir(dir_):
        """Remove directory ``dir_`` together with everything inside it."""
        names = Path._listdir(dir_)
        for name in names:
            p = os.path.join(dir_, name)
            Path.rm_all(p)
        Path._rmdir(dir_)

    @staticmethod
    def _mkdir(path):
        """os.mkdir() that ignores EEXIST."""
        Path._os_func(os.mkdir, path, errno.EEXIST)

    @staticmethod
    def _rmdir(path):
        """os.rmdir() that ignores ENOENT."""
        Path._os_func(os.rmdir, path, errno.ENOENT)

    @staticmethod
    def _remove(path):
        """os.remove() that ignores ENOENT."""
        Path._os_func(os.remove, path, errno.ENOENT)

    @staticmethod
    def _listdir(path):
        """os.listdir() that returns ``[]`` when ``path`` does not exist."""
        return Path._os_func(os.listdir, path, errno.ENOENT, default=[])

    @staticmethod
    def _os_func(func, path, errno, default=None):
        """Call func(path) in an idempotent way.

        On exception ``ex``, if the type is OSError and ``ex.errno == errno``,
        return ``default``, otherwise, re-raise
        """
        # NOTE: the ``errno`` parameter shadows the errno module inside this
        # function; it is the single error code to be tolerated.
        try:
            return func(path)
        except OSError as e:
            if e.errno == errno:
                return default
            else:
                raise

    @staticmethod
    def rm_all(path):
        """Same as rm -r.

        Symbolic links are removed themselves and never followed.
        """
        if os.path.islink(path):
            Path._remove(path)
        elif os.path.isdir(path):
            Path._rmdir_dir(path)
        else:
            Path._remove(path)

    @staticmethod
    def untar(path, into=None):
        """Extract gzip'ed tarball at ``path`` into directory ``into``.

        ``into`` must be given despite its default.  Return the name of the
        single entry found in ``into`` after extraction; raise PathException
        if there is not exactly one.
        """
        args = ('tar', '-C', into, '-xzf', path, '--no-same-permissions')
        # umask 022 in the child so extracted modes do not depend on the
        # caller's umask
        subprocess.check_call(args, preexec_fn=lambda: os.umask(0o22))
        dirs = os.listdir(into)
        if len(dirs) == 1:
            return dirs[0]
        else:
            raise PathException('untar %s: expecting a single subdir, got %s' % (path, dirs))

    @staticmethod
    def tar(path, subdir, into=None, ts=None):
        """Pack ``subdir`` found under ``path`` into tarball ``into``.

        The compression is chosen from the suffix of ``into`` (``.xz``,
        ``.bz2`` or ``.gz``); any other suffix raises PathException.  When
        ``ts`` is given it is used as mtime (seconds since epoch) of all
        archive members.
        """
        # --sort=name requires a recent build of GNU tar
        args = ['tar', '--numeric-owner', '--owner=0', '--group=0', '--sort=name']
        envs = os.environ.copy()
        if ts is not None:
            args.append('--mtime=@%d' % ts)
        if into.endswith('.xz'):
            envs['XZ_OPT'] = '-7e'
            args.append('-J')
        elif into.endswith('.bz2'):
            args.append('-j')
        elif into.endswith('.gz'):
            args.append('-z')
            # -n: keep name and timestamp out of the gzip header
            envs['GZIP'] = '-n'
        else:
            raise PathException('unknown compression type %s' % into)
        # Keep the member name last: with POSIXLY_CORRECT set, GNU tar stops
        # parsing options at the first non-option argument.
        args += ['-C', path, '-cf', into, subdir]
        subprocess.check_call(args, env=envs)
||
152 | |||
153 | |||
class GitHubCommitTsCache(object):
    """File-backed cache mapping a key to a commit timestamp.

    The cache file holds one ``<key> <timestamp> <updated>`` record per line,
    where ``updated`` is the time the record was written.  Access is
    serialized with ``fcntl.lockf`` and at most ``__cachen`` most recently
    updated records are kept.
    """

    __cachef = 'github.commit.ts.cache'
    __cachen = 2048

    def __init__(self):
        Path.mkdir_all(TMPDIR_DL)
        self.cachef = os.path.join(TMPDIR_DL, self.__cachef)
        self.cache = {}

    def get(self, k):
        """Get timestamp with key ``k``.

        Return ``None`` when ``k`` is not in the cache.
        """
        fileno = os.open(self.cachef, os.O_RDONLY | os.O_CREAT)
        with os.fdopen(fileno) as fin:
            try:
                fcntl.lockf(fileno, fcntl.LOCK_SH)
                self._cache_init(fin)
                if k in self.cache:
                    return self.cache[k][0]
            finally:
                fcntl.lockf(fileno, fcntl.LOCK_UN)
        return None

    def set(self, k, v):
        """Update timestamp with ``k``."""
        fileno = os.open(self.cachef, os.O_RDWR | os.O_CREAT)
        # 'r+': read/write without implying truncation; records are text
        with os.fdopen(fileno, 'r+') as f:
            try:
                fcntl.lockf(fileno, fcntl.LOCK_EX)
                self._cache_init(f)
                self.cache[k] = (v, int(time.time()))
                self._cache_flush(f)
            finally:
                fcntl.lockf(fileno, fcntl.LOCK_UN)

    def _cache_init(self, fin):
        """Merge records read from ``fin`` into ``self.cache``.

        Malformed lines are skipped so that a damaged cache file degrades to
        cache misses instead of making every lookup fail.
        """
        for line in fin:
            fields = line.split()
            if len(fields) != 3:
                continue
            k, ts, updated = fields
            try:
                entry = (int(ts), int(updated))
            except ValueError:
                continue
            self.cache[k] = entry

    def _cache_flush(self, fout):
        """Rewrite the cache file from ``self.cache`` and reset the latter."""
        # most recently updated first; the sort is stable, so records with
        # equal ``updated`` keep their relative order
        entries = sorted(self.cache.items(), key=lambda item: item[1][1], reverse=True)
        entries = entries[:self.__cachen]
        self.cache = {}
        os.ftruncate(fout.fileno(), 0)
        fout.seek(0, os.SEEK_SET)
        for k, ent in entries:
            fout.write('{0} {1} {2}\n'.format(k, ent[0], ent[1]))
        # Push buffered records to the file now: callers release the lock
        # before the file object is closed.
        fout.flush()
||
207 | |||
208 | |||
class DownloadGitHubTarball(object):
    """Download and repack archive tarball from GitHub.

    Compared with the method of packing after cloning the whole repo, this
    method is more friendly to users with fragile internet connection.

    However, there are limitations with this method

     - GitHub imposes a 60 reqs/hour limit for unauthenticated API access.
       This affects fetching commit date for reproducible tarballs.  Download
       through the archive link is not affected.

     - GitHub archives do not contain source codes for submodules.

     - GitHub archives seem to respect .gitattributes and ignore paths with
       export-ignore attributes.

    For the first two issues, the method will fail loudly to allow fallback to
    clone-then-pack method.

    As for the 3rd issue, to make sure that this method only produces identical
    tarballs as the fallback method, we require the expected hash value to be
    supplied.  That means the first tarball will need to be prepared by the
    clone-then-pack method
    """

    # the dots are escaped so that only the literal host github.com matches
    __repo_url_regex = re.compile(r'^(?:https|git)://github\.com/(?P<owner>[^/]+)/(?P<repo>[^/]+)')

    def __init__(self, args):
        """Initialize from parsed command line ``args``.

        Reads ``dl_dir``, ``version``, ``subdir``, ``source``, ``url`` and
        ``hash`` from ``args``.  Raises DownloadGitHubError when the url is not
        a GitHub repo url or the hash has an unexpected length.
        """
        self.dl_dir = args.dl_dir
        self.version = args.version
        self.subdir = args.subdir
        self.source = args.source
        self.url = args.url
        self._init_owner_repo()
        self.xhash = args.hash
        self._init_hasher()
        self.commit_ts = None   # lazy load commit timestamp
        self.commit_ts_cache = GitHubCommitTsCache()
        self.name = 'github-tarball'

    def download(self):
        """Download and repack GitHub archive tarball."""
        self._init_commit_ts()
        with Path(TMPDIR_DL, keep=True) as dir_dl:
            # fetch tarball from GitHub
            tarball_path = os.path.join(dir_dl.path, self.subdir + '.tar.gz.dl')
            with Path(tarball_path, isdir=False):
                self._fetch(tarball_path)
                # unpack
                d = os.path.join(dir_dl.path, self.subdir + '.untar')
                with Path(d, preclean=True) as dir_untar:
                    tarball_prefix = Path.untar(tarball_path, into=dir_untar.path)
                    dir0 = os.path.join(dir_untar.path, tarball_prefix)
                    dir1 = os.path.join(dir_untar.path, self.subdir)
                    # submodules check
                    if self._has_submodule(dir0):
                        raise self._error('Fetching submodules is not yet supported')
                    # rename subdir
                    os.rename(dir0, dir1)
                    # repack
                    into = os.path.join(TMPDIR_DL, self.source)
                    Path.tar(dir_untar.path, self.subdir, into=into, ts=self.commit_ts)
                    try:
                        self._hash_check(into)
                    except Exception:
                        # do not leave a tarball with unexpected content behind
                        Path.rm_all(into)
                        raise
        # move to target location
        file1 = os.path.join(self.dl_dir, self.source)
        if into != file1:
            shutil.move(into, file1)

    def _has_submodule(self, dir_):
        """Tell whether ``dir_`` has a non-empty ``.gitmodules`` file.

        A stat failure other than ENOENT is conservatively reported as True.
        """
        m = os.path.join(dir_, '.gitmodules')
        try:
            st = os.stat(m)
            return st.st_size > 0
        except OSError as e:
            return e.errno != errno.ENOENT

    def _init_owner_repo(self):
        """Parse ``self.url`` into ``self.owner`` and ``self.repo``.

        A trailing ``.git`` is dropped from the repo name.  Raises
        DownloadGitHubError when the url does not look like a GitHub repo url.
        """
        # ``or ''``: a missing url is reported as invalid, not as TypeError
        m = self.__repo_url_regex.search(self.url or '')
        if m is None:
            raise self._error('Invalid github url: {}'.format(self.url))
        owner = m.group('owner')
        repo = m.group('repo')
        if repo.endswith('.git'):
            repo = repo[:-4]
        self.owner = owner
        self.repo = repo

    def _init_hasher(self):
        """Pick the hash algorithm from the length of the expected hash.

        64 hex digits select sha256, 32 select md5; anything else (including a
        missing hash) raises DownloadGitHubError.
        """
        xhash = self.xhash or ''
        if len(xhash) == 64:
            self.hasher = hashlib.sha256()
        elif len(xhash) == 32:
            self.hasher = hashlib.md5()
        else:
            raise self._error('Requires sha256sum for verification')

    def _hash_check(self, f):
        """Raise DownloadGitHubError unless file ``f`` has the expected hash."""
        # work on a copy so that the check can be repeated on the same object
        hasher = self.hasher.copy()
        with open(f, 'rb') as fin:
            while True:
                d = fin.read(4096)
                if not d:
                    break
                hasher.update(d)
        xhash = hasher.hexdigest()
        if xhash != self.xhash:
            raise self._error('Wrong hash (probably caused by .gitattributes), expecting {}, got {}'.format(self.xhash, xhash))

    def _init_commit_ts(self):
        """Load ``self.commit_ts`` from the local cache or the GitHub API.

        Raises DownloadGitHubError when no API yields a timestamp.
        """
        if self.commit_ts is not None:
            return
        # GitHub provides 2 APIs[1,2] for fetching commit data.  API[1] is more
        # terse while API[2] provides more verbose info such as commit diff
        # etc.  That's the main reason why API[1] is preferred: the response
        # size is predictable.
        #
        # However, API[1] only accepts complete commit sha1sum as the parameter
        # while API[2] is more liberal accepting also partial commit id and
        # tags, etc.
        #
        # [1] Git Commits, Git Data, https://developer.github.com/v3/git/commits/#get-a-commit
        # [2] Get a single commit, Repositories, https://developer.github.com/v3/repos/commits/#get-a-single-commit
        apis = [
            {
                'url': self._make_repo_url_path('git', 'commits', self.version),
                'attr_path': ('committer', 'date'),
            }, {
                'url': self._make_repo_url_path('commits', self.version),
                'attr_path': ('commit', 'committer', 'date'),
            },
        ]
        version_is_sha1sum = len(self.version) == 40
        if not version_is_sha1sum:
            # try the liberal API first
            apis.insert(0, apis.pop())
        last_err = None
        for api in apis:
            url = api['url']
            attr_path = api['attr_path']
            # The cache is best-effort: failing to read it must not prevent
            # the remote lookup.
            try:
                ct = self.commit_ts_cache.get(url)
            except (EnvironmentError, ValueError):
                ct = None
            if ct is None:
                try:
                    ct = self._init_commit_ts_remote_get(url, attr_path)
                except Exception as e:
                    # remember why and fall back to the next API
                    last_err = e
                    continue
                # Failing to store the value must not discard it either.
                try:
                    self.commit_ts_cache.set(url, ct)
                except (EnvironmentError, ValueError):
                    pass
            self.commit_ts = ct
            return
        raise self._error('Cannot fetch commit ts: {}: {}'.format(url, last_err))

    def _init_commit_ts_remote_get(self, url, attrpath):
        """Fetch commit date from API ``url`` as seconds since epoch (UTC).

        ``attrpath`` is the sequence of keys leading to the date string in the
        JSON response.
        """
        resp = self._make_request(url)
        try:
            data = resp.read()
        finally:
            resp.close()
        date = json.loads(data)
        for attr in attrpath:
            date = date[attr]
        date = datetime.datetime.strptime(date, '%Y-%m-%dT%H:%M:%SZ')
        date = date.timetuple()
        ct = calendar.timegm(date)
        return ct

    def _fetch(self, path):
        """Fetch tarball of the specified version ref."""
        ref = self.version
        url = self._make_repo_url_path('tarball', ref)
        resp = self._make_request(url)
        try:
            with open(path, 'wb') as fout:
                while True:
                    d = resp.read(4096)
                    if not d:
                        break
                    fout.write(d)
        finally:
            resp.close()

    def _make_repo_url_path(self, *args):
        """Return API path ``/repos/<owner>/<repo>[/<args joined by />]``."""
        url = '/repos/{0}/{1}'.format(self.owner, self.repo)
        if args:
            url += '/' + '/'.join(args)
        return url

    def _make_request(self, path):
        """Request GitHub API endpoint on ``path``."""
        url = 'https://api.github.com' + path
        headers = {
            'Accept': 'application/vnd.github.v3+json',
            'User-Agent': 'OpenWrt',
        }
        req = urllib2.Request(url, headers=headers)
        # NOTE(review): certificate verification is disabled here.  The
        # repacked tarball is still checked against the expected hash in
        # download(), but the commit timestamp response is not authenticated.
        sslcontext = ssl._create_unverified_context()
        fileobj = urllib2.urlopen(req, context=sslcontext)
        return fileobj

    def _error(self, msg):
        """Return (not raise) a DownloadGitHubError prefixed with the source name."""
        return DownloadGitHubError('{}: {}'.format(self.source, msg))
||
407 | |||
408 | |||
def main():
    """Command line entry point.

    Parse the options, then download and repack the GitHub tarball.  Any
    failure is reported on stderr and turned into exit status 1.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dl-dir', default=os.getcwd(), help='Download dir')
    plain_options = (
        ('--url', 'Download URL'),
        ('--subdir', 'Source code subdir name'),
        ('--version', 'Source code version'),
        ('--source', 'Source tarball filename'),
        ('--hash', "Source tarball's expected sha256sum"),
    )
    for flag, text in plain_options:
        parser.add_argument(flag, help=text)
    args = parser.parse_args()
    try:
        DownloadGitHubTarball(args).download()
    except Exception as ex:
        report = sys.stderr.write
        report('{}: Download from {} failed\n'.format(args.source, args.url))
        report('{}\n'.format(ex))
        sys.exit(1)
||
425 | |||
# Run the downloader only when executed as a script, not when imported.
if '__main__' == __name__:
    main()