Bug 1348229 - Add support to unzip directly from a remote URL. r?mshal
--- a/python/mozbuild/mozbuild/action/unzip.py
+++ b/python/mozbuild/mozbuild/action/unzip.py
@@ -4,34 +4,110 @@
# This script creates a zip file, but will also strip any binaries
# it finds before adding them to the zip.
from __future__ import absolute_import
import argparse
import mozpack.path as mozpath
+import requests
import sys
from mozbuild.util import ensureParentDir
from mozpack.mozjar import JarReader
from mozpack.files import DeflatedFile
+class RangeHelperTrait(object):
+    '''
+    Mixin for sliceable objects that define __len__.
+
+    It provides the translation from a slice object to absolute offsets
+    that the __getitem__ implementations of the classes below share.
+    '''
+    def _normalize_slice(self, slice):
+        '''
+        Return a (start, end) tuple of absolute offsets for the given slice.
+
+        Bounds follow the usual sequence rules: negative values count from
+        the end (so a stop of -1 excludes the last item) and out-of-range
+        values are clamped to [0, len(self)]. The returned end is exclusive.
+
+        When the slice has no stop, end is returned as None rather than
+        len(self), so that callers can tell an open-ended slice apart from
+        one with an exact length.
+
+        Strided slices are not supported (slice.step must be None).
+        '''
+        assert slice.step is None
+        # slice.indices() resolves None and negative bounds and clamps
+        # them to the object length, exactly like built-in sequences do.
+        start, end, _ = slice.indices(len(self))
+        if slice.stop is None:
+            return start, None
+        # An inverted range (stop before start) is an empty range; never
+        # report an end that precedes start.
+        return start, max(start, end)
+
+
+# The JarReader class can take any kind of sliceable object as raw data.
+# The HTTPRangeData class is such an object: it issues HTTP Range
+# requests when slices are requested.
+# Because the JarReader relies on memoryview-like behavior and uses
+# open-ended slices, we don't actually emit HTTP requests until we
+# have been asked for an exact length. However, because the JarReader
+# does a lot of small reads (2 or 4 bytes), we also always read at
+# least 1024 bytes at once.
+# While we don't cache all the ranges of data we've requested so far,
+# we do cache the last one. In practice, this is enough to avoid doing
+# multiple requests for the same data.
+class HTTPRangeData(RangeHelperTrait):
+    '''
+    Read-only, sliceable view of a remote resource, backed by HTTP Range
+    requests.
+
+    Only slice access is supported. A slice with an explicit stop triggers
+    (at most) one HTTP request and returns a memoryview of the requested
+    bytes. A slice without a stop returns a lazy HTTPOpenEndedRange, which
+    defers any request until a slice with an exact length is taken from it.
+    '''
+    # Minimum number of bytes asked for in a single request, so that runs
+    # of tiny reads are served from the cached response.
+    _MIN_READ = 1024
+
+    class HTTPOpenEndedRange(RangeHelperTrait):
+        '''
+        Lazy view of `range` starting at offset `start` and running to its
+        end. Slicing it with an exact length delegates to `range`, with
+        offsets shifted by `start`.
+        '''
+        def __init__(self, range, start):
+            self._range = range
+            self._start = start
+
+        def __getitem__(self, key):
+            assert isinstance(key, slice)
+            start, end = self._normalize_slice(key)
+            if end is None:
+                # Still open-ended: stay lazy, just move the origin.
+                return HTTPRangeData.HTTPOpenEndedRange(
+                    self._range, self._start + start)
+
+            return self._range[self._start + start: self._start + end]
+
+        def __len__(self):
+            return len(self._range) - self._start
+
+    def __init__(self, url):
+        '''
+        Resolve `url` (following redirects) with a HEAD request and record
+        the final URL and the resource length.
+
+        Raises requests.HTTPError on an HTTP error status and RuntimeError
+        if the server does not report a Content-Length.
+        '''
+        self._session = requests.Session()
+        # Byte offsets must refer to the raw resource: ask the server not
+        # to apply any content coding, which requests would otherwise
+        # advertise support for and transparently decode.
+        self._session.headers['Accept-Encoding'] = 'identity'
+        r = self._session.head(url, allow_redirects=True)
+        r.raise_for_status()
+        length = r.headers.get('Content-Length')
+        if length is None:
+            raise RuntimeError(
+                'No Content-Length in response for {}'.format(r.url))
+        self._len = int(length)
+        self._url = r.url
+        # (start offset, memoryview of content) of the last response.
+        self._last = None
+
+    def __getitem__(self, key):
+        '''
+        Return the bytes for the given slice as a memoryview, or an
+        HTTPOpenEndedRange when the slice has no stop.
+
+        Raises requests.HTTPError on an HTTP error status and RuntimeError
+        if the server does not answer with 206 Partial Content.
+        '''
+        assert isinstance(key, slice)
+        start, end = self._normalize_slice(key)
+        if end is None:
+            return HTTPRangeData.HTTPOpenEndedRange(self, start)
+
+        # Nothing to fetch for an empty or out-of-bounds range.
+        if end <= start or start >= self._len:
+            return memoryview(b'')
+
+        if self._last is not None:
+            last_start, last_content = self._last
+            last_end = last_start + len(last_content)
+            if start >= last_start and end <= last_end:
+                return last_content[start - last_start:end - last_start]
+
+        # The last byte position of a Range header is inclusive, and there
+        # is no point in asking for bytes past the end of the resource.
+        last_byte = min(max(start + self._MIN_READ, end), self._len) - 1
+        r = self._session.get(self._url, headers={
+            'Range': 'bytes={}-{}'.format(start, last_byte),
+        })
+        r.raise_for_status()
+        # A server that ignores the Range header answers 200 with the
+        # whole body, which would be misread as starting at `start`.
+        if r.status_code != 206:
+            raise RuntimeError(
+                'Expected 206 Partial Content from {}, got {}'.format(
+                    self._url, r.status_code))
+        # Cache and return memoryviews so that cache hits and misses
+        # yield the same type, without copying.
+        content = memoryview(r.content)
+        self._last = (start, content)
+        return content[:end - start]
+
+    def __len__(self):
+        return self._len
+
+
def main(args):
parser = argparse.ArgumentParser()
parser.add_argument("-C", metavar='DIR', default=".",
help="Change to given directory before extracting")
parser.add_argument("-l", action='store_true',
help="List files")
parser.add_argument("zip", help="Path to zip file")
parser.add_argument("files", nargs="*",
help="Path to files to extract from zip")
args = parser.parse_args(args)
- jar = JarReader(file=args.zip)
+ if '://' in args.zip:
+ jar = JarReader(data=HTTPRangeData(args.zip))
+ else:
+ jar = JarReader(file=args.zip)
for entry in jar.entries:
if not args.files or any(mozpath.match(entry, f)
for f in args.files):
if args.l:
print entry
else:
print 'Extracting {}'.format(entry)
--- a/python/mozbuild/mozpack/mozjar.py
+++ b/python/mozbuild/mozpack/mozjar.py
@@ -336,17 +336,19 @@ class JarReader(object):
'''
Opens the given file as a Jar archive. Use the given file-like object
if one is given instead of opening the given file name.
'''
if fileobj:
data = fileobj.read()
elif file:
data = open(file, 'rb').read()
- self._data = memoryview(data)
+ self._data = data
+ if isinstance(self._data, str):
+ self._data = memoryview(self._data)
# The End of Central Directory Record has a variable size because of
# comments it may contain, so scan for it from the end of the file.
offset = -CDIR_END_SIZE
while True:
signature = JarStruct.get_data('uint32', self._data[offset:])[0]
if signature == JarCdirEnd.MAGIC:
break
if offset == -len(self._data):
@@ -368,19 +370,22 @@ class JarReader(object):
directory. Directory entries are skipped.
'''
if hasattr(self, '_entries'):
return self._entries
preload = 0
if self.is_optimized:
preload = JarStruct.get_data('uint32', self._data)[0]
entries = OrderedDict()
- offset = self._cdir_end['cdir_offset']
+ cdir_offset = self._cdir_end['cdir_offset']
+ cdir_size = self._cdir_end['cdir_size']
+ cdir_data = self._data[cdir_offset:cdir_offset + cdir_size]
+ offset = 0
for e in xrange(self._cdir_end['cdir_entries']):
- entry = JarCdirEntry(self._data[offset:])
+ entry = JarCdirEntry(cdir_data[offset:])
offset += entry.size
# Creator host system. 0 is MSDOS, 3 is Unix
host = entry['creator_version'] >> 8
# External attributes values depend on host above. On Unix the
# higher bits are the stat.st_mode value. On MSDOS, the lower bits
# are the FAT attributes.
xattr = entry['external_attr']
# Skip directories