From 706e589562c5f2a6beae573c25704494dd53f43b Mon Sep 17 00:00:00 2001 From: Guillaume Pellerin Date: Tue, 10 Mar 2015 14:23:00 +0100 Subject: [PATCH] go upstream for zipstream --- setup.py | 1 + telemeta/util/zipstream/__init__.py | 407 ---------------------------- telemeta/util/zipstream/compat.py | 68 ----- telemeta/views/collection.py | 15 +- 4 files changed, 9 insertions(+), 482 deletions(-) delete mode 100644 telemeta/util/zipstream/__init__.py delete mode 100644 telemeta/util/zipstream/compat.py diff --git a/setup.py b/setup.py index 921614e0..1c15caa1 100644 --- a/setup.py +++ b/setup.py @@ -69,6 +69,7 @@ setup( 'pyyaml', 'python-ebml', 'mysql', + 'zipstream', ], tests_require=['pytest-django', 'pytest-cov', 'factory-boy'], # Provide a test command through django-setuptest diff --git a/telemeta/util/zipstream/__init__.py b/telemeta/util/zipstream/__init__.py deleted file mode 100644 index 811e2cfe..00000000 --- a/telemeta/util/zipstream/__init__.py +++ /dev/null @@ -1,407 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Iterable ZIP archive generator. - -Derived directly from zipfile.py -""" -from __future__ import unicode_literals, print_function, with_statement - -__version__ = '1.0.3' - -import os -import sys -import stat -import struct -import time -import zipfile - -from .compat import ( - str, bytes, - ZIP64_VERSION, - ZIP_BZIP2, BZIP2_VERSION, - ZIP_LZMA, LZMA_VERSION) - -from zipfile import ( - ZIP_STORED, ZIP64_LIMIT, ZIP_FILECOUNT_LIMIT, ZIP_MAX_COMMENT, - ZIP_DEFLATED, - structCentralDir, structEndArchive64, structEndArchive, structEndArchive64Locator, - stringCentralDir, stringEndArchive64, stringEndArchive, stringEndArchive64Locator, - structFileHeader, stringFileHeader, - zlib, crc32) - -stringDataDescriptor = b'PK\x07\x08' # magic number for data descriptor - - -def _get_compressor(compress_type): - if compress_type == ZIP_DEFLATED: - return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, - zlib.DEFLATED, -15) - elif compress_type == ZIP_BZIP2: - from zipfile import bz2 - return bz2.BZ2Compressor() - elif compress_type == ZIP_LZMA: - from zipfile import LZMACompressor - return LZMACompressor() - else: - return None - - -class PointerIO(object): - def __init__(self, mode='wb'): - if mode not in ('wb', ): - raise RuntimeError('zipstream.ZipFile() requires mode "wb"') - self.data_pointer = 0 - self.__mode = mode - self.__closed = False - - @property - def mode(self): - return self.__mode - - @property - def closed(self): - return self.__closed - - def close(self): - self.__closed = True - - def flush(self): - pass - - def next(self): - raise NotImplementedError() - - def seek(self, offset, whence): - raise NotImplementedError() - - def tell(self): - return self.data_pointer - - def truncate(size=None): - raise NotImplementedError() - - def write(self, data): - if self.closed: - raise ValueError('I/O operation on closed file') - - if isinstance(data, str): - data = data.encode('utf-8') - if not isinstance(data, bytes): - raise TypeError('expected bytes') - self.data_pointer += len(data) - return data - - -class ZipInfo(zipfile.ZipInfo): - def __init__(self, *args, **kwargs): - zipfile.ZipInfo.__init__(self, *args, **kwargs) - self.flag_bits = 0x08 # ZIP flag bits, bit 3 indicates presence of data descriptor - - def FileHeader(self, zip64=None): - """Return the per-file header as a string.""" - dt = self.date_time - dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] - dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) - if self.flag_bits & 0x08: - # Set these to zero because we write them after the file data - CRC = compress_size = file_size = 0 - else: - CRC = self.CRC - compress_size = self.compress_size - file_size = self.file_size - - extra = self.extra - - min_version = 0 - if zip64 is None: - zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT - if zip64: - fmt = ' ZIP64_LIMIT or compress_size > ZIP64_LIMIT: - if not zip64: - raise LargeZipFile("Filesize would require ZIP64 extensions") - # File is larger than what fits into a 4 byte integer, - # fall back to the ZIP64 extension - file_size = 0xffffffff - compress_size = 0xffffffff - min_version = ZIP64_VERSION - - if self.compress_type == ZIP_BZIP2: - min_version = max(BZIP2_VERSION, min_version) - elif self.compress_type == ZIP_LZMA: - min_version = max(LZMA_VERSION, min_version) - - self.extract_version = max(min_version, self.extract_version) - self.create_version = max(min_version, self.create_version) - filename, flag_bits = self._encodeFilenameFlags() - header = struct.pack(structFileHeader, stringFileHeader, - self.extract_version, self.reserved, flag_bits, - self.compress_type, dostime, dosdate, CRC, - compress_size, file_size, - len(filename), len(extra)) - return header + filename + extra - - def DataDescriptor(self): - """ - crc-32 4 bytes - compressed size 4 bytes - uncompressed size 4 bytes - """ - if self.compress_size > ZIP64_LIMIT or self.file_size > ZIP64_LIMIT: - fmt = b'<4sLQQ' - else: - fmt = b'<4sLLL' - return struct.pack(fmt, stringDataDescriptor, self.CRC, self.compress_size, self.file_size) - - -class ZipFile(zipfile.ZipFile): - def __init__(self, fileobj=None, mode='w', compression=ZIP_STORED, allowZip64=False): - """Open the ZIP file with mode write "w".""" - if mode not in ('w', ): - raise RuntimeError('zipstream.ZipFile() requires mode "w"') - if fileobj is None: - fileobj = PointerIO() - - self._comment = b'' - zipfile.ZipFile.__init__(self, fileobj, mode=mode, compression=compression, allowZip64=allowZip64) - # TODO: Refractor to write queue with args + kwargs matching write() - self.paths_to_write = [] - - def __iter__(self): - for args, kwargs in self.paths_to_write: - for data in self.__write(*args, **kwargs): - yield data - for data in self.__close(): - yield data - - def __enter__(self): - return self - - def __exit__(self, type, value, traceback): - self.close() - - @property - def comment(self): - """The comment text associated with the ZIP file.""" - return self._comment - - @comment.setter - def comment(self, comment): - if not isinstance(comment, bytes): - raise TypeError("comment: expected bytes, got %s" % type(comment)) - # check for valid comment length - if len(comment) >= ZIP_MAX_COMMENT: - if self.debug: - print('Archive comment is too long; truncating to %d bytes' - % ZIP_MAX_COMMENT) - comment = comment[:ZIP_MAX_COMMENT] - self._comment = comment - self._didModify = True - - def write(self, filename, arcname=None, compress_type=None): - # TODO: Reflect python's Zipfile.write - # - if filename is file, write as file - # - if filename is directory, write an empty directory - self.paths_to_write.append( - ((filename, ), {'arcname': arcname, 'compress_type': compress_type}), - ) - - def __write(self, filename, arcname=None, compress_type=None): - """Put the bytes from filename into the archive under the name - arcname.""" - if not self.fp: - raise RuntimeError( - "Attempt to write to ZIP archive that was already closed") - - st = os.stat(filename) - isdir = stat.S_ISDIR(st.st_mode) - mtime = time.localtime(st.st_mtime) - date_time = mtime[0:6] - # Create ZipInfo instance to store file information - if arcname is None: - arcname = filename - arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) - while arcname[0] in (os.sep, os.altsep): - arcname = arcname[1:] - if isdir: - arcname += '/' - zinfo = ZipInfo(arcname, date_time) - zinfo.external_attr = (st[0] & 0xFFFF) << 16 # Unix attributes - if compress_type is None: - zinfo.compress_type = self.compression - else: - zinfo.compress_type = compress_type - - zinfo.file_size = st.st_size - zinfo.flag_bits = 0x00 - zinfo.flag_bits |= 0x08 # ZIP flag bits, bit 3 indicates presence of data descriptor - zinfo.header_offset = self.fp.tell() # Start of header bytes - if zinfo.compress_type == ZIP_LZMA: - # Compressed data includes an end-of-stream (EOS) marker - zinfo.flag_bits |= 0x02 - - self._writecheck(zinfo) - self._didModify = True - - if isdir: - zinfo.file_size = 0 - zinfo.compress_size = 0 - zinfo.CRC = 0 - self.filelist.append(zinfo) - self.NameToInfo[zinfo.filename] = zinfo - yield self.fp.write(zinfo.FileHeader(False)) - return - - cmpr = _get_compressor(zinfo.compress_type) - with open(filename, 'rb') as fp: - # Must overwrite CRC and sizes with correct data later - zinfo.CRC = CRC = 0 - zinfo.compress_size = compress_size = 0 - # Compressed size can be larger than uncompressed size - zip64 = self._allowZip64 and \ - zinfo.file_size * 1.05 > ZIP64_LIMIT - yield self.fp.write(zinfo.FileHeader(zip64)) - file_size = 0 - while 1: - buf = fp.read(1024 * 8) - if not buf: - break - file_size = file_size + len(buf) - CRC = crc32(buf, CRC) & 0xffffffff - if cmpr: - buf = cmpr.compress(buf) - compress_size = compress_size + len(buf) - yield self.fp.write(buf) - if cmpr: - buf = cmpr.flush() - compress_size = compress_size + len(buf) - yield self.fp.write(buf) - zinfo.compress_size = compress_size - else: - zinfo.compress_size = file_size - zinfo.CRC = CRC - zinfo.file_size = file_size - if not zip64 and self._allowZip64: - if file_size > ZIP64_LIMIT: - raise RuntimeError('File size has increased during compressing') - if compress_size > ZIP64_LIMIT: - raise RuntimeError('Compressed size larger than uncompressed size') - - # Seek backwards and write file header (which will now include - # correct CRC and file sizes) - # position = self.fp.tell() # Preserve current position in file - # self.fp.seek(zinfo.header_offset, 0) - # self.fp.write(zinfo.FileHeader(zip64)) - # self.fp.seek(position, 0) - yield self.fp.write(zinfo.DataDescriptor()) - self.filelist.append(zinfo) - self.NameToInfo[zinfo.filename] = zinfo - - def __close(self): - """Close the file, and for mode "w" write the ending - records.""" - if self.fp is None: - return - - try: - if self.mode in ('w', 'a') and self._didModify: # write ending records - count = 0 - pos1 = self.fp.tell() - for zinfo in self.filelist: # write central directory - count = count + 1 - dt = zinfo.date_time - dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] - dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) - extra = [] - if zinfo.file_size > ZIP64_LIMIT \ - or zinfo.compress_size > ZIP64_LIMIT: - extra.append(zinfo.file_size) - extra.append(zinfo.compress_size) - file_size = 0xffffffff - compress_size = 0xffffffff - else: - file_size = zinfo.file_size - compress_size = zinfo.compress_size - - if zinfo.header_offset > ZIP64_LIMIT: - extra.append(zinfo.header_offset) - header_offset = 0xffffffff - else: - header_offset = zinfo.header_offset - - extra_data = zinfo.extra - min_version = 0 - if extra: - # Append a ZIP64 field to the extra's - extra_data = struct.pack( - b'= ZIP_FILECOUNT_LIMIT or - centDirOffset > ZIP64_LIMIT or - centDirSize > ZIP64_LIMIT): - # Need to write the ZIP64 end-of-archive records - zip64endrec = struct.pack( - structEndArchive64, stringEndArchive64, - 44, 45, 45, 0, 0, centDirCount, centDirCount, - centDirSize, centDirOffset) - yield self.fp.write(zip64endrec) - - zip64locrec = struct.pack( - structEndArchive64Locator, - stringEndArchive64Locator, 0, pos2, 1) - yield self.fp.write(zip64locrec) - centDirCount = min(centDirCount, 0xFFFF) - centDirSize = min(centDirSize, 0xFFFFFFFF) - centDirOffset = min(centDirOffset, 0xFFFFFFFF) - - endrec = struct.pack(structEndArchive, stringEndArchive, - 0, 0, centDirCount, centDirCount, - centDirSize, centDirOffset, len(self._comment)) - yield self.fp.write(endrec) - yield self.fp.write(self._comment) - self.fp.flush() - finally: - fp = self.fp - self.fp = None - if not self._filePassed: - fp.close() diff --git a/telemeta/util/zipstream/compat.py b/telemeta/util/zipstream/compat.py deleted file mode 100644 index fbc99fa5..00000000 --- a/telemeta/util/zipstream/compat.py +++ /dev/null @@ -1,68 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -pythoncompat - -Copied from requests -""" - -import sys - -# ------- -# Pythons -# ------- - - -PY2 = sys.version_info[0] == 2 -PY3 = sys.version_info[0] == 3 - - -# --------- -# Specifics -# --------- - -if PY2: - builtin_str = str - bytes = str - str = unicode - basestring = basestring - numeric_types = (int, long, float) - - -elif PY3: - builtin_str = str - str = str - bytes = bytes - basestring = (str, bytes) - numeric_types = (int, float) - - -try: - from zipfile import ZIP64_VERSION -except ImportError: - ZIP64_VERSION = 45 - -try: - from zipfile import BZIP2_VERSION -except ImportError: - BZIP2_VERSION = 46 - -try: - from zipfile import ZIP_BZIP2 -except ImportError: - ZIP_BZIP2 = 12 - -try: - from zipfile import LZMA_VERSION -except ImportError: - LZMA_VERSION = 63 - -try: - from zipfile import ZIP_LZMA -except ImportError: - ZIP_LZMA = 14 - -try: - from zipfile import ZIP_MAX_COMMENT -except ImportError: - ZIP_MAX_COMMENT = (1 << 16) - 1 \ No newline at end of file diff --git a/telemeta/views/collection.py b/telemeta/views/collection.py index 8eb16963..f200ab38 100644 --- a/telemeta/views/collection.py +++ b/telemeta/views/collection.py @@ -187,10 +187,11 @@ class CollectionPackageView(View): """ from telemeta.views import MarkerView from telemeta.backup import CollectionSerializer - from telemeta.util import zipstream + import zipstream + from zipfile import ZIP_DEFLATED import json - z = zipstream.ZipFile() + zip_file = zipstream.ZipFile(mode='w', compression=ZIP_DEFLATED) cache_data = TelemetaCache(settings.TELEMETA_DATA_CACHE_DIR) collection = self.get_object() @@ -200,18 +201,18 @@ class CollectionPackageView(View): filename = collection.public_id + '.json' cache_data.write_bin(data, filename) path = cache_data.dir + os.sep + filename - z.write(path, arcname=collection.public_id + os.sep + filename) + zip_file.write(path, arcname=collection.public_id + os.sep + filename) data = serializer.get_xml().encode('utf-8') filename = collection.public_id + '.xml' cache_data.write_bin(data, filename) path = cache_data.dir + os.sep + filename - z.write(path, arcname=collection.public_id + os.sep + filename) + zip_file.write(path, arcname=collection.public_id + os.sep + filename) for item in collection.items.all(): if item.file: filename, ext = os.path.splitext(item.file.path.split(os.sep)[-1]) - z.write(item.file.path, arcname=collection.public_id + os.sep + item.code + ext) + zip_file.write(item.file.path, arcname=collection.public_id + os.sep + item.code + ext) marker_view = MarkerView() markers = marker_view.get_markers(item.id) if markers: @@ -219,9 +220,9 @@ class CollectionPackageView(View): filename = item.code + '.json' cache_data.write_bin(data, filename) path = cache_data.dir + os.sep + filename - z.write(path, arcname=collection.public_id + os.sep + filename) + zip_file.write(path, arcname=collection.public_id + os.sep + filename) - response = StreamingHttpResponse(z, content_type='application/zip') + response = StreamingHttpResponse(zip_file, content_type='application/zip') response['Content-Disposition'] = "attachment; filename=%s.%s" % \ (collection.code, 'zip') return response -- 2.39.5