From: Guillaume Pellerin Date: Thu, 20 Mar 2014 08:08:16 +0000 (+0100) Subject: include zipstream in the package X-Git-Tag: 1.4.6^2~24 X-Git-Url: https://git.parisson.com/?a=commitdiff_plain;h=6e8a6def0ec86ec7a32a7ac47c4adab88f18303e;p=telemeta.git include zipstream in the package --- diff --git a/setup.py b/setup.py index 31c8d709..74e71e18 100644 --- a/setup.py +++ b/setup.py @@ -41,9 +41,6 @@ setup( 'pyyaml', 'python-ebml', ], - dependency_links=[ - "https://github.com/yomguy/python-zipstream/zipball/master", - ], platforms=['OS Independent'], license='CeCILL v2', classifiers = CLASSIFIERS, diff --git a/telemeta/util/zipstream/__init__.py b/telemeta/util/zipstream/__init__.py new file mode 100644 index 00000000..811e2cfe --- /dev/null +++ b/telemeta/util/zipstream/__init__.py @@ -0,0 +1,407 @@ +# -*- coding: utf-8 -*- +""" +Iterable ZIP archive generator. + +Derived directly from zipfile.py +""" +from __future__ import unicode_literals, print_function, with_statement + +__version__ = '1.0.3' + +import os +import sys +import stat +import struct +import time +import zipfile + +from .compat import ( + str, bytes, + ZIP64_VERSION, + ZIP_BZIP2, BZIP2_VERSION, + ZIP_LZMA, LZMA_VERSION) + +from zipfile import ( + ZIP_STORED, ZIP64_LIMIT, ZIP_FILECOUNT_LIMIT, ZIP_MAX_COMMENT, + ZIP_DEFLATED, + structCentralDir, structEndArchive64, structEndArchive, structEndArchive64Locator, + stringCentralDir, stringEndArchive64, stringEndArchive, stringEndArchive64Locator, + structFileHeader, stringFileHeader, + zlib, crc32) + +stringDataDescriptor = b'PK\x07\x08' # magic number for data descriptor + + +def _get_compressor(compress_type): + if compress_type == ZIP_DEFLATED: + return zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, + zlib.DEFLATED, -15) + elif compress_type == ZIP_BZIP2: + from zipfile import bz2 + return bz2.BZ2Compressor() + elif compress_type == ZIP_LZMA: + from zipfile import LZMACompressor + return LZMACompressor() + else: + return None + + +class PointerIO(object): + def __init__(self, mode='wb'): + if mode not in ('wb', ): + raise RuntimeError('zipstream.ZipFile() requires mode "wb"') + self.data_pointer = 0 + self.__mode = mode + self.__closed = False + + @property + def mode(self): + return self.__mode + + @property + def closed(self): + return self.__closed + + def close(self): + self.__closed = True + + def flush(self): + pass + + def next(self): + raise NotImplementedError() + + def seek(self, offset, whence): + raise NotImplementedError() + + def tell(self): + return self.data_pointer + + def truncate(size=None): + raise NotImplementedError() + + def write(self, data): + if self.closed: + raise ValueError('I/O operation on closed file') + + if isinstance(data, str): + data = data.encode('utf-8') + if not isinstance(data, bytes): + raise TypeError('expected bytes') + self.data_pointer += len(data) + return data + + +class ZipInfo(zipfile.ZipInfo): + def __init__(self, *args, **kwargs): + zipfile.ZipInfo.__init__(self, *args, **kwargs) + self.flag_bits = 0x08 # ZIP flag bits, bit 3 indicates presence of data descriptor + + def FileHeader(self, zip64=None): + """Return the per-file header as a string.""" + dt = self.date_time + dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] + dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) + if self.flag_bits & 0x08: + # Set these to zero because we write them after the file data + CRC = compress_size = file_size = 0 + else: + CRC = self.CRC + compress_size = self.compress_size + file_size = self.file_size + + extra = self.extra + + min_version = 0 + if zip64 is None: + zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT + if zip64: + fmt = ' ZIP64_LIMIT or compress_size > ZIP64_LIMIT: + if not zip64: + raise LargeZipFile("Filesize would require ZIP64 extensions") + # File is larger than what fits into a 4 byte integer, + # fall back to the ZIP64 extension + file_size = 0xffffffff + compress_size = 0xffffffff + min_version = ZIP64_VERSION + + if self.compress_type == ZIP_BZIP2: + min_version = max(BZIP2_VERSION, min_version) + elif self.compress_type == ZIP_LZMA: + min_version = max(LZMA_VERSION, min_version) + + self.extract_version = max(min_version, self.extract_version) + self.create_version = max(min_version, self.create_version) + filename, flag_bits = self._encodeFilenameFlags() + header = struct.pack(structFileHeader, stringFileHeader, + self.extract_version, self.reserved, flag_bits, + self.compress_type, dostime, dosdate, CRC, + compress_size, file_size, + len(filename), len(extra)) + return header + filename + extra + + def DataDescriptor(self): + """ + crc-32 4 bytes + compressed size 4 bytes + uncompressed size 4 bytes + """ + if self.compress_size > ZIP64_LIMIT or self.file_size > ZIP64_LIMIT: + fmt = b'<4sLQQ' + else: + fmt = b'<4sLLL' + return struct.pack(fmt, stringDataDescriptor, self.CRC, self.compress_size, self.file_size) + + +class ZipFile(zipfile.ZipFile): + def __init__(self, fileobj=None, mode='w', compression=ZIP_STORED, allowZip64=False): + """Open the ZIP file with mode write "w".""" + if mode not in ('w', ): + raise RuntimeError('zipstream.ZipFile() requires mode "w"') + if fileobj is None: + fileobj = PointerIO() + + self._comment = b'' + zipfile.ZipFile.__init__(self, fileobj, mode=mode, compression=compression, allowZip64=allowZip64) + # TODO: Refractor to write queue with args + kwargs matching write() + self.paths_to_write = [] + + def __iter__(self): + for args, kwargs in self.paths_to_write: + for data in self.__write(*args, **kwargs): + yield data + for data in self.__close(): + yield data + + def __enter__(self): + return self + + def __exit__(self, type, value, traceback): + self.close() + + @property + def comment(self): + """The comment text associated with the ZIP file.""" + return self._comment + + @comment.setter + def comment(self, comment): + if not isinstance(comment, bytes): + raise TypeError("comment: expected bytes, got %s" % type(comment)) + # check for valid comment length + if len(comment) >= ZIP_MAX_COMMENT: + if self.debug: + print('Archive comment is too long; truncating to %d bytes' + % ZIP_MAX_COMMENT) + comment = comment[:ZIP_MAX_COMMENT] + self._comment = comment + self._didModify = True + + def write(self, filename, arcname=None, compress_type=None): + # TODO: Reflect python's Zipfile.write + # - if filename is file, write as file + # - if filename is directory, write an empty directory + self.paths_to_write.append( + ((filename, ), {'arcname': arcname, 'compress_type': compress_type}), + ) + + def __write(self, filename, arcname=None, compress_type=None): + """Put the bytes from filename into the archive under the name + arcname.""" + if not self.fp: + raise RuntimeError( + "Attempt to write to ZIP archive that was already closed") + + st = os.stat(filename) + isdir = stat.S_ISDIR(st.st_mode) + mtime = time.localtime(st.st_mtime) + date_time = mtime[0:6] + # Create ZipInfo instance to store file information + if arcname is None: + arcname = filename + arcname = os.path.normpath(os.path.splitdrive(arcname)[1]) + while arcname[0] in (os.sep, os.altsep): + arcname = arcname[1:] + if isdir: + arcname += '/' + zinfo = ZipInfo(arcname, date_time) + zinfo.external_attr = (st[0] & 0xFFFF) << 16 # Unix attributes + if compress_type is None: + zinfo.compress_type = self.compression + else: + zinfo.compress_type = compress_type + + zinfo.file_size = st.st_size + zinfo.flag_bits = 0x00 + zinfo.flag_bits |= 0x08 # ZIP flag bits, bit 3 indicates presence of data descriptor + zinfo.header_offset = self.fp.tell() # Start of header bytes + if zinfo.compress_type == ZIP_LZMA: + # Compressed data includes an end-of-stream (EOS) marker + zinfo.flag_bits |= 0x02 + + self._writecheck(zinfo) + self._didModify = True + + if isdir: + zinfo.file_size = 0 + zinfo.compress_size = 0 + zinfo.CRC = 0 + self.filelist.append(zinfo) + self.NameToInfo[zinfo.filename] = zinfo + yield self.fp.write(zinfo.FileHeader(False)) + return + + cmpr = _get_compressor(zinfo.compress_type) + with open(filename, 'rb') as fp: + # Must overwrite CRC and sizes with correct data later + zinfo.CRC = CRC = 0 + zinfo.compress_size = compress_size = 0 + # Compressed size can be larger than uncompressed size + zip64 = self._allowZip64 and \ + zinfo.file_size * 1.05 > ZIP64_LIMIT + yield self.fp.write(zinfo.FileHeader(zip64)) + file_size = 0 + while 1: + buf = fp.read(1024 * 8) + if not buf: + break + file_size = file_size + len(buf) + CRC = crc32(buf, CRC) & 0xffffffff + if cmpr: + buf = cmpr.compress(buf) + compress_size = compress_size + len(buf) + yield self.fp.write(buf) + if cmpr: + buf = cmpr.flush() + compress_size = compress_size + len(buf) + yield self.fp.write(buf) + zinfo.compress_size = compress_size + else: + zinfo.compress_size = file_size + zinfo.CRC = CRC + zinfo.file_size = file_size + if not zip64 and self._allowZip64: + if file_size > ZIP64_LIMIT: + raise RuntimeError('File size has increased during compressing') + if compress_size > ZIP64_LIMIT: + raise RuntimeError('Compressed size larger than uncompressed size') + + # Seek backwards and write file header (which will now include + # correct CRC and file sizes) + # position = self.fp.tell() # Preserve current position in file + # self.fp.seek(zinfo.header_offset, 0) + # self.fp.write(zinfo.FileHeader(zip64)) + # self.fp.seek(position, 0) + yield self.fp.write(zinfo.DataDescriptor()) + self.filelist.append(zinfo) + self.NameToInfo[zinfo.filename] = zinfo + + def __close(self): + """Close the file, and for mode "w" write the ending + records.""" + if self.fp is None: + return + + try: + if self.mode in ('w', 'a') and self._didModify: # write ending records + count = 0 + pos1 = self.fp.tell() + for zinfo in self.filelist: # write central directory + count = count + 1 + dt = zinfo.date_time + dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] + dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) + extra = [] + if zinfo.file_size > ZIP64_LIMIT \ + or zinfo.compress_size > ZIP64_LIMIT: + extra.append(zinfo.file_size) + extra.append(zinfo.compress_size) + file_size = 0xffffffff + compress_size = 0xffffffff + else: + file_size = zinfo.file_size + compress_size = zinfo.compress_size + + if zinfo.header_offset > ZIP64_LIMIT: + extra.append(zinfo.header_offset) + header_offset = 0xffffffff + else: + header_offset = zinfo.header_offset + + extra_data = zinfo.extra + min_version = 0 + if extra: + # Append a ZIP64 field to the extra's + extra_data = struct.pack( + b'= ZIP_FILECOUNT_LIMIT or + centDirOffset > ZIP64_LIMIT or + centDirSize > ZIP64_LIMIT): + # Need to write the ZIP64 end-of-archive records + zip64endrec = struct.pack( + structEndArchive64, stringEndArchive64, + 44, 45, 45, 0, 0, centDirCount, centDirCount, + centDirSize, centDirOffset) + yield self.fp.write(zip64endrec) + + zip64locrec = struct.pack( + structEndArchive64Locator, + stringEndArchive64Locator, 0, pos2, 1) + yield self.fp.write(zip64locrec) + centDirCount = min(centDirCount, 0xFFFF) + centDirSize = min(centDirSize, 0xFFFFFFFF) + centDirOffset = min(centDirOffset, 0xFFFFFFFF) + + endrec = struct.pack(structEndArchive, stringEndArchive, + 0, 0, centDirCount, centDirCount, + centDirSize, centDirOffset, len(self._comment)) + yield self.fp.write(endrec) + yield self.fp.write(self._comment) + self.fp.flush() + finally: + fp = self.fp + self.fp = None + if not self._filePassed: + fp.close() diff --git a/telemeta/util/zipstream/compat.py b/telemeta/util/zipstream/compat.py new file mode 100644 index 00000000..fbc99fa5 --- /dev/null +++ b/telemeta/util/zipstream/compat.py @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- + +""" +pythoncompat + +Copied from requests +""" + +import sys + +# ------- +# Pythons +# ------- + + +PY2 = sys.version_info[0] == 2 +PY3 = sys.version_info[0] == 3 + + +# --------- +# Specifics +# --------- + +if PY2: + builtin_str = str + bytes = str + str = unicode + basestring = basestring + numeric_types = (int, long, float) + + +elif PY3: + builtin_str = str + str = str + bytes = bytes + basestring = (str, bytes) + numeric_types = (int, float) + + +try: + from zipfile import ZIP64_VERSION +except ImportError: + ZIP64_VERSION = 45 + +try: + from zipfile import BZIP2_VERSION +except ImportError: + BZIP2_VERSION = 46 + +try: + from zipfile import ZIP_BZIP2 +except ImportError: + ZIP_BZIP2 = 12 + +try: + from zipfile import LZMA_VERSION +except ImportError: + LZMA_VERSION = 63 + +try: + from zipfile import ZIP_LZMA +except ImportError: + ZIP_LZMA = 14 + +try: + from zipfile import ZIP_MAX_COMMENT +except ImportError: + ZIP_MAX_COMMENT = (1 << 16) - 1 \ No newline at end of file diff --git a/telemeta/views/collection.py b/telemeta/views/collection.py index d5e4c923..c2c3755c 100644 --- a/telemeta/views/collection.py +++ b/telemeta/views/collection.py @@ -175,8 +175,8 @@ class CollectionPackageView(View): """ from telemeta.views import MarkerView from telemeta.backup import CollectionSerializer + from telemeta.util import zipstream import json - import zipstream z = zipstream.ZipFile() cache_data = TelemetaCache(settings.TELEMETA_DATA_CACHE_DIR)