Avoid doubly-compressing zstandard files

This commit is contained in:
Sybren A. Stüvel 2022-02-03 17:52:14 +01:00
parent 2bf155a5f9
commit e5c7e38b44
5 changed files with 18 additions and 8 deletions

View File

@ -3,6 +3,10 @@
This file logs the changes that are actually interesting to users (new features, This file logs the changes that are actually interesting to users (new features,
changed functionality, fixed bugs). changed functionality, fixed bugs).
# Version 1.10 (2022-02-03)
- Avoid doubly-compressing ZStandard (Blender 3) compressed files.
# Version 1.9 (2021-11-19) # Version 1.9 (2021-11-19)
- Add `bat version` command to print just the version number and exit. - Add `bat version` command to print just the version number and exit.

View File

@ -73,7 +73,7 @@ class Compression(enum.Enum):
def open(path: pathlib.Path, mode: str, buffer_size: int) -> DecompressedFileInfo: def open(path: pathlib.Path, mode: str, buffer_size: int) -> DecompressedFileInfo:
"""Open the file, decompressing it into a temporary file if necessary.""" """Open the file, decompressing it into a temporary file if necessary."""
fileobj = path.open(mode, buffering=buffer_size) # typing.IO[bytes] fileobj = path.open(mode, buffering=buffer_size) # typing.IO[bytes]
compression = _find_compression_type(fileobj) compression = find_compression_type(fileobj)
if compression == Compression.UNRECOGNISED: if compression == Compression.UNRECOGNISED:
fileobj.close() fileobj.close()
@ -113,7 +113,7 @@ def open(path: pathlib.Path, mode: str, buffer_size: int) -> DecompressedFileInf
) )
def _find_compression_type(fileobj: typing.IO[bytes]) -> Compression: def find_compression_type(fileobj: typing.IO[bytes]) -> Compression:
fileobj.seek(0, os.SEEK_SET) fileobj.seek(0, os.SEEK_SET)
# This assumes that no magic is longer than "BLENDER". # This assumes that no magic is longer than "BLENDER".

View File

@ -5,6 +5,8 @@ import logging
import pathlib import pathlib
import shutil import shutil
from blender_asset_tracer.blendfile import magic_compression
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
# Arbitrarily chosen block size, in bytes. # Arbitrarily chosen block size, in bytes.
@ -55,11 +57,12 @@ def _move_or_copy(
""" """
srcfile = src.open("rb") srcfile = src.open("rb")
try: try:
first_bytes = srcfile.read(2) comp_type = magic_compression.find_compression_type(srcfile)
if first_bytes == b"\x1f\x8b": if comp_type != magic_compression.Compression.NONE:
# Already a gzipped file. # Either already compressed or not a blend file.
# Either way we shouldn't attempt compressing this file.
srcfile.close() srcfile.close()
my_log.debug("Source file %s is GZipped already", src) my_log.debug("Source file %s is compressed already", src)
if source_must_remain: if source_must_remain:
shutil.copy2(str(src), str(dest)) shutil.copy2(str(src), str(dest))
else: else:
@ -67,8 +70,8 @@ def _move_or_copy(
return return
my_log.debug("Compressing %s on the fly while copying to %s", src, dest) my_log.debug("Compressing %s on the fly while copying to %s", src, dest)
srcfile.seek(0)
with gzip.open(str(dest), mode="wb") as destfile: with gzip.open(str(dest), mode="wb") as destfile:
destfile.write(first_bytes)
shutil.copyfileobj(srcfile, destfile, BLOCK_SIZE) shutil.copyfileobj(srcfile, destfile, BLOCK_SIZE)
srcfile.close() srcfile.close()

View File

@ -283,7 +283,7 @@ class CompressionRecognitionTest(AbstractBlendFileTest):
def _find_compression_type(self, filename: str) -> magic_compression.Compression: def _find_compression_type(self, filename: str) -> magic_compression.Compression:
path = self.blendfiles / filename path = self.blendfiles / filename
with path.open("rb") as fileobj: with path.open("rb") as fileobj:
return magic_compression._find_compression_type(fileobj) return magic_compression.find_compression_type(fileobj)
def test_gzip_recognition(self): def test_gzip_recognition(self):
comp = self._find_compression_type("basic_file_compressed.blend") comp = self._find_compression_type("basic_file_compressed.blend")

View File

@ -68,6 +68,9 @@ class CompressorTest(AbstractBlendFileTest):
def test_copy_already_compressed(self): def test_copy_already_compressed(self):
self._test("basic_file_ñønæščii.blend", True) self._test("basic_file_ñønæščii.blend", True)
def test_copy_zstandard_compressed(self):
self._test("basic_file_zstandard.blend", True)
def test_copy_compress_on_the_fly(self): def test_copy_compress_on_the_fly(self):
self._test("basic_file.blend", True) self._test("basic_file.blend", True)