From fc32ebd453372c17a34c009c3787ae8b44603ff2 Mon Sep 17 00:00:00 2001 From: pullusb Date: Tue, 10 Jan 2023 11:41:55 +0100 Subject: [PATCH] update BAT module to 1.15 version --- __init__.py | 7 +- blendfile/__init__.py | 315 +++++++++++++++++++-------------- blendfile/dna.py | 144 ++++++++------- blendfile/dna_io.py | 120 ++++++++++--- blendfile/exceptions.py | 2 +- blendfile/header.py | 39 ++-- blendfile/iterators.py | 22 +-- blendfile/magic_compression.py | 169 ++++++++++++++++++ bpathlib.py | 65 ++++--- cdefs.py | 37 +++- cli/__init__.py | 100 +++++++---- cli/blocks.py | 93 ++++++---- cli/common.py | 33 ++-- cli/list_deps.py | 43 +++-- cli/pack.py | 188 +++++++++++++------- cli/version.py | 32 ++++ compressor.py | 46 ++--- pack/__init__.py | 189 ++++++++++++-------- pack/filesystem.py | 57 +++--- pack/progress.py | 25 +-- pack/s3.py | 73 ++++---- pack/shaman/__init__.py | 65 ++++--- pack/shaman/cache.py | 52 +++--- pack/shaman/client.py | 28 +-- pack/shaman/transfer.py | 173 +++++++++++------- pack/transfer.py | 50 ++++-- pack/zipped.py | 16 +- trace/__init__.py | 24 +-- trace/blocks2assets.py | 101 ++++++----- trace/expanders.py | 211 ++++++++++++++-------- trace/file2blocks.py | 61 ++++--- trace/file_sequence.py | 17 +- trace/modifier_walkers.py | 236 +++++++++++++++--------- trace/result.py | 87 +++++---- 34 files changed, 1878 insertions(+), 1042 deletions(-) create mode 100644 blendfile/magic_compression.py create mode 100644 cli/version.py diff --git a/__init__.py b/__init__.py index 7d89f0d..9bf1195 100644 --- a/__init__.py +++ b/__init__.py @@ -20,13 +20,14 @@ # -__version__ = '1.3' -# branched from source BAT since 1.3.0 +__version__ = '1.15' + +### --- BAT used as an addon with following code --- bl_info = { "name": "Blender Asset Tracer", "author": "Campbell Barton, Sybren A. Stüvel, Loïc Charrière and Clément Ducarteron", - "version": (1, 3, 4), + "version": (1, 15, 0), "blender": (2, 80, 0), "location": "File > External Data > BAT", "description": "Utility for packing blend files", diff --git a/blendfile/__init__.py b/blendfile/__init__.py index 29afff4..4b6cd10 100644 --- a/blendfile/__init__.py +++ b/blendfile/__init__.py @@ -32,40 +32,39 @@ import shutil import tempfile import typing -from . import exceptions, dna_io, dna, header +from . 
import exceptions, dna, header, magic_compression from blender_asset_tracer import bpathlib log = logging.getLogger(__name__) FILE_BUFFER_SIZE = 1024 * 1024 -BLENDFILE_MAGIC = b'BLENDER' -GZIP_MAGIC = b'\x1f\x8b' -BFBList = typing.List['BlendFileBlock'] +BFBList = typing.List["BlendFileBlock"] _cached_bfiles = {} # type: typing.Dict[pathlib.Path, BlendFile] -def open_cached(path: pathlib.Path, mode='rb', - assert_cached: typing.Optional[bool] = None) -> 'BlendFile': +def open_cached( + path: pathlib.Path, mode="rb", assert_cached: typing.Optional[bool] = None +) -> "BlendFile": """Open a blend file, ensuring it is only opened once.""" - my_log = log.getChild('open_cached') + my_log = log.getChild("open_cached") bfile_path = bpathlib.make_absolute(path) if assert_cached is not None: is_cached = bfile_path in _cached_bfiles if assert_cached and not is_cached: - raise AssertionError('File %s was not cached' % bfile_path) + raise AssertionError("File %s was not cached" % bfile_path) elif not assert_cached and is_cached: - raise AssertionError('File %s was cached' % bfile_path) + raise AssertionError("File %s was cached" % bfile_path) try: bfile = _cached_bfiles[bfile_path] except KeyError: - my_log.debug('Opening non-cached %s', path) + my_log.debug("Opening non-cached %s", path) bfile = BlendFile(path, mode=mode) _cached_bfiles[bfile_path] = bfile else: - my_log.debug('Returning cached %s', path) + my_log.debug("Returning cached %s", path) return bfile @@ -76,13 +75,13 @@ def close_all_cached() -> None: # Don't even log anything when there is nothing to close return - log.debug('Closing %d cached blend files', len(_cached_bfiles)) + log.debug("Closing %d cached blend files", len(_cached_bfiles)) for bfile in list(_cached_bfiles.values()): bfile.close() _cached_bfiles.clear() -def _cache(path: pathlib.Path, bfile: 'BlendFile'): +def _cache(path: pathlib.Path, bfile: "BlendFile"): """Add a BlendFile to the cache.""" bfile_path = bpathlib.make_absolute(path) _cached_bfiles[bfile_path] = bfile @@ -102,9 +101,16 @@ class BlendFile: uncompressed files, but a temporary file for compressed files. :ivar fileobj: the file object that's being accessed. """ - log = log.getChild('BlendFile') - def __init__(self, path: pathlib.Path, mode='rb') -> None: + log = log.getChild("BlendFile") + + strict_pointer_mode = True + """Raise exceptions.SegmentationFault when dereferencing an unknown pointer. + + Set to False to disable this exception, and to return None instead. + """ + + def __init__(self, path: pathlib.Path, mode="rb") -> None: """Create a BlendFile instance for the blend file at the path. Opens the file for reading or writing pending on the access. Compressed @@ -121,7 +127,9 @@ class BlendFile: self.blocks = [] # type: BFBList """BlendFileBlocks of this file, in disk order.""" - self.code_index = collections.defaultdict(list) # type: typing.Dict[bytes, BFBList] + self.code_index = collections.defaultdict( + list + ) # type: typing.Dict[bytes, BFBList] self.structs = [] # type: typing.List[dna.Struct] self.sdna_index_from_id = {} # type: typing.Dict[bytes, int] self.block_from_addr = {} # type: typing.Dict[int, BlendFileBlock] @@ -141,44 +149,13 @@ class BlendFile: correct magic bytes. 
""" - if 'b' not in mode: - raise ValueError('Only binary modes are supported, not %r' % mode) + decompressed = magic_compression.open(path, mode, FILE_BUFFER_SIZE) self.filepath = path + self.is_compressed = decompressed.is_compressed + self.raw_filepath = decompressed.path - fileobj = path.open(mode, buffering=FILE_BUFFER_SIZE) # typing.IO[bytes] - fileobj.seek(0, os.SEEK_SET) - - magic = fileobj.read(len(BLENDFILE_MAGIC)) - if magic == BLENDFILE_MAGIC: - self.is_compressed = False - self.raw_filepath = path - return fileobj - - if magic[:2] == GZIP_MAGIC: - self.is_compressed = True - - log.debug("compressed blendfile detected: %s", path) - # Decompress to a temporary file. - tmpfile = tempfile.NamedTemporaryFile() - fileobj.seek(0, os.SEEK_SET) - with gzip.GzipFile(fileobj=fileobj, mode=mode) as gzfile: - magic = gzfile.read(len(BLENDFILE_MAGIC)) - if magic != BLENDFILE_MAGIC: - raise exceptions.BlendFileError("Compressed file is not a blend file", path) - - data = magic - while data: - tmpfile.write(data) - data = gzfile.read(FILE_BUFFER_SIZE) - - # Further interaction should be done with the uncompressed file. - self.raw_filepath = pathlib.Path(tmpfile.name) - fileobj.close() - return tmpfile - - fileobj.close() - raise exceptions.BlendFileError("File is not a blend file", path) + return decompressed.fileobj def _load_blocks(self) -> None: """Read the blend file to load its DNA structure to memory.""" @@ -187,10 +164,10 @@ class BlendFile: self.sdna_index_from_id.clear() while True: block = BlendFileBlock(self) - if block.code == b'ENDB': + if block.code == b"ENDB": break - if block.code == b'DNA1': + if block.code == b"DNA1": self.decode_structs(block) else: self.fileobj.seek(block.size, os.SEEK_CUR) @@ -200,35 +177,37 @@ class BlendFile: self.block_from_addr[block.addr_old] = block if not self.structs: - raise exceptions.NoDNA1Block("No DNA1 block in file, not a valid .blend file", - self.filepath) + raise exceptions.NoDNA1Block( + "No DNA1 block in file, not a valid .blend file", self.filepath + ) def __repr__(self) -> str: clsname = self.__class__.__qualname__ if self.filepath == self.raw_filepath: - return '<%s %r>' % (clsname, self.filepath) - return '<%s %r reading from %r>' % (clsname, self.filepath, self.raw_filepath) + return "<%s %r>" % (clsname, self.filepath) + return "<%s %r reading from %r>" % (clsname, self.filepath, self.raw_filepath) - def __enter__(self) -> 'BlendFile': + def __enter__(self) -> "BlendFile": return self def __exit__(self, exctype, excvalue, traceback) -> None: self.close() - def copy_and_rebind(self, path: pathlib.Path, mode='rb') -> None: + def copy_and_rebind(self, path: pathlib.Path, mode="rb") -> None: """Change which file is bound to this BlendFile. This allows cloning a previously opened file, and rebinding it to reuse the already-loaded DNA structs and data blocks. 
""" - log.debug('Rebinding %r to %s', self, path) + log.debug("Rebinding %r to %s", self, path) self.close() _uncache(self.filepath) - self.log.debug('Copying %s to %s', self.filepath, path) + self.log.debug("Copying %s to %s", self.filepath, path) # TODO(Sybren): remove str() calls when targeting Python 3.6+ - shutil.copy(str(self.filepath), str(path)) + # dst needs to be a file and not a directory + shutil.copyfile(str(self.filepath), str(path)) self.fileobj = self._open_file(path, mode=mode) _cache(path, self) @@ -239,10 +218,10 @@ class BlendFile: def mark_modified(self) -> None: """Recompess the file when it is closed.""" - self.log.debug('Marking %s as modified', self.raw_filepath) + self.log.debug("Marking %s as modified", self.raw_filepath) self._is_modified = True - def find_blocks_from_code(self, code: bytes) -> typing.List['BlendFileBlock']: + def find_blocks_from_code(self, code: bytes) -> typing.List["BlendFileBlock"]: assert isinstance(code, bytes) return self.code_index[code] @@ -255,19 +234,19 @@ class BlendFile: return if self._is_modified: - log.debug('closing blend file %s after it was modified', self.raw_filepath) + log.debug("closing blend file %s after it was modified", self.raw_filepath) if self._is_modified and self.is_compressed: - log.debug("recompressing modified blend file %s", self.raw_filepath) + log.debug("GZip-recompressing modified blend file %s", self.raw_filepath) self.fileobj.seek(os.SEEK_SET, 0) - with gzip.open(str(self.filepath), 'wb') as gzfile: + with gzip.open(str(self.filepath), "wb") as gzfile: while True: data = self.fileobj.read(FILE_BUFFER_SIZE) if not data: break gzfile.write(data) - log.debug("compressing to %s finished", self.filepath) + log.debug("GZip-compression to %s finished", self.filepath) # Close the file object after recompressing, as it may be a temporary # file that'll disappear as soon as we close it. @@ -284,11 +263,15 @@ class BlendFile: curr_struct = self.structs[sdna_index_curr] next_struct = self.structs[sdna_index_next] if curr_struct.size > next_struct.size: - raise RuntimeError("Can't refine to smaller type (%s -> %s)" % - (curr_struct.dna_type_id.decode('utf-8'), - next_struct.dna_type_id.decode('utf-8'))) + raise RuntimeError( + "Can't refine to smaller type (%s -> %s)" + % ( + curr_struct.dna_type_id.decode("utf-8"), + next_struct.dna_type_id.decode("utf-8"), + ) + ) - def decode_structs(self, block: 'BlendFileBlock'): + def decode_structs(self, block: "BlendFileBlock"): """ DNACatalog is a catalog of all information in the DNA1 file-block """ @@ -356,7 +339,9 @@ class BlendFile: dna_offset = 0 for field_index in range(fields_len): - field_type_index, field_name_index = shortstruct2.unpack_from(data, offset) + field_type_index, field_name_index = shortstruct2.unpack_from( + data, offset + ) offset += 4 dna_type = types[field_type_index] @@ -381,18 +366,33 @@ class BlendFile: root = bpathlib.BlendPath(bfile_dir) abspath = relpath.absolute(root) - my_log = self.log.getChild('abspath') - my_log.debug('Resolved %s relative to %s to %s', relpath, self.filepath, abspath) + my_log = self.log.getChild("abspath") + my_log.debug( + "Resolved %s relative to %s to %s", relpath, self.filepath, abspath + ) return abspath - def dereference_pointer(self, address: int) -> 'BlendFileBlock': - """Return the pointed-to block, or raise SegmentationFault.""" + def dereference_pointer(self, address: int) -> typing.Optional["BlendFileBlock"]: + """Return the pointed-to block, or raise SegmentationFault. 
+ + When BlendFile.strict_pointer_mode is False, the exception will not be + thrown, but None will be returned. + """ try: return self.block_from_addr[address] except KeyError: - raise exceptions.SegmentationFault('address does not exist', address) from None + if self.strict_pointer_mode: + raise exceptions.SegmentationFault( + "address does not exist", address + ) from None + log.warning( + "Silenced SegmentationFault caused by dereferencing invalid pointer" + " (0x%x) because strict_pointer_mode is off.", + address, + ) + return None def struct(self, name: bytes) -> dna.Struct: index = self.sdna_index_from_id[name] @@ -410,19 +410,26 @@ class BlendFileBlock: # dependency tracer significantly (p<0.001) faster. In my test case the # speed improvement was 16% for a 'bam list' command. __slots__ = ( - 'bfile', 'code', 'size', 'addr_old', 'sdna_index', - 'count', 'file_offset', 'endian', '_id_name', + "bfile", + "code", + "size", + "addr_old", + "sdna_index", + "count", + "file_offset", + "endian", + "_id_name", ) - log = log.getChild('BlendFileBlock') - old_structure = struct.Struct(b'4sI') + log = log.getChild("BlendFileBlock") + old_structure = struct.Struct(b"4sI") """old blend files ENDB block structure""" def __init__(self, bfile: BlendFile) -> None: self.bfile = bfile # Defaults; actual values are set by interpreting the block header. - self.code = b'' + self.code = b"" self.size = 0 self.addr_old = 0 self.sdna_index = 0 @@ -438,10 +445,14 @@ class BlendFileBlock: header_struct = bfile.block_header_struct data = bfile.fileobj.read(header_struct.size) if len(data) != header_struct.size: - self.log.warning("Blend file %s seems to be truncated, " - "expected %d bytes but could read only %d", - bfile.filepath, header_struct.size, len(data)) - self.code = b'ENDB' + self.log.warning( + "Blend file %s seems to be truncated, " + "expected %d bytes but could read only %d", + bfile.filepath, + header_struct.size, + len(data), + ) + self.code = b"ENDB" return # header size can be 8, 20, or 24 bytes long @@ -449,14 +460,14 @@ class BlendFileBlock: # 20: normal headers 32 bit platform # 24: normal headers 64 bit platform if len(data) <= 15: - self.log.debug('interpreting block as old-style ENB block') + self.log.debug("interpreting block as old-style ENB block") blockheader = self.old_structure.unpack(data) self.code = self.endian.read_data0(blockheader[0]) return blockheader = header_struct.unpack(data) self.code = self.endian.read_data0(blockheader[0]) - if self.code != b'ENDB': + if self.code != b"ENDB": self.size = blockheader[1] self.addr_old = blockheader[2] self.sdna_index = blockheader[3] @@ -478,11 +489,13 @@ class BlendFileBlock: def __eq__(self, other: object) -> bool: if not isinstance(other, BlendFileBlock): return False - return (self.code == other.code and - self.addr_old == other.addr_old and - self.bfile.filepath == other.bfile.filepath) + return ( + self.code == other.code + and self.addr_old == other.addr_old + and self.bfile.filepath == other.bfile.filepath + ) - def __lt__(self, other: 'BlendFileBlock') -> bool: + def __lt__(self, other: "BlendFileBlock") -> bool: """Order blocks by file path and offset within that file.""" if not isinstance(other, BlendFileBlock): raise NotImplemented() @@ -504,7 +517,7 @@ class BlendFileBlock: @property def dna_type_name(self) -> str: - return self.dna_type_id.decode('ascii') + return self.dna_type_id.decode("ascii") @property def id_name(self) -> typing.Optional[bytes]: @@ -515,7 +528,7 @@ class BlendFileBlock: """ if self._id_name is ...: 
try: - self._id_name = self[b'id', b'name'] + self._id_name = self[b"id", b"name"] except KeyError: self._id_name = None @@ -553,16 +566,19 @@ class BlendFileBlock: :returns: tuple (offset in bytes, length of array in items) """ - field, field_offset = self.dna_type.field_from_path(self.bfile.header.pointer_size, path) + field, field_offset = self.dna_type.field_from_path( + self.bfile.header.pointer_size, path + ) return self.file_offset + field_offset, field.name.array_size - def get(self, - path: dna.FieldPath, - default=..., - null_terminated=True, - as_str=False, - return_field=False - ) -> typing.Any: + def get( + self, + path: dna.FieldPath, + default=..., + null_terminated=True, + as_str=False, + return_field=False, + ) -> typing.Any: """Read a property and return the value. :param path: name of the property (like `b'loc'`), tuple of names @@ -583,21 +599,25 @@ class BlendFileBlock: dna_struct = self.bfile.structs[self.sdna_index] field, value = dna_struct.field_get( - self.bfile.header, self.bfile.fileobj, path, + self.bfile.header, + self.bfile.fileobj, + path, default=default, - null_terminated=null_terminated, as_str=as_str, + null_terminated=null_terminated, + as_str=as_str, ) if return_field: return value, field return value - def get_recursive_iter(self, - path: dna.FieldPath, - path_root: dna.FieldPath = b'', - default=..., - null_terminated=True, - as_str=True, - ) -> typing.Iterator[typing.Tuple[dna.FieldPath, typing.Any]]: + def get_recursive_iter( + self, + path: dna.FieldPath, + path_root: dna.FieldPath = b"", + default=..., + null_terminated=True, + as_str=True, + ) -> typing.Iterator[typing.Tuple[dna.FieldPath, typing.Any]]: """Generator, yields (path, property value) tuples. If a property cannot be decoded, a string representing its DNA type @@ -613,20 +633,24 @@ class BlendFileBlock: try: # Try accessing as simple property - yield (path_full, - self.get(path_full, default, null_terminated, as_str)) + yield (path_full, self.get(path_full, default, null_terminated, as_str)) except exceptions.NoReaderImplemented as ex: # This was not a simple property, so recurse into its DNA Struct. dna_type = ex.dna_type struct_index = self.bfile.sdna_index_from_id.get(dna_type.dna_type_id) if struct_index is None: - yield (path_full, "<%s>" % dna_type.dna_type_id.decode('ascii')) + yield (path_full, "<%s>" % dna_type.dna_type_id.decode("ascii")) return # Recurse through the fields. for f in dna_type.fields: - yield from self.get_recursive_iter(f.name.name_only, path_full, default=default, - null_terminated=null_terminated, as_str=as_str) + yield from self.get_recursive_iter( + f.name.name_only, + path_full, + default=default, + null_terminated=null_terminated, + as_str=as_str, + ) def hash(self) -> int: """Generate a pointer-independent hash for the block. @@ -657,9 +681,10 @@ class BlendFileBlock: return dna_struct.field_set(self.bfile.header, self.bfile.fileobj, path, value) def get_pointer( - self, path: dna.FieldPath, - default=..., - ) -> typing.Union[None, 'BlendFileBlock']: + self, + path: dna.FieldPath, + default=..., + ) -> typing.Union[None, "BlendFileBlock"]: """Same as get() but dereferences a pointer. 
:raises exceptions.SegmentationFault: when there is no datablock with @@ -681,8 +706,9 @@ class BlendFileBlock: ex.field_path = path raise - def iter_array_of_pointers(self, path: dna.FieldPath, array_size: int) \ - -> typing.Iterator['BlendFileBlock']: + def iter_array_of_pointers( + self, path: dna.FieldPath, array_size: int + ) -> typing.Iterator["BlendFileBlock"]: """Dereference pointers from an array-of-pointers field. Use this function when you have a field like Mesh materials: @@ -698,8 +724,9 @@ class BlendFileBlock: array = self.get_pointer(path) assert array is not None - assert array.code == b'DATA', \ - 'Array data block should have code DATA, is %r' % array.code.decode() + assert array.code == b"DATA", ( + "Array data block should have code DATA, is %r" % array.code.decode() + ) file_offset = array.file_offset endian = self.bfile.header.endian @@ -711,10 +738,15 @@ class BlendFileBlock: address = endian.read_pointer(fileobj, ps) if address == 0: continue - yield self.bfile.dereference_pointer(address) + dereferenced = self.bfile.dereference_pointer(address) + if dereferenced is None: + # This can happen when strict pointer mode is disabled. + continue + yield dereferenced - def iter_fixed_array_of_pointers(self, path: dna.FieldPath) \ - -> typing.Iterator['BlendFileBlock']: + def iter_fixed_array_of_pointers( + self, path: dna.FieldPath + ) -> typing.Iterator["BlendFileBlock"]: """Yield blocks from a fixed-size array field. Use this function when you have a field like lamp textures: @@ -741,7 +773,12 @@ class BlendFileBlock: if not address: # Fixed-size arrays contain 0-pointers. continue - yield self.bfile.dereference_pointer(address) + + dereferenced = self.bfile.dereference_pointer(address) + if dereferenced is None: + # This can happen when strict pointer mode is disabled. + continue + yield dereferenced def __getitem__(self, path: dna.FieldPath): return self.get(path) @@ -758,16 +795,32 @@ class BlendFileBlock: try: yield self[k] except exceptions.NoReaderImplemented as ex: - yield '<%s>' % ex.dna_type.dna_type_id.decode('ascii') + yield "<%s>" % ex.dna_type.dna_type_id.decode("ascii") def items(self) -> typing.Iterable[typing.Tuple[bytes, typing.Any]]: for k in self.keys(): try: yield (k, self[k]) except exceptions.NoReaderImplemented as ex: - yield (k, '<%s>' % ex.dna_type.dna_type_id.decode('ascii')) + yield (k, "<%s>" % ex.dna_type.dna_type_id.decode("ascii")) - def items_recursive(self) -> typing.Iterator[typing.Tuple[dna.FieldPath, typing.Any]]: + def items_recursive( + self, + ) -> typing.Iterator[typing.Tuple[dna.FieldPath, typing.Any]]: """Generator, yields (property path, property value) recursively for all properties.""" for k in self.keys(): yield from self.get_recursive_iter(k, as_str=False) + + +def set_strict_pointer_mode(strict_pointers: bool) -> None: + """Control behaviour when a pointer to unknown memory is dereferenced. + + Strict pointer mode raise exceptions.SegmentationFault when dereferencing an + unknown pointer. This is the default. + + Set to False to disable this exception, and to return None instead, i.e. to + ignore such pointers. Note that this can cause None to be returned from a + non-nil pointer. 
+ """ + + BlendFile.strict_pointer_mode = strict_pointers diff --git a/blendfile/dna.py b/blendfile/dna.py index 21f60e4..061c894 100644 --- a/blendfile/dna.py +++ b/blendfile/dna.py @@ -42,38 +42,38 @@ class Name: self.array_size = self.calc_array_size() def __repr__(self): - return '%s(%r)' % (type(self).__qualname__, self.name_full) + return "%s(%r)" % (type(self).__qualname__, self.name_full) def as_reference(self, parent) -> bytes: if not parent: return self.name_only - return parent + b'.' + self.name_only + return parent + b"." + self.name_only def calc_name_only(self) -> bytes: - result = self.name_full.strip(b'*()') - index = result.find(b'[') + result = self.name_full.strip(b"*()") + index = result.find(b"[") if index == -1: return result return result[:index] def calc_is_pointer(self) -> bool: - return b'*' in self.name_full + return b"*" in self.name_full def calc_is_method_pointer(self): - return b'(*' in self.name_full + return b"(*" in self.name_full def calc_array_size(self): result = 1 partial_name = self.name_full while True: - idx_start = partial_name.find(b'[') + idx_start = partial_name.find(b"[") if idx_start < 0: break - idx_stop = partial_name.find(b']') - result *= int(partial_name[idx_start + 1:idx_stop]) - partial_name = partial_name[idx_stop + 1:] + idx_stop = partial_name.find(b"]") + result *= int(partial_name[idx_start + 1 : idx_stop]) + partial_name = partial_name[idx_stop + 1 :] return result @@ -89,24 +89,20 @@ class Field: :ivar offset: cached offset of the field, in bytes. """ - def __init__(self, - dna_type: 'Struct', - name: Name, - size: int, - offset: int) -> None: + def __init__(self, dna_type: "Struct", name: Name, size: int, offset: int) -> None: self.dna_type = dna_type self.name = name self.size = size self.offset = offset def __repr__(self): - return '<%r %r (%s)>' % (type(self).__qualname__, self.name, self.dna_type) + return "<%r %r (%s)>" % (type(self).__qualname__, self.name, self.dna_type) class Struct: """dna.Struct is a C-type structure stored in the DNA.""" - log = log.getChild('Struct') + log = log.getChild("Struct") def __init__(self, dna_type_id: bytes, size: int = None) -> None: """ @@ -121,13 +117,13 @@ class Struct: self._fields_by_name = {} # type: typing.Dict[bytes, Field] def __repr__(self): - return '%s(%r)' % (type(self).__qualname__, self.dna_type_id) + return "%s(%r)" % (type(self).__qualname__, self.dna_type_id) @property def size(self) -> int: if self._size is None: if not self._fields: - raise ValueError('Unable to determine size of fieldless %r' % self) + raise ValueError("Unable to determine size of fieldless %r" % self) last_field = max(self._fields, key=lambda f: f.offset) self._size = last_field.offset + last_field.size return self._size @@ -151,10 +147,9 @@ class Struct: def has_field(self, field_name: bytes) -> bool: return field_name in self._fields_by_name - def field_from_path(self, - pointer_size: int, - path: FieldPath) \ - -> typing.Tuple[Field, int]: + def field_from_path( + self, pointer_size: int, path: FieldPath + ) -> typing.Tuple[Field, int]: """ Support lookups as bytes or a tuple of bytes and optional index. 
@@ -181,12 +176,14 @@ class Struct: index = 0 if not isinstance(name, bytes): - raise TypeError('name should be bytes, but is %r' % type(name)) + raise TypeError("name should be bytes, but is %r" % type(name)) field = self._fields_by_name.get(name) if not field: - raise KeyError('%r has no field %r, only %r' % - (self, name, sorted(self._fields_by_name.keys()))) + raise KeyError( + "%r has no field %r, only %r" + % (self, name, sorted(self._fields_by_name.keys())) + ) offset = field.offset if index: @@ -195,8 +192,10 @@ class Struct: else: index_offset = field.dna_type.size * index if index_offset >= field.size: - raise OverflowError('path %r is out of bounds of its DNA type %s' % - (path, field.dna_type)) + raise OverflowError( + "path %r is out of bounds of its DNA type %s" + % (path, field.dna_type) + ) offset += index_offset if name_tail: @@ -205,14 +204,15 @@ class Struct: return field, offset - def field_get(self, - file_header: header.BlendFileHeader, - fileobj: typing.IO[bytes], - path: FieldPath, - default=..., - null_terminated=True, - as_str=True, - ) -> typing.Tuple[typing.Optional[Field], typing.Any]: + def field_get( + self, + file_header: header.BlendFileHeader, + fileobj: typing.IO[bytes], + path: FieldPath, + default=..., + null_terminated=True, + as_str=True, + ) -> typing.Tuple[typing.Optional[Field], typing.Any]: """Read the value of the field from the blend file. Assumes the file pointer of `fileobj` is seek()ed to the start of the @@ -248,22 +248,26 @@ class Struct: # Some special cases (pointers, strings/bytes) if dna_name.is_pointer: return field, endian.read_pointer(fileobj, file_header.pointer_size) - if dna_type.dna_type_id == b'char': - return field, self._field_get_char(file_header, fileobj, field, null_terminated, as_str) + if dna_type.dna_type_id == b"char": + return field, self._field_get_char( + file_header, fileobj, field, null_terminated, as_str + ) simple_readers = { - b'int': endian.read_int, - b'short': endian.read_short, - b'uint64_t': endian.read_ulong, - b'float': endian.read_float, + b"int": endian.read_int, + b"short": endian.read_short, + b"uint64_t": endian.read_ulong, + b"float": endian.read_float, } try: simple_reader = simple_readers[dna_type.dna_type_id] except KeyError: raise exceptions.NoReaderImplemented( - "%r exists but not simple type (%r), can't resolve field %r" % - (path, dna_type.dna_type_id.decode(), dna_name.name_only), - dna_name, dna_type) from None + "%r exists but not simple type (%r), can't resolve field %r" + % (path, dna_type.dna_type_id.decode(), dna_name.name_only), + dna_name, + dna_type, + ) from None if isinstance(path, tuple) and len(path) > 1 and isinstance(path[-1], int): # The caller wants to get a single item from an array. 
The offset we seeked to already @@ -275,12 +279,14 @@ class Struct: return field, [simple_reader(fileobj) for _ in range(dna_name.array_size)] return field, simple_reader(fileobj) - def _field_get_char(self, - file_header: header.BlendFileHeader, - fileobj: typing.IO[bytes], - field: 'Field', - null_terminated: typing.Optional[bool], - as_str: bool) -> typing.Any: + def _field_get_char( + self, + file_header: header.BlendFileHeader, + fileobj: typing.IO[bytes], + field: "Field", + null_terminated: typing.Optional[bool], + as_str: bool, + ) -> typing.Any: dna_name = field.name endian = file_header.endian @@ -294,21 +300,23 @@ class Struct: data = fileobj.read(dna_name.array_size) if as_str: - return data.decode('utf8') + return data.decode("utf8") return data - def field_set(self, - file_header: header.BlendFileHeader, - fileobj: typing.IO[bytes], - path: bytes, - value: typing.Any): + def field_set( + self, + file_header: header.BlendFileHeader, + fileobj: typing.IO[bytes], + path: bytes, + value: typing.Any, + ): """Write a value to the blend file. Assumes the file pointer of `fileobj` is seek()ed to the start of the struct on disk (e.g. the start of the BlendFileBlock containing the data). """ - assert isinstance(path, bytes), 'path should be bytes, but is %s' % type(path) + assert isinstance(path, bytes), "path should be bytes, but is %s" % type(path) field, offset = self.field_from_path(file_header.pointer_size, path) @@ -316,19 +324,29 @@ class Struct: dna_name = field.name endian = file_header.endian - if dna_type.dna_type_id != b'char': + if dna_type.dna_type_id not in endian.accepted_types(): msg = "Setting type %r is not supported for %s.%s" % ( - dna_type, self.dna_type_id.decode(), dna_name.name_full.decode()) + dna_type, + self.dna_type_id.decode(), + dna_name.name_full.decode(), + ) raise exceptions.NoWriterImplemented(msg, dna_name, dna_type) fileobj.seek(offset, os.SEEK_CUR) if self.log.isEnabledFor(logging.DEBUG): filepos = fileobj.tell() - thing = 'string' if isinstance(value, str) else 'bytes' - self.log.debug('writing %s %r at file offset %d / %x', thing, value, filepos, filepos) + if isinstance(value, (int, float)): + thing = dna_type.dna_type_id.decode() + else: + thing = "string" if isinstance(value, str) else "bytes" + self.log.debug( + "writing %s %r at file offset %d / %x", thing, value, filepos, filepos + ) - if isinstance(value, str): + if isinstance(value, (int, float)): + return endian.accepted_types()[dna_type.dna_type_id](fileobj, value) + elif isinstance(value, str): return endian.write_string(fileobj, value, dna_name.array_size) else: return endian.write_bytes(fileobj, value, dna_name.array_size) diff --git a/blendfile/dna_io.py b/blendfile/dna_io.py index ddfce71..ac46794 100644 --- a/blendfile/dna_io.py +++ b/blendfile/dna_io.py @@ -27,14 +27,14 @@ import typing class EndianIO: # TODO(Sybren): note as UCHAR: struct.Struct = None and move actual structs to LittleEndianTypes - UCHAR = struct.Struct(b' int: + def write_pointer(cls, fileobj: typing.IO[bytes], pointer_size: int, value: int): + """Write a pointer to a file.""" + + if pointer_size == 4: + return cls.write_uint(fileobj, value) + if pointer_size == 8: + return cls.write_ulong(fileobj, value) + raise ValueError("unsupported pointer size %d" % pointer_size) + + @classmethod + def write_string( + cls, fileobj: typing.IO[bytes], astring: str, fieldlen: int + ) -> int: """Write a (truncated) string as UTF-8. The string will always be written 0-terminated. 
@@ -94,7 +144,7 @@ class EndianIO: :returns: the number of bytes written. """ assert isinstance(astring, str) - encoded = astring.encode('utf-8') + encoded = astring.encode("utf-8") # Take into account we also need space for a trailing 0-byte. maxlen = fieldlen - 1 @@ -106,13 +156,13 @@ class EndianIO: # is valid UTF-8 again. while True: try: - encoded.decode('utf8') + encoded.decode("utf8") except UnicodeDecodeError: encoded = encoded[:-1] else: break - return fileobj.write(encoded + b'\0') + return fileobj.write(encoded + b"\0") @classmethod def write_bytes(cls, fileobj: typing.IO[bytes], data: bytes, fieldlen: int) -> int: @@ -126,7 +176,7 @@ class EndianIO: if len(data) >= fieldlen: to_write = data[0:fieldlen] else: - to_write = data + b'\0' + to_write = data + b"\0" return fileobj.write(to_write) @@ -137,27 +187,45 @@ class EndianIO: @classmethod def read_data0_offset(cls, data, offset): - add = data.find(b'\0', offset) - offset - return data[offset:offset + add] + add = data.find(b"\0", offset) - offset + return data[offset : offset + add] @classmethod def read_data0(cls, data): - add = data.find(b'\0') + add = data.find(b"\0") if add < 0: return data return data[:add] + @classmethod + def accepted_types(cls): + """Return a mapping from type name to writer function. + + This is mostly to make it easier to get the correct number write + function, given that Python's `int` and `float` can map to a whole range + of C types. + """ + return { + b"char": cls.write_char, + b"ushort": cls.write_ushort, + b"short": cls.write_short, + b"uint": cls.write_uint, + b"int": cls.write_int, + b"ulong": cls.write_ulong, + b"float": cls.write_float, + } + class LittleEndianTypes(EndianIO): pass class BigEndianTypes(LittleEndianTypes): - UCHAR = struct.Struct(b'>B') - USHORT = struct.Struct(b'>H') - USHORT2 = struct.Struct(b'>HH') # two shorts in a row - SSHORT = struct.Struct(b'>h') - UINT = struct.Struct(b'>I') - SINT = struct.Struct(b'>i') - FLOAT = struct.Struct(b'>f') - ULONG = struct.Struct(b'>Q') + UCHAR = struct.Struct(b">B") + USHORT = struct.Struct(b">H") + USHORT2 = struct.Struct(b">HH") # two shorts in a row + SSHORT = struct.Struct(b">h") + UINT = struct.Struct(b">I") + SINT = struct.Struct(b">i") + FLOAT = struct.Struct(b">f") + ULONG = struct.Struct(b">Q") diff --git a/blendfile/exceptions.py b/blendfile/exceptions.py index c2350c1..9aa1bc8 100644 --- a/blendfile/exceptions.py +++ b/blendfile/exceptions.py @@ -32,7 +32,7 @@ class BlendFileError(Exception): self.filepath = filepath def __str__(self): - return '%s: %s' % (super().__str__(), self.filepath) + return "%s: %s" % (super().__str__(), self.filepath) class NoDNA1Block(BlendFileError): diff --git a/blendfile/header.py b/blendfile/header.py index b059313..e05fe47 100644 --- a/blendfile/header.py +++ b/blendfile/header.py @@ -37,7 +37,8 @@ class BlendFileHeader: It contains information about the hardware architecture, which is relevant to the structure of the rest of the file. 
""" - structure = struct.Struct(b'7s1s1s3s') + + structure = struct.Struct(b"7s1s1s3s") def __init__(self, fileobj: typing.IO[bytes], path: pathlib.Path) -> None: log.debug("reading blend-file-header %s", path) @@ -48,31 +49,39 @@ class BlendFileHeader: self.magic = values[0] pointer_size_id = values[1] - if pointer_size_id == b'-': + if pointer_size_id == b"-": self.pointer_size = 8 - elif pointer_size_id == b'_': + elif pointer_size_id == b"_": self.pointer_size = 4 else: - raise exceptions.BlendFileError('invalid pointer size %r' % pointer_size_id, path) + raise exceptions.BlendFileError( + "invalid pointer size %r" % pointer_size_id, path + ) endian_id = values[2] - if endian_id == b'v': + if endian_id == b"v": self.endian = dna_io.LittleEndianTypes - self.endian_str = b'<' # indication for struct.Struct() - elif endian_id == b'V': + self.endian_str = b"<" # indication for struct.Struct() + elif endian_id == b"V": self.endian = dna_io.BigEndianTypes - self.endian_str = b'>' # indication for struct.Struct() + self.endian_str = b">" # indication for struct.Struct() else: - raise exceptions.BlendFileError('invalid endian indicator %r' % endian_id, path) + raise exceptions.BlendFileError( + "invalid endian indicator %r" % endian_id, path + ) version_id = values[3] self.version = int(version_id) def create_block_header_struct(self) -> struct.Struct: """Create a Struct instance for parsing data block headers.""" - return struct.Struct(b''.join(( - self.endian_str, - b'4sI', - b'I' if self.pointer_size == 4 else b'Q', - b'II', - ))) + return struct.Struct( + b"".join( + ( + self.endian_str, + b"4sI", + b"I" if self.pointer_size == 4 else b"Q", + b"II", + ) + ) + ) diff --git a/blendfile/iterators.py b/blendfile/iterators.py index 1a76e4c..21849a6 100644 --- a/blendfile/iterators.py +++ b/blendfile/iterators.py @@ -26,8 +26,9 @@ from . import BlendFileBlock from .dna import FieldPath -def listbase(block: typing.Optional[BlendFileBlock], next_path: FieldPath = b'next') \ - -> typing.Iterator[BlendFileBlock]: +def listbase( + block: typing.Optional[BlendFileBlock], next_path: FieldPath = b"next" +) -> typing.Iterator[BlendFileBlock]: """Generator, yields all blocks in the ListBase linked list.""" while block: yield block @@ -37,8 +38,9 @@ def listbase(block: typing.Optional[BlendFileBlock], next_path: FieldPath = b'ne block = block.bfile.dereference_pointer(next_ptr) -def sequencer_strips(sequence_editor: BlendFileBlock) \ - -> typing.Iterator[typing.Tuple[BlendFileBlock, int]]: +def sequencer_strips( + sequence_editor: BlendFileBlock, +) -> typing.Iterator[typing.Tuple[BlendFileBlock, int]]: """Generator, yield all sequencer strip blocks with their type number. Recurses into meta strips, yielding both the meta strip itself and the @@ -49,16 +51,16 @@ def sequencer_strips(sequence_editor: BlendFileBlock) \ def iter_seqbase(seqbase) -> typing.Iterator[typing.Tuple[BlendFileBlock, int]]: for seq in listbase(seqbase): - seq.refine_type(b'Sequence') - seq_type = seq[b'type'] + seq.refine_type(b"Sequence") + seq_type = seq[b"type"] yield seq, seq_type if seq_type == cdefs.SEQ_TYPE_META: # Recurse into this meta-sequence. 
- subseq = seq.get_pointer((b'seqbase', b'first')) + subseq = seq.get_pointer((b"seqbase", b"first")) yield from iter_seqbase(subseq) - sbase = sequence_editor.get_pointer((b'seqbase', b'first')) + sbase = sequence_editor.get_pointer((b"seqbase", b"first")) yield from iter_seqbase(sbase) @@ -66,5 +68,5 @@ def modifiers(object_block: BlendFileBlock) -> typing.Iterator[BlendFileBlock]: """Generator, yield the object's modifiers.""" # 'ob->modifiers[...]' - mods = object_block.get_pointer((b'modifiers', b'first')) - yield from listbase(mods, next_path=(b'modifier', b'next')) + mods = object_block.get_pointer((b"modifiers", b"first")) + yield from listbase(mods, next_path=(b"modifier", b"next")) diff --git a/blendfile/magic_compression.py b/blendfile/magic_compression.py new file mode 100644 index 0000000..51a1cc4 --- /dev/null +++ b/blendfile/magic_compression.py @@ -0,0 +1,169 @@ +# ***** BEGIN GPL LICENSE BLOCK ***** +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# ***** END GPL LICENCE BLOCK ***** +# +# (c) 2021, Blender Foundation + +import collections +import enum +import gzip +import logging +import os +import pathlib +import tempfile +import typing + +# Blender 3.0 replaces GZip with ZStandard compression. +# Since this is not a standard library package, be careful importing it and +# treat it as optional. +try: + import zstandard + + has_zstandard = True +except ImportError: + has_zstandard = False + +from . import exceptions + +# Magic numbers, see https://en.wikipedia.org/wiki/List_of_file_signatures +BLENDFILE_MAGIC = b"BLENDER" +GZIP_MAGIC = b"\x1f\x8b" + +# ZStandard has two magic numbers, the 2nd of which doesn't use the last nibble. 
+# See https://tools.ietf.org/id/draft-kucherawy-dispatch-zstd-00.html#rfc.section.2.1.1 +# and https://tools.ietf.org/id/draft-kucherawy-dispatch-zstd-00.html#rfc.section.2.3 +ZSTD_MAGIC = b"\x28\xB5\x2F\xFD" +ZSTD_MAGIC_SKIPPABLE = b"\x50\x2A\x4D\x18" +ZSTD_MAGIC_SKIPPABLE_MASK = b"\xF0\xFF\xFF\xFF" + +log = logging.getLogger(__name__) + + +# @dataclasses.dataclass +DecompressedFileInfo = collections.namedtuple( + "DecompressedFileInfo", "is_compressed path fileobj" +) +# is_compressed: bool +# path: pathlib.Path +# """The path of the decompressed file, or the input path if the file is not compressed.""" +# fileobj: BinaryIO + + +class Compression(enum.Enum): + UNRECOGNISED = -1 + NONE = 0 + GZIP = 1 + ZSTD = 2 + + +def open(path: pathlib.Path, mode: str, buffer_size: int) -> DecompressedFileInfo: + """Open the file, decompressing it into a temporary file if necesssary.""" + fileobj = path.open(mode, buffering=buffer_size) # typing.IO[bytes] + compression = find_compression_type(fileobj) + + if compression == Compression.UNRECOGNISED: + fileobj.close() + raise exceptions.BlendFileError("File is not a blend file", path) + + if compression == Compression.NONE: + return DecompressedFileInfo( + is_compressed=False, + path=path, + fileobj=fileobj, + ) + + log.debug("%s-compressed blendfile detected: %s", compression.name, path) + + # Decompress to a temporary file. + tmpfile = tempfile.NamedTemporaryFile() + fileobj.seek(0, os.SEEK_SET) + + decompressor = _decompressor(fileobj, mode, compression) + + with decompressor as compressed_file: + magic = compressed_file.read(len(BLENDFILE_MAGIC)) + if magic != BLENDFILE_MAGIC: + raise exceptions.BlendFileError("Compressed file is not a blend file", path) + + data = magic + while data: + tmpfile.write(data) + data = compressed_file.read(buffer_size) + + # Further interaction should be done with the uncompressed file. + fileobj.close() + return DecompressedFileInfo( + is_compressed=True, + path=pathlib.Path(tmpfile.name), + fileobj=tmpfile, + ) + + +def find_compression_type(fileobj: typing.IO[bytes]) -> Compression: + fileobj.seek(0, os.SEEK_SET) + + # This assumes that all magics are not longer than "BLENDER". 
+ magic = fileobj.read(len(BLENDFILE_MAGIC)) + if _matches_magic(magic, BLENDFILE_MAGIC): + return Compression.NONE + + if _matches_magic(magic, GZIP_MAGIC): + return Compression.GZIP + + if _matches_magic(magic, ZSTD_MAGIC): + return Compression.ZSTD + if _matches_magic_masked(magic, ZSTD_MAGIC_SKIPPABLE, ZSTD_MAGIC_SKIPPABLE_MASK): + return Compression.ZSTD + + return Compression.UNRECOGNISED + + +def _matches_magic_masked(value: bytes, magic: bytes, mask: bytes) -> bool: + """Returns True only if value & mask == magic & mask (ignoring trailing bytes in value).""" + + assert len(magic) == len(mask) + + int_value = int.from_bytes(value[: len(magic)], "little") + int_magic = int.from_bytes(magic, "little") + int_mask = int.from_bytes(mask, "little") + + return int_value & int_mask == int_magic & int_mask + + +def _matches_magic(value: bytes, magic: bytes) -> bool: + """Returns True only if value == magic (ignoring trailing bytes in value).""" + + return value[: len(magic)] == magic + + +def _decompressor( + fileobj: typing.IO[bytes], mode: str, compression: Compression +) -> typing.IO[bytes]: + if compression == Compression.GZIP: + decompressor = gzip.GzipFile(fileobj=fileobj, mode=mode) + return typing.cast(typing.IO[bytes], decompressor) + + if compression == Compression.ZSTD: + if not has_zstandard: + # The required module was not loaded, raise an exception about this. + raise EnvironmentError( + "File is compressed with ZStandard, install the `zstandard` module to support this." + ) + dctx = zstandard.ZstdDecompressor() + return dctx.stream_reader(fileobj) + + raise ValueError("Unsupported compression type: %s" % compression) diff --git a/bpathlib.py b/bpathlib.py index 5e878af..477292c 100644 --- a/bpathlib.py +++ b/bpathlib.py @@ -35,14 +35,16 @@ class BlendPath(bytes): def __new__(cls, path): if isinstance(path, pathlib.PurePath): - path = str(path).encode('utf-8') + path = str(path).encode("utf-8") if not isinstance(path, bytes): - raise TypeError('path must be bytes or pathlib.Path, but is %r' % path) + raise TypeError("path must be bytes or pathlib.Path, but is %r" % path) - return super().__new__(cls, path.replace(b'\\', b'/')) + return super().__new__(cls, path.replace(b"\\", b"/")) @classmethod - def mkrelative(cls, asset_path: pathlib.PurePath, bfile_path: pathlib.PurePath) -> 'BlendPath': + def mkrelative( + cls, asset_path: pathlib.PurePath, bfile_path: pathlib.PurePath + ) -> "BlendPath": """Construct a BlendPath to the asset relative to the blend file. Assumes that bfile_path is absolute. @@ -53,10 +55,14 @@ class BlendPath(bytes): from collections import deque # Only compare absolute paths. - assert bfile_path.is_absolute(), \ - 'BlendPath().mkrelative(bfile_path=%r) should get absolute bfile_path' % bfile_path - assert asset_path.is_absolute(), \ - 'BlendPath().mkrelative(asset_path=%r) should get absolute asset_path' % asset_path + assert bfile_path.is_absolute(), ( + "BlendPath().mkrelative(bfile_path=%r) should get absolute bfile_path" + % bfile_path + ) + assert asset_path.is_absolute(), ( + "BlendPath().mkrelative(asset_path=%r) should get absolute asset_path" + % asset_path + ) # There is no way to construct a relative path between drives. if bfile_path.drive != asset_path.drive: @@ -77,8 +83,8 @@ class BlendPath(bytes): rel_asset = pathlib.PurePath(*asset_parts) # TODO(Sybren): should we use sys.getfilesystemencoding() instead? 
- rel_bytes = str(rel_asset).encode('utf-8') - as_bytes = b'//' + len(bdir_parts) * b'../' + rel_bytes + rel_bytes = str(rel_asset).encode("utf-8") + as_bytes = b"//" + len(bdir_parts) * b"../" + rel_bytes return cls(as_bytes) def __str__(self) -> str: @@ -87,23 +93,23 @@ class BlendPath(bytes): Undecodable bytes are ignored so this function can be safely used for reporting. """ - return self.decode('utf8', errors='replace') + return self.decode("utf8", errors="replace") def __repr__(self) -> str: - return 'BlendPath(%s)' % super().__repr__() + return "BlendPath(%s)" % super().__repr__() def __truediv__(self, subpath: bytes): """Slash notation like pathlib.Path.""" sub = BlendPath(subpath) if sub.is_absolute(): raise ValueError("'a / b' only works when 'b' is a relative path") - return BlendPath(self.rstrip(b'/') + b'/' + sub) + return BlendPath(self.rstrip(b"/") + b"/" + sub) def __rtruediv__(self, parentpath: bytes): """Slash notation like pathlib.Path.""" if self.is_absolute(): raise ValueError("'a / b' only works when 'b' is a relative path") - return BlendPath(parentpath.rstrip(b'/') + b'/' + self) + return BlendPath(parentpath.rstrip(b"/") + b"/" + self) def to_path(self) -> pathlib.PurePath: """Convert this path to a pathlib.PurePath. @@ -118,32 +124,34 @@ class BlendPath(bytes): """ # TODO(Sybren): once we target Python 3.6, implement __fspath__(). try: - decoded = self.decode('utf8') + decoded = self.decode("utf8") except UnicodeDecodeError: decoded = self.decode(sys.getfilesystemencoding()) if self.is_blendfile_relative(): - raise ValueError('to_path() cannot be used on blendfile-relative paths') + raise ValueError("to_path() cannot be used on blendfile-relative paths") return pathlib.PurePath(decoded) def is_blendfile_relative(self) -> bool: - return self[:2] == b'//' + return self[:2] == b"//" def is_absolute(self) -> bool: if self.is_blendfile_relative(): return False - if self[0:1] == b'/': + if self[0:1] == b"/": return True # Windows style path starting with drive letter. - if (len(self) >= 3 and - (self.decode('utf8'))[0] in string.ascii_letters and - self[1:2] == b':' and - self[2:3] in {b'\\', b'/'}): + if ( + len(self) >= 3 + and (self.decode("utf8"))[0] in string.ascii_letters + and self[1:2] == b":" + and self[2:3] in {b"\\", b"/"} + ): return True return False - def absolute(self, root: bytes = b'') -> 'BlendPath': + def absolute(self, root: bytes = b"") -> "BlendPath": """Determine absolute path. :param root: root directory to compute paths relative to. @@ -175,9 +183,9 @@ def make_absolute(path: pathlib.PurePath) -> pathlib.Path: The type of the returned path is determined by the current platform. """ str_path = path.as_posix() - if len(str_path) >= 2 and str_path[0].isalpha() and str_path[1] == ':': + if len(str_path) >= 2 and str_path[0].isalpha() and str_path[1] == ":": # This is an absolute Windows path. It must be handled with care on non-Windows platforms. - if platform.system() != 'Windows': + if platform.system() != "Windows": # Normalize the POSIX-like part of the path, but leave out the drive letter. non_drive_path = str_path[2:] normalized = os.path.normpath(non_drive_path) @@ -203,7 +211,12 @@ def strip_root(path: pathlib.PurePath) -> pathlib.PurePosixPath: # This happens when running on POSIX but still handling paths # originating from a Windows machine. 
parts = path.parts - if parts and len(parts[0]) == 2 and parts[0][0].isalpha() and parts[0][1] == ':': + if ( + parts + and len(parts[0]) == 2 + and parts[0][0].isalpha() + and parts[0][1] == ":" + ): # The first part is a drive letter. return pathlib.PurePosixPath(parts[0][0], *path.parts[1:]) diff --git a/cdefs.py b/cdefs.py index 8a22f84..099a638 100644 --- a/cdefs.py +++ b/cdefs.py @@ -32,7 +32,7 @@ SEQ_TYPE_EFFECT = 8 IMA_SRC_FILE = 1 IMA_SRC_SEQUENCE = 2 IMA_SRC_MOVIE = 3 -IMA_SRC_TILED = 6 +IMA_SRC_TILED = 6 # UDIM # DNA_modifier_types.h eModifierType_Wave = 7 @@ -48,6 +48,7 @@ eModifierType_WeightVGProximity = 38 eModifierType_Ocean = 39 eModifierType_MeshCache = 46 eModifierType_MeshSequenceCache = 52 +eModifierType_Fluid = 56 eModifierType_Nodes = 57 # DNA_particle_types.h @@ -62,12 +63,40 @@ OB_DUPLIGROUP = 1 << 8 PTCACHE_DISK_CACHE = 64 PTCACHE_EXTERNAL = 512 +# DNA_ID_types.h +IDP_STRING = 0 +IDP_INT = 1 +IDP_FLOAT = 2 +IDP_ARRAY = 5 +IDP_GROUP = 6 +IDP_ID = 7 +IDP_DOUBLE = 8 +IDP_IDPARRAY = 9 +IDP_NUMTYPES = 10 + +# DNA_node_types.h +SOCK_CUSTOM = -1 # socket has no integer type +SOCK_FLOAT = 0 +SOCK_VECTOR = 1 +SOCK_RGBA = 2 +SOCK_SHADER = 3 +SOCK_BOOLEAN = 4 +SOCK_MESH = 5 # deprecated +SOCK_INT = 6 +SOCK_STRING = 7 +SOCK_OBJECT = 8 +SOCK_IMAGE = 9 +SOCK_GEOMETRY = 10 +SOCK_COLLECTION = 11 +SOCK_TEXTURE = 12 +SOCK_MATERIAL = 13 + # BKE_pointcache.h PTCACHE_FILE_PTCACHE = 0 PTCACHE_FILE_OPENVDB = 1 -PTCACHE_EXT = b'.bphys' -PTCACHE_EXT_VDB = b'.vdb' -PTCACHE_PATH = b'blendcache_' +PTCACHE_EXT = b".bphys" +PTCACHE_EXT_VDB = b".vdb" +PTCACHE_PATH = b"blendcache_" # BKE_node.h SH_NODE_TEX_IMAGE = 143 diff --git a/cli/__init__.py b/cli/__init__.py index 3270fae..f6bcd18 100644 --- a/cli/__init__.py +++ b/cli/__init__.py @@ -24,67 +24,99 @@ import datetime import logging import time -from . import blocks, common, pack, list_deps +from . import blocks, common, pack, list_deps, version def cli_main(): from blender_asset_tracer import __version__ - parser = argparse.ArgumentParser(description='BAT: Blender Asset Tracer v%s' % __version__) - common.add_flag(parser, 'profile', help='Run the profiler, write to bam.prof') + + parser = argparse.ArgumentParser( + description="BAT: Blender Asset Tracer v%s" % __version__ + ) + common.add_flag(parser, "profile", help="Run the profiler, write to bam.prof") # func is set by subparsers to indicate which function to run. 
- parser.set_defaults(func=None, - loglevel=logging.WARNING) + parser.set_defaults(func=None, loglevel=logging.WARNING) loggroup = parser.add_mutually_exclusive_group() - loggroup.add_argument('-v', '--verbose', dest='loglevel', - action='store_const', const=logging.INFO, - help='Log INFO level and higher') - loggroup.add_argument('-d', '--debug', dest='loglevel', - action='store_const', const=logging.DEBUG, - help='Log everything') - loggroup.add_argument('-q', '--quiet', dest='loglevel', - action='store_const', const=logging.ERROR, - help='Log at ERROR level and higher') + loggroup.add_argument( + "-v", + "--verbose", + dest="loglevel", + action="store_const", + const=logging.INFO, + help="Log INFO level and higher", + ) + loggroup.add_argument( + "-d", + "--debug", + dest="loglevel", + action="store_const", + const=logging.DEBUG, + help="Log everything", + ) + loggroup.add_argument( + "-q", + "--quiet", + dest="loglevel", + action="store_const", + const=logging.ERROR, + help="Log at ERROR level and higher", + ) + parser.add_argument( + "-S", + "--strict-pointers", + default=False, + action="store_true", + help="Crash on pointers to missing data; otherwise the missing data is just ignored.", + ) + subparsers = parser.add_subparsers( - help='Choose a subcommand to actually make BAT do something. ' - 'Global options go before the subcommand, ' - 'whereas subcommand-specific options go after it. ' - 'Use --help after the subcommand to get more info.') + help="Choose a subcommand to actually make BAT do something. " + "Global options go before the subcommand, " + "whereas subcommand-specific options go after it. " + "Use --help after the subcommand to get more info." + ) blocks.add_parser(subparsers) pack.add_parser(subparsers) list_deps.add_parser(subparsers) + version.add_parser(subparsers) args = parser.parse_args() config_logging(args) from blender_asset_tracer import __version__ + log = logging.getLogger(__name__) # Make sure the things we log in our local logger are visible if args.profile and args.loglevel > logging.INFO: log.setLevel(logging.INFO) - log.debug('Running BAT version %s', __version__) + log.debug("Running BAT version %s", __version__) if not args.func: - parser.error('No subcommand was given') + parser.error("No subcommand was given") + + set_strict_pointer_mode(args.strict_pointers) start_time = time.time() if args.profile: import cProfile - prof_fname = 'bam.prof' - log.info('Running profiler') - cProfile.runctx('args.func(args)', - globals=globals(), - locals=locals(), - filename=prof_fname) - log.info('Profiler exported data to %s', prof_fname) - log.info('Run "pyprof2calltree -i %r -k" to convert and open in KCacheGrind', prof_fname) + prof_fname = "bam.prof" + log.info("Running profiler") + cProfile.runctx( + "args.func(args)", globals=globals(), locals=locals(), filename=prof_fname + ) + log.info("Profiler exported data to %s", prof_fname) + log.info( + 'Run "pyprof2calltree -i %r -k" to convert and open in KCacheGrind', + prof_fname, + ) else: retval = args.func(args) duration = datetime.timedelta(seconds=time.time() - start_time) - log.info('Command took %s to complete', duration) + log.info("Command took %s to complete", duration) def config_logging(args): @@ -92,8 +124,14 @@ def config_logging(args): logging.basicConfig( level=logging.WARNING, - format='%(asctime)-15s %(levelname)8s %(name)-40s %(message)s', + format="%(asctime)-15s %(levelname)8s %(name)-40s %(message)s", ) # Only set the log level on our own logger. 
Otherwise # debug logging will be completely swamped. - logging.getLogger('blender_asset_tracer').setLevel(args.loglevel) + logging.getLogger("blender_asset_tracer").setLevel(args.loglevel) + + +def set_strict_pointer_mode(strict_pointers: bool) -> None: + from blender_asset_tracer import blendfile + + blendfile.set_strict_pointer_mode(strict_pointers) diff --git a/cli/blocks.py b/cli/blocks.py index ea450a6..b020fcd 100644 --- a/cli/blocks.py +++ b/cli/blocks.py @@ -34,19 +34,29 @@ class BlockTypeInfo: self.num_blocks = 0 self.sizes = [] self.blocks = [] - self.name = 'unset' + self.name = "unset" def add_parser(subparsers): """Add argparser for this subcommand.""" - parser = subparsers.add_parser('blocks', help=__doc__) + parser = subparsers.add_parser("blocks", help=__doc__) parser.set_defaults(func=cli_blocks) - parser.add_argument('blendfile', type=pathlib.Path) - parser.add_argument('-d', '--dump', default=False, action='store_true', - help='Hex-dump the biggest block') - parser.add_argument('-l', '--limit', default=10, type=int, - help='Limit the number of DNA types shown, default is 10') + parser.add_argument("blendfile", type=pathlib.Path) + parser.add_argument( + "-d", + "--dump", + default=False, + action="store_true", + help="Hex-dump the biggest block", + ) + parser.add_argument( + "-l", + "--limit", + default=10, + type=int, + help="Limit the number of DNA types shown, default is 10", + ) def by_total_bytes(info: BlockTypeInfo) -> int: @@ -54,23 +64,23 @@ def by_total_bytes(info: BlockTypeInfo) -> int: def block_key(block: blendfile.BlendFileBlock) -> str: - return '%s-%s' % (block.dna_type_name, block.code.decode()) + return "%s-%s" % (block.dna_type_name, block.code.decode()) def cli_blocks(args): bpath = args.blendfile if not bpath.exists(): - log.fatal('File %s does not exist', args.blendfile) + log.fatal("File %s does not exist", args.blendfile) return 3 per_blocktype = collections.defaultdict(BlockTypeInfo) - print('Opening %s' % bpath) + print("Opening %s" % bpath) bfile = blendfile.BlendFile(bpath) - print('Inspecting %s' % bpath) + print("Inspecting %s" % bpath) for block in bfile.blocks: - if block.code == b'DNA1': + if block.code == b"DNA1": continue index_as = block_key(block) @@ -81,49 +91,58 @@ def cli_blocks(args): info.sizes.append(block.size) info.blocks.append(block) - fmt = '%-35s %10s %10s %10s %10s' - print(fmt % ('Block type', 'Total Size', 'Num blocks', 'Avg Size', 'Median')) - print(fmt % (35 * '-', 10 * '-', 10 * '-', 10 * '-', 10 * '-')) + fmt = "%-35s %10s %10s %10s %10s" + print(fmt % ("Block type", "Total Size", "Num blocks", "Avg Size", "Median")) + print(fmt % (35 * "-", 10 * "-", 10 * "-", 10 * "-", 10 * "-")) infos = sorted(per_blocktype.values(), key=by_total_bytes, reverse=True) - for info in infos[:args.limit]: + for info in infos[: args.limit]: median_size = sorted(info.sizes)[len(info.sizes) // 2] - print(fmt % (info.name, - common.humanize_bytes(info.total_bytes), - info.num_blocks, - common.humanize_bytes(info.total_bytes // info.num_blocks), - common.humanize_bytes(median_size) - )) + print( + fmt + % ( + info.name, + common.humanize_bytes(info.total_bytes), + info.num_blocks, + common.humanize_bytes(info.total_bytes // info.num_blocks), + common.humanize_bytes(median_size), + ) + ) - print(70 * '-') + print(70 * "-") # From the blocks of the most space-using category, the biggest block. 
- biggest_block = sorted(infos[0].blocks, - key=lambda blck: blck.size, - reverse=True)[0] - print('Biggest %s block is %s at address %s' % ( - block_key(biggest_block), - common.humanize_bytes(biggest_block.size), - biggest_block.addr_old, - )) + biggest_block = sorted(infos[0].blocks, key=lambda blck: blck.size, reverse=True)[0] + print( + "Biggest %s block is %s at address %s" + % ( + block_key(biggest_block), + common.humanize_bytes(biggest_block.size), + biggest_block.addr_old, + ) + ) - print('Finding what points there') + print("Finding what points there") addr_to_find = biggest_block.addr_old found_pointer = False for block in bfile.blocks: for prop_path, prop_value in block.items_recursive(): if not isinstance(prop_value, int) or prop_value != addr_to_find: continue - print(' ', block, prop_path) + print(" ", block, prop_path) found_pointer = True if not found_pointer: - print('Nothing points there') + print("Nothing points there") if args.dump: - print('Hexdump:') + print("Hexdump:") bfile.fileobj.seek(biggest_block.file_offset) data = bfile.fileobj.read(biggest_block.size) line_len_bytes = 32 import codecs + for offset in range(0, len(data), line_len_bytes): - line = codecs.encode(data[offset:offset + line_len_bytes], 'hex').decode() - print('%6d -' % offset, ' '.join(line[i:i + 2] for i in range(0, len(line), 2))) + line = codecs.encode(data[offset : offset + line_len_bytes], "hex").decode() + print( + "%6d -" % offset, + " ".join(line[i : i + 2] for i in range(0, len(line), 2)), + ) diff --git a/cli/common.py b/cli/common.py index 938fd0b..ad617e5 100644 --- a/cli/common.py +++ b/cli/common.py @@ -29,11 +29,13 @@ def add_flag(argparser, flag_name: str, **kwargs): The flag defaults to False, and when present on the CLI stores True. """ - argparser.add_argument('-%s' % flag_name[0], - '--%s' % flag_name, - default=False, - action='store_true', - **kwargs) + argparser.add_argument( + "-%s" % flag_name[0], + "--%s" % flag_name, + default=False, + action="store_true", + **kwargs + ) def shorten(cwd: pathlib.Path, somepath: pathlib.Path) -> pathlib.Path: @@ -44,7 +46,7 @@ def shorten(cwd: pathlib.Path, somepath: pathlib.Path) -> pathlib.Path: return somepath -def humanize_bytes(size_in_bytes: int, precision: typing.Optional[int]=None): +def humanize_bytes(size_in_bytes: int, precision: typing.Optional[int] = None): """Return a humanized string representation of a number of bytes. 
Source: http://code.activestate.com/recipes/577081-humanized-representation-of-a-number-of-bytes @@ -78,22 +80,23 @@ def humanize_bytes(size_in_bytes: int, precision: typing.Optional[int]=None): precision = size_in_bytes >= 1024 abbrevs = ( - (1 << 50, 'PB'), - (1 << 40, 'TB'), - (1 << 30, 'GB'), - (1 << 20, 'MB'), - (1 << 10, 'kB'), - (1, 'B') + (1 << 50, "PB"), + (1 << 40, "TB"), + (1 << 30, "GB"), + (1 << 20, "MB"), + (1 << 10, "kB"), + (1, "B"), ) for factor, suffix in abbrevs: if size_in_bytes >= factor: break else: factor = 1 - suffix = 'B' - return '%.*f %s' % (precision, size_in_bytes / factor, suffix) + suffix = "B" + return "%.*f %s" % (precision, size_in_bytes / factor, suffix) -if __name__ == '__main__': +if __name__ == "__main__": import doctest + doctest.testmod() diff --git a/cli/list_deps.py b/cli/list_deps.py index 42c93ba..7b50317 100644 --- a/cli/list_deps.py +++ b/cli/list_deps.py @@ -36,27 +36,32 @@ log = logging.getLogger(__name__) def add_parser(subparsers): """Add argparser for this subcommand.""" - parser = subparsers.add_parser('list', help=__doc__) + parser = subparsers.add_parser("list", help=__doc__) parser.set_defaults(func=cli_list) - parser.add_argument('blendfile', type=pathlib.Path) - common.add_flag(parser, 'json', help='Output as JSON instead of human-readable text') - common.add_flag(parser, 'sha256', - help='Include SHA256sums in the output. Note that those may differ from the ' - 'SHA256sums in a BAT-pack when paths are rewritten.') - common.add_flag(parser, 'timing', help='Include timing information in the output') + parser.add_argument("blendfile", type=pathlib.Path) + common.add_flag( + parser, "json", help="Output as JSON instead of human-readable text" + ) + common.add_flag( + parser, + "sha256", + help="Include SHA256sums in the output. 
Note that those may differ from the " + "SHA256sums in a BAT-pack when paths are rewritten.", + ) + common.add_flag(parser, "timing", help="Include timing information in the output") def cli_list(args): bpath = args.blendfile if not bpath.exists(): - log.fatal('File %s does not exist', args.blendfile) + log.fatal("File %s does not exist", args.blendfile) return 3 if args.json: if args.sha256: - log.fatal('--sha256 can currently not be used in combination with --json') + log.fatal("--sha256 can currently not be used in combination with --json") if args.timing: - log.fatal('--timing can currently not be used in combination with --json') + log.fatal("--timing can currently not be used in combination with --json") report_json(bpath) else: report_text(bpath, include_sha256=args.sha256, show_timing=args.timing) @@ -66,13 +71,13 @@ def calc_sha_sum(filepath: pathlib.Path) -> typing.Tuple[str, float]: start = time.time() if filepath.is_dir(): - for subfile in filepath.rglob('*'): + for subfile in filepath.rglob("*"): calc_sha_sum(subfile) duration = time.time() - start - return '-multiple-', duration + return "-multiple-", duration summer = hashlib.sha256() - with filepath.open('rb') as infile: + with filepath.open("rb") as infile: while True: block = infile.read(32 * 1024) if not block: @@ -108,24 +113,24 @@ def report_text(bpath, *, include_sha256: bool, show_timing: bool): for assetpath in usage.files(): assetpath = bpathlib.make_absolute(assetpath) if assetpath in reported_assets: - log.debug('Already reported %s', assetpath) + log.debug("Already reported %s", assetpath) continue if include_sha256: shasum, time_spent = calc_sha_sum(assetpath) time_spent_on_shasums += time_spent - print(' ', shorten(assetpath), shasum) + print(" ", shorten(assetpath), shasum) else: - print(' ', shorten(assetpath)) + print(" ", shorten(assetpath)) reported_assets.add(assetpath) if show_timing: duration = time.time() - start_time - print('Spent %.2f seconds on producing this listing' % duration) + print("Spent %.2f seconds on producing this listing" % duration) if include_sha256: - print('Spent %.2f seconds on calculating SHA sums' % time_spent_on_shasums) + print("Spent %.2f seconds on calculating SHA sums" % time_spent_on_shasums) percentage = time_spent_on_shasums / duration * 100 - print(' (that is %d%% of the total time' % percentage) + print(" (that is %d%% of the total time" % percentage) class JSONSerialiser(json.JSONEncoder): diff --git a/cli/pack.py b/cli/pack.py index b7b400b..6a22a14 100644 --- a/cli/pack.py +++ b/cli/pack.py @@ -32,36 +32,62 @@ log = logging.getLogger(__name__) def add_parser(subparsers): """Add argparser for this subcommand.""" - parser = subparsers.add_parser('pack', help=__doc__) + parser = subparsers.add_parser("pack", help=__doc__) parser.set_defaults(func=cli_pack) - parser.add_argument('blendfile', type=pathlib.Path, - help='The Blend file to pack.') - parser.add_argument('target', type=str, - help="The target can be a directory, a ZIP file (does not have to exist " - "yet, just use 'something.zip' as target), " - "or a URL of S3 storage (s3://endpoint/path) " - "or Shaman storage (shaman://endpoint/#checkoutID).") + parser.add_argument("blendfile", type=pathlib.Path, help="The Blend file to pack.") + parser.add_argument( + "target", + type=str, + help="The target can be a directory, a ZIP file (does not have to exist " + "yet, just use 'something.zip' as target), " + "or a URL of S3 storage (s3://endpoint/path) " + "or Shaman storage (shaman://endpoint/#checkoutID).", + ) - 
parser.add_argument('-p', '--project', type=pathlib.Path,
-                        help='Root directory of your project. Paths to below this directory are '
-                             'kept in the BAT Pack as well, whereas references to assets from '
-                             'outside this directory will have to be rewitten. The blend file MUST '
-                             'be inside the project directory. If this option is ommitted, the '
-                             'directory containing the blend file is taken as the project '
-                             'directoy.')
-    parser.add_argument('-n', '--noop', default=False, action='store_true',
-                        help="Don't copy files, just show what would be done.")
-    parser.add_argument('-e', '--exclude', nargs='*', default='',
-                        help="Space-separated list of glob patterns (like '*.abc *.vbo') to "
-                             "exclude.")
-    parser.add_argument('-c', '--compress', default=False, action='store_true',
-                        help='Compress blend files while copying. This option is only valid when '
-                             'packing into a directory (contrary to ZIP file or S3 upload). '
-                             'Note that files will NOT be compressed when the destination file '
-                             'already exists and has the same size as the original file.')
-    parser.add_argument('-r', '--relative-only', default=False, action='store_true',
-                        help='Only pack assets that are referred to with a relative path (e.g. '
-                             'starting with `//`.')
+    parser.add_argument(
+        "-p",
+        "--project",
+        type=pathlib.Path,
+        help="Root directory of your project. Paths to below this directory are "
+        "kept in the BAT Pack as well, whereas references to assets from "
+        "outside this directory will have to be rewritten. The blend file MUST "
+        "be inside the project directory. If this option is omitted, the "
+        "directory containing the blend file is taken as the project "
+        "directory.",
+    )
+    parser.add_argument(
+        "-n",
+        "--noop",
+        default=False,
+        action="store_true",
+        help="Don't copy files, just show what would be done.",
+    )
+    parser.add_argument(
+        "-e",
+        "--exclude",
+        nargs="*",
+        default="",
+        help="Space-separated list of glob patterns (like '*.abc *.vbo') to "
+        "exclude.",
+    )
+    parser.add_argument(
+        "-c",
+        "--compress",
+        default=False,
+        action="store_true",
+        help="Compress blend files while copying. This option is only valid when "
+        "packing into a directory (contrary to ZIP file or S3 upload). "
+        "Note that files will NOT be compressed when the destination file "
+        "already exists and has the same size as the original file.",
+    )
+    parser.add_argument(
+        "-r",
+        "--relative-only",
+        default=False,
+        action="store_true",
+        help="Only pack assets that are referred to with a relative path (e.g. 
" + "starting with `//`.", + ) def cli_pack(args): @@ -72,55 +98,70 @@ def cli_pack(args): try: packer.execute() except blender_asset_tracer.pack.transfer.FileTransferError as ex: - log.error("%d files couldn't be copied, starting with %s", - len(ex.files_remaining), ex.files_remaining[0]) + log.error( + "%d files couldn't be copied, starting with %s", + len(ex.files_remaining), + ex.files_remaining[0], + ) raise SystemExit(1) -def create_packer(args, bpath: pathlib.Path, ppath: pathlib.Path, target: str) -> pack.Packer: - if target.startswith('s3:/'): +def create_packer( + args, bpath: pathlib.Path, ppath: pathlib.Path, target: str +) -> pack.Packer: + if target.startswith("s3:/"): if args.noop: - raise ValueError('S3 uploader does not support no-op.') + raise ValueError("S3 uploader does not support no-op.") if args.compress: - raise ValueError('S3 uploader does not support on-the-fly compression') + raise ValueError("S3 uploader does not support on-the-fly compression") if args.relative_only: - raise ValueError('S3 uploader does not support the --relative-only option') + raise ValueError("S3 uploader does not support the --relative-only option") packer = create_s3packer(bpath, ppath, pathlib.PurePosixPath(target)) - elif target.startswith('shaman+http:/') or target.startswith('shaman+https:/') \ - or target.startswith('shaman:/'): + elif ( + target.startswith("shaman+http:/") + or target.startswith("shaman+https:/") + or target.startswith("shaman:/") + ): if args.noop: - raise ValueError('Shaman uploader does not support no-op.') + raise ValueError("Shaman uploader does not support no-op.") if args.compress: - raise ValueError('Shaman uploader does not support on-the-fly compression') + raise ValueError("Shaman uploader does not support on-the-fly compression") if args.relative_only: - raise ValueError('Shaman uploader does not support the --relative-only option') + raise ValueError( + "Shaman uploader does not support the --relative-only option" + ) packer = create_shamanpacker(bpath, ppath, target) - elif target.lower().endswith('.zip'): + elif target.lower().endswith(".zip"): from blender_asset_tracer.pack import zipped if args.compress: - raise ValueError('ZIP packer does not support on-the-fly compression') + raise ValueError("ZIP packer does not support on-the-fly compression") - packer = zipped.ZipPacker(bpath, ppath, target, noop=args.noop, - relative_only=args.relative_only) + packer = zipped.ZipPacker( + bpath, ppath, target, noop=args.noop, relative_only=args.relative_only + ) else: - packer = pack.Packer(bpath, ppath, target, noop=args.noop, - compress=args.compress, relative_only=args.relative_only) + packer = pack.Packer( + bpath, + ppath, + target, + noop=args.noop, + compress=args.compress, + relative_only=args.relative_only, + ) if args.exclude: # args.exclude is a list, due to nargs='*', so we have to split and flatten. 
- globs = [glob - for globs in args.exclude - for glob in globs.split()] - log.info('Excluding: %s', ', '.join(repr(g) for g in globs)) + globs = [glob for globs in args.exclude for glob in globs.split()] + log.info("Excluding: %s", ", ".join(repr(g) for g in globs)) packer.exclude(*globs) return packer @@ -130,14 +171,16 @@ def create_s3packer(bpath, ppath, tpath) -> pack.Packer: # Split the target path into 's3:/', hostname, and actual target path parts = tpath.parts - endpoint = 'https://%s/' % parts[1] + endpoint = "https://%s/" % parts[1] tpath = pathlib.Path(*tpath.parts[2:]) - log.info('Uploading to S3-compatible storage %s at %s', endpoint, tpath) + log.info("Uploading to S3-compatible storage %s at %s", endpoint, tpath) return s3.S3Packer(bpath, ppath, tpath, endpoint=endpoint) -def create_shamanpacker(bpath: pathlib.Path, ppath: pathlib.Path, tpath: str) -> pack.Packer: +def create_shamanpacker( + bpath: pathlib.Path, ppath: pathlib.Path, tpath: str +) -> pack.Packer: """Creates a package for sending files to a Shaman server. URLs should have the form: @@ -149,11 +192,15 @@ def create_shamanpacker(bpath: pathlib.Path, ppath: pathlib.Path, tpath: str) -> endpoint, checkout_id = shaman.parse_endpoint(tpath) if not checkout_id: - log.warning('No checkout ID given on the URL. Going to send BAT pack to Shaman, ' - 'but NOT creating a checkout') + log.warning( + "No checkout ID given on the URL. Going to send BAT pack to Shaman, " + "but NOT creating a checkout" + ) - log.info('Uploading to Shaman server %s with job %s', endpoint, checkout_id) - return shaman.ShamanPacker(bpath, ppath, '/', endpoint=endpoint, checkout_id=checkout_id) + log.info("Uploading to Shaman server %s with job %s", endpoint, checkout_id) + return shaman.ShamanPacker( + bpath, ppath, "/", endpoint=endpoint, checkout_id=checkout_id + ) def paths_from_cli(args) -> typing.Tuple[pathlib.Path, pathlib.Path, str]: @@ -163,10 +210,10 @@ def paths_from_cli(args) -> typing.Tuple[pathlib.Path, pathlib.Path, str]: """ bpath = args.blendfile if not bpath.exists(): - log.critical('File %s does not exist', bpath) + log.critical("File %s does not exist", bpath) sys.exit(3) if bpath.is_dir(): - log.critical('%s is a directory, should be a blend file') + log.critical("%s is a directory, should be a blend file") sys.exit(3) bpath = bpathlib.make_absolute(bpath) @@ -174,27 +221,34 @@ def paths_from_cli(args) -> typing.Tuple[pathlib.Path, pathlib.Path, str]: if args.project is None: ppath = bpathlib.make_absolute(bpath).parent - log.warning('No project path given, using %s', ppath) + log.warning("No project path given, using %s", ppath) else: ppath = bpathlib.make_absolute(args.project) if not ppath.exists(): - log.critical('Project directory %s does not exist', ppath) + log.critical("Project directory %s does not exist", ppath) sys.exit(5) if not ppath.is_dir(): - log.warning('Project path %s is not a directory; using the parent %s', ppath, ppath.parent) + log.warning( + "Project path %s is not a directory; using the parent %s", + ppath, + ppath.parent, + ) ppath = ppath.parent try: bpath.relative_to(ppath) except ValueError: - log.critical('Project directory %s does not contain blend file %s', - args.project, bpath.absolute()) + log.critical( + "Project directory %s does not contain blend file %s", + args.project, + bpath.absolute(), + ) sys.exit(5) - log.info('Blend file to pack: %s', bpath) - log.info('Project path: %s', ppath) - log.info('Pack will be created in: %s', tpath) + log.info("Blend file to pack: %s", bpath) + 
log.info("Project path: %s", ppath) + log.info("Pack will be created in: %s", tpath) return bpath, ppath, tpath diff --git a/cli/version.py b/cli/version.py new file mode 100644 index 0000000..dd16011 --- /dev/null +++ b/cli/version.py @@ -0,0 +1,32 @@ +# ***** BEGIN GPL LICENSE BLOCK ***** +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# ***** END GPL LICENCE BLOCK ***** +# +# (c) 2021, Blender Foundation - Sybren A. Stüvel +"""Print version of BAT and exit.""" +from blender_asset_tracer import __version__ + + +def add_parser(subparsers): + """Add argparser for this subcommand.""" + + parser = subparsers.add_parser("version", help=__doc__) + parser.set_defaults(func=cli_version) + + +def cli_version(_): + print(__version__) diff --git a/compressor.py b/compressor.py index c800322..2f6b56d 100644 --- a/compressor.py +++ b/compressor.py @@ -5,10 +5,12 @@ import logging import pathlib import shutil +from blender_asset_tracer.blendfile import magic_compression + log = logging.getLogger(__name__) # Arbitrarily chosen block size, in bytes. -BLOCK_SIZE = 256 * 2 ** 10 +BLOCK_SIZE = 256 * 2**10 def move(src: pathlib.Path, dest: pathlib.Path): @@ -16,10 +18,10 @@ def move(src: pathlib.Path, dest: pathlib.Path): Only compresses files ending in .blend; others are moved as-is. """ - my_log = log.getChild('move') - my_log.debug('Moving %s to %s', src, dest) + my_log = log.getChild("move") + my_log.debug("Moving %s to %s", src, dest) - if src.suffix.lower() == '.blend': + if src.suffix.lower() == ".blend": _move_or_copy(src, dest, my_log, source_must_remain=False) else: shutil.move(str(src), str(dest)) @@ -30,19 +32,22 @@ def copy(src: pathlib.Path, dest: pathlib.Path): Only compresses files ending in .blend; others are copied as-is. """ - my_log = log.getChild('copy') - my_log.debug('Copying %s to %s', src, dest) + my_log = log.getChild("copy") + my_log.debug("Copying %s to %s", src, dest) - if src.suffix.lower() == '.blend': + if src.suffix.lower() == ".blend": _move_or_copy(src, dest, my_log, source_must_remain=True) else: shutil.copy2(str(src), str(dest)) -def _move_or_copy(src: pathlib.Path, dest: pathlib.Path, - my_log: logging.Logger, - *, - source_must_remain: bool): +def _move_or_copy( + src: pathlib.Path, + dest: pathlib.Path, + my_log: logging.Logger, + *, + source_must_remain: bool +): """Either move or copy a file, gzip-compressing if not compressed yet. :param src: File to copy/move. @@ -50,27 +55,28 @@ def _move_or_copy(src: pathlib.Path, dest: pathlib.Path, :source_must_remain: True to copy, False to move. :my_log: Logger to use for logging. """ - srcfile = src.open('rb') + srcfile = src.open("rb") try: - first_bytes = srcfile.read(2) - if first_bytes == b'\x1f\x8b': - # Already a gzipped file. 
+ comp_type = magic_compression.find_compression_type(srcfile) + if comp_type != magic_compression.Compression.NONE: + # Either already compressed or not a blend file. + # Either way we shouldn't attempt compressing this file. srcfile.close() - my_log.debug('Source file %s is GZipped already', src) + my_log.debug("Source file %s is compressed already", src) if source_must_remain: shutil.copy2(str(src), str(dest)) else: shutil.move(str(src), str(dest)) return - my_log.debug('Compressing %s on the fly while copying to %s', src, dest) - with gzip.open(str(dest), mode='wb') as destfile: - destfile.write(first_bytes) + my_log.debug("Compressing %s on the fly while copying to %s", src, dest) + srcfile.seek(0) + with gzip.open(str(dest), mode="wb") as destfile: shutil.copyfileobj(srcfile, destfile, BLOCK_SIZE) srcfile.close() if not source_must_remain: - my_log.debug('Deleting source file %s', src) + my_log.debug("Deleting source file %s", src) src.unlink() finally: if not srcfile.closed: diff --git a/pack/__init__.py b/pack/__init__.py index 92b77b1..3a60bc4 100644 --- a/pack/__init__.py +++ b/pack/__init__.py @@ -28,6 +28,7 @@ import typing from blender_asset_tracer import trace, bpathlib, blendfile from blender_asset_tracer.trace import file_sequence, result + from . import filesystem, transfer, progress log = logging.getLogger(__name__) @@ -93,14 +94,16 @@ class Packer: instance. """ - def __init__(self, - bfile: pathlib.Path, - project: pathlib.Path, - target: str, - *, - noop=False, - compress=False, - relative_only=False) -> None: + def __init__( + self, + bfile: pathlib.Path, + project: pathlib.Path, + target: str, + *, + noop=False, + compress=False, + relative_only=False + ) -> None: self.blendfile = bfile self.project = project self.target = target @@ -110,7 +113,7 @@ class Packer: self.relative_only = relative_only self._aborted = threading.Event() self._abort_lock = threading.RLock() - self._abort_reason = '' + self._abort_reason = "" # Set this to a custom Callback() subclass instance before calling # strategise() to receive progress reports. 
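For context: the progress hook mentioned in the comment above is the Callback class from pack/progress.py (its hunk appears further below). A minimal sketch of such a subclass, limited to the methods whose signatures are visible in this patch; the Packer property used to attach it is outside the hunks shown here, so the name progress_cb below is an assumption:

    import pathlib
    import typing

    from blender_asset_tracer.pack import progress


    class PrintingCallback(progress.Callback):
        """Report packing progress by printing to stdout."""

        def pack_start(self) -> None:
            print("packing started")

        def pack_done(
            self,
            output_blendfile: pathlib.PurePath,
            missing_files: typing.Set[pathlib.Path],
        ) -> None:
            print("packed; open %s (%d missing files)"
                  % (output_blendfile, len(missing_files)))

        def pack_aborted(self, reason: str) -> None:
            print("pack aborted: %s" % reason)


    # Hypothetical usage (the progress_cb attribute name is assumed):
    #   packer.progress_cb = PrintingCallback()
    #   packer.strategise()
    #   packer.execute()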
@@ -119,15 +122,15 @@ class Packer: self._exclude_globs = set() # type: typing.Set[str] - from blender_asset_tracer.cli import common - self._shorten = functools.partial(common.shorten, self.project) + self._shorten = functools.partial(shorten_path, self.project) if noop: - log.warning('Running in no-op mode, only showing what will be done.') + log.warning("Running in no-op mode, only showing what will be done.") # Filled by strategise() - self._actions = collections.defaultdict(AssetAction) \ - # type: typing.DefaultDict[pathlib.Path, AssetAction] + self._actions = collections.defaultdict( + AssetAction + ) # type: typing.DefaultDict[pathlib.Path, AssetAction] self.missing_files = set() # type: typing.Set[pathlib.Path] self._new_location_paths = set() # type: typing.Set[pathlib.Path] self._output_path = None # type: typing.Optional[pathlib.PurePath] @@ -138,7 +141,7 @@ class Packer: # Number of files we would copy, if not for --noop self._file_count = 0 - self._tmpdir = tempfile.TemporaryDirectory(prefix='bat-', suffix='-batpack') + self._tmpdir = tempfile.TemporaryDirectory(prefix="bat-", suffix="-batpack") self._rewrite_in = pathlib.Path(self._tmpdir.name) def _make_target_path(self, target: str) -> pathlib.PurePath: @@ -155,7 +158,7 @@ class Packer: self._tscb.flush() self._tmpdir.cleanup() - def __enter__(self) -> 'Packer': + def __enter__(self) -> "Packer": return self def __exit__(self, exc_type, exc_val, exc_tb) -> None: @@ -177,7 +180,7 @@ class Packer: self._progress_cb = new_progress_cb self._tscb = progress.ThreadSafeCallback(self._progress_cb) - def abort(self, reason='') -> None: + def abort(self, reason="") -> None: """Aborts the current packing process. Can be called from any thread. Aborts as soon as the running strategise @@ -196,12 +199,12 @@ class Packer: with self._abort_lock: reason = self._abort_reason if self._file_transferer is not None and self._file_transferer.has_error: - log.error('A transfer error occurred') + log.error("A transfer error occurred") reason = self._file_transferer.error_message() elif not self._aborted.is_set(): return - log.warning('Aborting') + log.warning("Aborting") self._tscb.flush() self._progress_cb.pack_aborted(reason) raise Aborted(reason) @@ -212,8 +215,10 @@ class Packer: Must be called before calling strategise(). """ if self._actions: - raise RuntimeError('%s.exclude() must be called before strategise()' % - self.__class__.__qualname__) + raise RuntimeError( + "%s.exclude() must be called before strategise()" + % self.__class__.__qualname__ + ) self._exclude_globs.update(globs) def strategise(self) -> None: @@ -236,7 +241,9 @@ class Packer: # network shares mapped to Windows drive letters back to their UNC # notation. Only resolving one but not the other (which can happen # with the abosolute() call above) can cause errors. 
- bfile_pp = self._target_path / bfile_path.relative_to(bpathlib.make_absolute(self.project)) + bfile_pp = self._target_path / bfile_path.relative_to( + bpathlib.make_absolute(self.project) + ) self._output_path = bfile_pp self._progress_cb.pack_start() @@ -251,11 +258,11 @@ class Packer: self._check_aborted() asset_path = usage.abspath if any(asset_path.match(glob) for glob in self._exclude_globs): - log.info('Excluding file: %s', asset_path) + log.info("Excluding file: %s", asset_path) continue - if self.relative_only and not usage.asset_path.startswith(b'//'): - log.info('Skipping absolute path: %s', usage.asset_path) + if self.relative_only and not usage.asset_path.startswith(b"//"): + log.info("Skipping absolute path: %s", usage.asset_path) continue if usage.is_sequence: @@ -269,14 +276,22 @@ class Packer: def _visit_sequence(self, asset_path: pathlib.Path, usage: result.BlockUsage): assert usage.is_sequence - for first_path in file_sequence.expand_sequence(asset_path): - if first_path.exists(): - break - else: - # At least the first file of a sequence must exist. - log.warning('Missing file: %s', asset_path) + def handle_missing_file(): + log.warning("Missing file: %s", asset_path) self.missing_files.add(asset_path) self._progress_cb.missing_file(asset_path) + + try: + for file_path in file_sequence.expand_sequence(asset_path): + if file_path.exists(): + break + else: + # At least some file of a sequence must exist. + handle_missing_file() + return + except file_sequence.DoesNotExist: + # The asset path should point to something existing. + handle_missing_file() return # Handle this sequence as an asset. @@ -291,7 +306,7 @@ class Packer: # Sequences are allowed to not exist at this point. if not usage.is_sequence and not asset_path.exists(): - log.warning('Missing file: %s', asset_path) + log.warning("Missing file: %s", asset_path) self.missing_files.add(asset_path) self._progress_cb.missing_file(asset_path) return @@ -315,11 +330,11 @@ class Packer: act.usages.append(usage) if needs_rewriting: - log.info('%s needs rewritten path to %s', bfile_path, usage.asset_path) + log.info("%s needs rewritten path to %s", bfile_path, usage.asset_path) act.path_action = PathAction.FIND_NEW_LOCATION self._new_location_paths.add(asset_path) else: - log.debug('%s can keep using %s', bfile_path, usage.asset_path) + log.debug("%s can keep using %s", bfile_path, usage.asset_path) asset_pp = self._target_path / asset_path.relative_to(self.project) act.new_path = asset_pp @@ -331,7 +346,7 @@ class Packer: assert isinstance(act, AssetAction) relpath = bpathlib.strip_root(path) - act.new_path = pathlib.Path(self._target_path, '_outside_project', relpath) + act.new_path = pathlib.Path(self._target_path, "_outside_project", relpath) def _group_rewrites(self) -> None: """For each blend file, collect which fields need rewriting. @@ -370,7 +385,7 @@ class Packer: def execute(self) -> None: """Execute the strategy.""" - assert self._actions, 'Run strategise() first' + assert self._actions, "Run strategise() first" if not self.noop: self._rewrite_paths() @@ -408,7 +423,7 @@ class Packer: This creates the BAT Pack but does not yet do any path rewriting. 
""" - log.debug('Executing %d copy actions', len(self._actions)) + log.debug("Executing %d copy actions", len(self._actions)) assert self._file_transferer is not None @@ -418,12 +433,12 @@ class Packer: self._copy_asset_and_deps(asset_path, action) if self.noop: - log.info('Would copy %d files to %s', self._file_count, self.target) + log.info("Would copy %d files to %s", self._file_count, self.target) return self._file_transferer.done_and_join() self._on_file_transfer_finished(file_transfer_completed=True) except KeyboardInterrupt: - log.info('File transfer interrupted with Ctrl+C, aborting.') + log.info("File transfer interrupted with Ctrl+C, aborting.") self._file_transferer.abort_and_join() self._on_file_transfer_finished(file_transfer_completed=False) raise @@ -460,23 +475,26 @@ class Packer: # It is *not* used for any disk I/O, since the file may not even # exist on the local filesystem. bfile_pp = action.new_path - assert bfile_pp is not None + assert bfile_pp is not None, \ + f"Action {action.path_action.name} on {bfile_path} has no final path set, unable to process" # Use tempfile to create a unique name in our temporary directoy. # The file should be deleted when self.close() is called, and not # when the bfile_tp object is GC'd. - bfile_tmp = tempfile.NamedTemporaryFile(dir=str(self._rewrite_in), - prefix='bat-', - suffix='-' + bfile_path.name, - delete=False) + bfile_tmp = tempfile.NamedTemporaryFile( + dir=str(self._rewrite_in), + prefix="bat-", + suffix="-" + bfile_path.name, + delete=False, + ) bfile_tp = pathlib.Path(bfile_tmp.name) action.read_from = bfile_tp - log.info('Rewriting %s to %s', bfile_path, bfile_tp) + log.info("Rewriting %s to %s", bfile_path, bfile_tp) # The original blend file will have been cached, so we can use it # to avoid re-parsing all data blocks in the to-be-rewritten file. bfile = blendfile.open_cached(bfile_path, assert_cached=True) - bfile.copy_and_rebind(bfile_tp, mode='rb+') + bfile.copy_and_rebind(bfile_tp, mode="rb+") for usage in action.rewrites: self._check_aborted() @@ -484,25 +502,35 @@ class Packer: asset_pp = self._actions[usage.abspath].new_path assert isinstance(asset_pp, pathlib.Path) - log.debug(' - %s is packed at %s', usage.asset_path, asset_pp) + log.debug(" - %s is packed at %s", usage.asset_path, asset_pp) relpath = bpathlib.BlendPath.mkrelative(asset_pp, bfile_pp) if relpath == usage.asset_path: - log.info(' - %s remained at %s', usage.asset_path, relpath) + log.info(" - %s remained at %s", usage.asset_path, relpath) continue - log.info(' - %s moved to %s', usage.asset_path, relpath) + log.info(" - %s moved to %s", usage.asset_path, relpath) # Find the same block in the newly copied file. block = bfile.dereference_pointer(usage.block.addr_old) + + # Pointers can point to a non-existing data block, in which case + # either a SegmentationFault exception is thrown, or None is + # returned, based on the strict pointer mode set on the + # BlendFile class. Since this block was already meant to be + # rewritten, it was found before. 
+            assert block is not None
+
             if usage.path_full_field is None:
                 dir_field = usage.path_dir_field
                 assert dir_field is not None
-                log.debug('   - updating field %s of block %s',
-                          dir_field.name.name_only,
-                          block)
+                log.debug(
+                    "   - updating field %s of block %s",
+                    dir_field.name.name_only,
+                    block,
+                )
                 reldir = bpathlib.BlendPath.mkrelative(asset_pp.parent, bfile_pp)
                 written = block.set(dir_field.name.name_only, reldir)
-                log.debug('   - written %d bytes', written)
+                log.debug("   - written %d bytes", written)

                 # BIG FAT ASSUMPTION that the filename (e.g. basename
                 # without path) does not change. This makes things much
@@ -510,10 +538,13 @@
                 # filename fields are in different blocks. See the
                 # blocks2assets.scene() function for the implementation.
             else:
-                log.debug('   - updating field %s of block %s',
-                          usage.path_full_field.name.name_only, block)
+                log.debug(
+                    "   - updating field %s of block %s",
+                    usage.path_full_field.name.name_only,
+                    block,
+                )
                 written = block.set(usage.path_full_field.name.name_only, relpath)
-                log.debug('   - written %d bytes', written)
+                log.debug("   - written %d bytes", written)

             # Make sure we close the file, otherwise changes may not be
             # flushed before it gets copied.
@@ -524,12 +555,13 @@
     def _copy_asset_and_deps(self, asset_path: pathlib.Path, action: AssetAction):
         # Copy the asset itself, but only if it's not a sequence (sequences are
         # handled below in the for-loop).
-        if '*' not in str(asset_path):
+        if "*" not in str(asset_path) and "<UDIM>" not in asset_path.name:
             packed_path = action.new_path
             assert packed_path is not None
             read_path = action.read_from or asset_path
-            self._send_to_target(read_path, packed_path,
-                                 may_move=action.read_from is not None)
+            self._send_to_target(
+                read_path, packed_path, may_move=action.read_from is not None
+            )

         # Copy its sequence dependencies.
         for usage in action.usages:
@@ -541,7 +573,7 @@

             # In case of globbing, we only support globbing by filename,
             # and not by directory.
-            assert '*' not in str(first_pp) or '*' in first_pp.name
+            assert "*" not in str(first_pp) or "*" in first_pp.name

             packed_base_dir = first_pp.parent
             for file_path in usage.files():
@@ -552,17 +584,16 @@

                 # Assumption: all data blocks using this asset use it the same way. 
break - def _send_to_target(self, - asset_path: pathlib.Path, - target: pathlib.PurePath, - may_move=False): + def _send_to_target( + self, asset_path: pathlib.Path, target: pathlib.PurePath, may_move=False + ): if self.noop: - print('%s -> %s' % (asset_path, target)) + print("%s -> %s" % (asset_path, target)) self._file_count += 1 return - verb = 'move' if may_move else 'copy' - log.debug('Queueing %s of %s', verb, asset_path) + verb = "move" if may_move else "copy" + log.debug("Queueing %s of %s", verb, asset_path) self._tscb.flush() @@ -575,13 +606,23 @@ class Packer: def _write_info_file(self): """Write a little text file with info at the top of the pack.""" - infoname = 'pack-info.txt' + infoname = "pack-info.txt" infopath = self._rewrite_in / infoname - log.debug('Writing info to %s', infopath) - with infopath.open('wt', encoding='utf8') as infofile: - print('This is a Blender Asset Tracer pack.', file=infofile) - print('Start by opening the following blend file:', file=infofile) - print(' %s' % self._output_path.relative_to(self._target_path).as_posix(), - file=infofile) + log.debug("Writing info to %s", infopath) + with infopath.open("wt", encoding="utf8") as infofile: + print("This is a Blender Asset Tracer pack.", file=infofile) + print("Start by opening the following blend file:", file=infofile) + print( + " %s" % self._output_path.relative_to(self._target_path).as_posix(), + file=infofile, + ) self._file_transferer.queue_move(infopath, self._target_path / infoname) + + +def shorten_path(cwd: pathlib.Path, somepath: pathlib.Path) -> pathlib.Path: + """Return 'somepath' relative to CWD if possible.""" + try: + return somepath.relative_to(cwd) + except ValueError: + return somepath diff --git a/pack/filesystem.py b/pack/filesystem.py index deba9db..b2deac7 100644 --- a/pack/filesystem.py +++ b/pack/filesystem.py @@ -81,9 +81,9 @@ class FileCopier(transfer.FileTransferer): # We have to catch exceptions in a broad way, as this is running in # a separate thread, and exceptions won't otherwise be seen. if self._abort.is_set(): - log.debug('Error transferring %s to %s: %s', src, dst, ex) + log.debug("Error transferring %s to %s: %s", src, dst, ex) else: - msg = 'Error transferring %s to %s' % (src, dst) + msg = "Error transferring %s to %s" % (src, dst) log.exception(msg) self.error_set(msg) # Put the files to copy back into the queue, and abort. This allows @@ -93,16 +93,16 @@ class FileCopier(transfer.FileTransferer): self.queue.put((src, dst, act), timeout=1.0) break - log.debug('All transfer threads queued') + log.debug("All transfer threads queued") pool.close() - log.debug('Waiting for transfer threads to finish') + log.debug("Waiting for transfer threads to finish") pool.join() - log.debug('All transfer threads finished') + log.debug("All transfer threads finished") if self.files_transferred: - log.info('Transferred %d files', self.files_transferred) + log.info("Transferred %d files", self.files_transferred) if self.files_skipped: - log.info('Skipped %d files', self.files_skipped) + log.info("Skipped %d files", self.files_skipped) def _thread(self, src: pathlib.Path, dst: pathlib.Path, act: transfer.Action): try: @@ -111,7 +111,7 @@ class FileCopier(transfer.FileTransferer): if self.has_error or self._abort.is_set(): raise AbortTransfer() - log.info('%s %s -> %s', act.name, src, dst) + log.info("%s %s -> %s", act.name, src, dst) tfunc(src, dst) except AbortTransfer: # either self._error or self._abort is already set. 
We just have to @@ -121,9 +121,9 @@ class FileCopier(transfer.FileTransferer): # We have to catch exceptions in a broad way, as this is running in # a separate thread, and exceptions won't otherwise be seen. if self._abort.is_set(): - log.debug('Error transferring %s to %s: %s', src, dst, ex) + log.debug("Error transferring %s to %s: %s", src, dst, ex) else: - msg = 'Error transferring %s to %s' % (src, dst) + msg = "Error transferring %s to %s" % (src, dst) log.exception(msg) self.error_set(msg) # Put the files to copy back into the queue, and abort. This allows @@ -132,7 +132,9 @@ class FileCopier(transfer.FileTransferer): # be reported there. self.queue.put((src, dst, act), timeout=1.0) - def _skip_file(self, src: pathlib.Path, dst: pathlib.Path, act: transfer.Action) -> bool: + def _skip_file( + self, src: pathlib.Path, dst: pathlib.Path, act: transfer.Action + ) -> bool: """Skip this file (return True) or not (return False).""" st_src = src.stat() # must exist, or it wouldn't be queued. if not dst.exists(): @@ -142,20 +144,20 @@ class FileCopier(transfer.FileTransferer): if st_dst.st_size != st_src.st_size or st_dst.st_mtime < st_src.st_mtime: return False - log.info('SKIP %s; already exists', src) + log.info("SKIP %s; already exists", src) if act == transfer.Action.MOVE: - log.debug('Deleting %s', src) + log.debug("Deleting %s", src) src.unlink() self.files_skipped += 1 return True def _move(self, srcpath: pathlib.Path, dstpath: pathlib.Path): - """Low-level file move""" + """Low-level file move.""" shutil.move(str(srcpath), str(dstpath)) def _copy(self, srcpath: pathlib.Path, dstpath: pathlib.Path): - """Low-level file copy""" - shutil.copy2(str(srcpath), str(dstpath)) + """Low-level file copy. dstpath needs to be a file and not a directory.""" + shutil.copyfile(str(srcpath), str(dstpath)) def move(self, srcpath: pathlib.Path, dstpath: pathlib.Path): s_stat = srcpath.stat() @@ -171,19 +173,19 @@ class FileCopier(transfer.FileTransferer): return if (srcpath, dstpath) in self.already_copied: - log.debug('SKIP %s; already copied', srcpath) + log.debug("SKIP %s; already copied", srcpath) return s_stat = srcpath.stat() # must exist, or it wouldn't be queued. if dstpath.exists(): d_stat = dstpath.stat() if d_stat.st_size == s_stat.st_size and d_stat.st_mtime >= s_stat.st_mtime: - log.info('SKIP %s; already exists', srcpath) + log.info("SKIP %s; already exists", srcpath) self.progress_cb.transfer_file_skipped(srcpath, dstpath) self.files_skipped += 1 return - log.debug('Copying %s -> %s', srcpath, dstpath) + log.debug("Copying %s -> %s", srcpath, dstpath) self._copy(srcpath, dstpath) self.already_copied.add((srcpath, dstpath)) @@ -191,8 +193,13 @@ class FileCopier(transfer.FileTransferer): self.report_transferred(s_stat.st_size) - def copytree(self, src: pathlib.Path, dst: pathlib.Path, - symlinks=False, ignore_dangling_symlinks=False): + def copytree( + self, + src: pathlib.Path, + dst: pathlib.Path, + symlinks=False, + ignore_dangling_symlinks=False, + ): """Recursively copy a directory tree. Copy of shutil.copytree() with some changes: @@ -204,7 +211,7 @@ class FileCopier(transfer.FileTransferer): """ if (src, dst) in self.already_copied: - log.debug('SKIP %s; already copied', src) + log.debug("SKIP %s; already copied", src) return if self.has_error or self._abort.is_set(): @@ -225,7 +232,9 @@ class FileCopier(transfer.FileTransferer): # code with a custom `copy_function` may rely on copytree # doing the right thing. 
linkto.symlink_to(dstpath) - shutil.copystat(str(srcpath), str(dstpath), follow_symlinks=not symlinks) + shutil.copystat( + str(srcpath), str(dstpath), follow_symlinks=not symlinks + ) else: # ignore dangling symlink if the flag is on if not linkto.exists() and ignore_dangling_symlinks: @@ -250,7 +259,7 @@ class FileCopier(transfer.FileTransferer): shutil.copystat(str(src), str(dst)) except OSError as why: # Copying file access times may fail on Windows - if getattr(why, 'winerror', None) is None: + if getattr(why, "winerror", None) is None: errors.append((src, dst, str(why))) if errors: raise shutil.Error(errors) diff --git a/pack/progress.py b/pack/progress.py index 4b5e02e..aa364e1 100644 --- a/pack/progress.py +++ b/pack/progress.py @@ -37,9 +37,11 @@ class Callback(blender_asset_tracer.trace.progress.Callback): def pack_start(self) -> None: """Called when packing starts.""" - def pack_done(self, - output_blendfile: pathlib.PurePath, - missing_files: typing.Set[pathlib.Path]) -> None: + def pack_done( + self, + output_blendfile: pathlib.PurePath, + missing_files: typing.Set[pathlib.Path], + ) -> None: """Called when packing is done.""" def pack_aborted(self, reason: str): @@ -86,7 +88,7 @@ class ThreadSafeCallback(Callback): """ def __init__(self, wrapped: Callback) -> None: - self.log = log.getChild('ThreadSafeCallback') + self.log = log.getChild("ThreadSafeCallback") self.wrapped = wrapped # Thread-safe queue for passing progress reports on the main thread. @@ -104,9 +106,11 @@ class ThreadSafeCallback(Callback): def pack_start(self) -> None: self._queue(self.wrapped.pack_start) - def pack_done(self, - output_blendfile: pathlib.PurePath, - missing_files: typing.Set[pathlib.Path]) -> None: + def pack_done( + self, + output_blendfile: pathlib.PurePath, + missing_files: typing.Set[pathlib.Path], + ) -> None: self._queue(self.wrapped.pack_done, output_blendfile, missing_files) def pack_aborted(self, reason: str): @@ -135,8 +139,9 @@ class ThreadSafeCallback(Callback): while True: try: - call = self._reporting_queue.get(block=timeout is not None, - timeout=timeout) + call = self._reporting_queue.get( + block=timeout is not None, timeout=timeout + ) except queue.Empty: return @@ -145,4 +150,4 @@ class ThreadSafeCallback(Callback): except Exception: # Don't let the handling of one callback call # block the entire flush process. - self.log.exception('Error calling %s', call) + self.log.exception("Error calling %s", call) diff --git a/pack/s3.py b/pack/s3.py index d12c54d..770a38c 100644 --- a/pack/s3.py +++ b/pack/s3.py @@ -32,17 +32,18 @@ log = logging.getLogger(__name__) # TODO(Sybren): compute MD5 sums of queued files in a separate thread, so that # we can upload a file to S3 and compute an MD5 of another file simultaneously. 
+ def compute_md5(filepath: pathlib.Path) -> str: - log.debug('Computing MD5sum of %s', filepath) + log.debug("Computing MD5sum of %s", filepath) hasher = hashlib.md5() - with filepath.open('rb') as infile: + with filepath.open("rb") as infile: while True: block = infile.read(102400) if not block: break hasher.update(block) md5 = hasher.hexdigest() - log.debug('MD5sum of %s is %s', filepath, md5) + log.debug("MD5sum of %s is %s", filepath, md5) return md5 @@ -63,20 +64,21 @@ class S3Packer(Packer): components = urllib.parse.urlparse(endpoint) profile_name = components.netloc endpoint = urllib.parse.urlunparse(components) - log.debug('Using Boto3 profile name %r for url %r', profile_name, endpoint) + log.debug("Using Boto3 profile name %r for url %r", profile_name, endpoint) self.session = boto3.Session(profile_name=profile_name) - self.client = self.session.client('s3', endpoint_url=endpoint) + self.client = self.session.client("s3", endpoint_url=endpoint) - def set_credentials(self, - endpoint: str, - access_key_id: str, - secret_access_key: str): + def set_credentials( + self, endpoint: str, access_key_id: str, secret_access_key: str + ): """Set S3 credentials.""" - self.client = self.session.client('s3', - endpoint_url=endpoint, - aws_access_key_id=access_key_id, - aws_secret_access_key=secret_access_key) + self.client = self.session.client( + "s3", + endpoint_url=endpoint, + aws_access_key_id=access_key_id, + aws_secret_access_key=secret_access_key, + ) def _create_file_transferer(self) -> transfer.FileTransferer: return S3Transferrer(self.client) @@ -107,7 +109,7 @@ class S3Transferrer(transfer.FileTransferer): except Exception: # We have to catch exceptions in a broad way, as this is running in # a separate thread, and exceptions won't otherwise be seen. - log.exception('Error transferring %s to %s', src, dst) + log.exception("Error transferring %s to %s", src, dst) # Put the files to copy back into the queue, and abort. This allows # the main thread to inspect the queue and see which files were not # copied. The one we just failed (due to this exception) should also @@ -116,9 +118,9 @@ class S3Transferrer(transfer.FileTransferer): return if files_transferred: - log.info('Transferred %d files', files_transferred) + log.info("Transferred %d files", files_transferred) if files_skipped: - log.info('Skipped %d files', files_skipped) + log.info("Skipped %d files", files_skipped) def upload_file(self, src: pathlib.Path, dst: pathlib.PurePath) -> bool: """Upload a file to an S3 bucket. 
@@ -135,25 +137,30 @@ class S3Transferrer(transfer.FileTransferer): existing_md5, existing_size = self.get_metadata(bucket, key) if md5 == existing_md5 and src.stat().st_size == existing_size: - log.debug('skipping %s, it already exists on the server with MD5 %s', - src, existing_md5) + log.debug( + "skipping %s, it already exists on the server with MD5 %s", + src, + existing_md5, + ) return False - log.info('Uploading %s', src) + log.info("Uploading %s", src) try: - self.client.upload_file(str(src), - Bucket=bucket, - Key=key, - Callback=self.report_transferred, - ExtraArgs={'Metadata': {'md5': md5}}) + self.client.upload_file( + str(src), + Bucket=bucket, + Key=key, + Callback=self.report_transferred, + ExtraArgs={"Metadata": {"md5": md5}}, + ) except self.AbortUpload: return False return True def report_transferred(self, bytes_transferred: int): if self._abort.is_set(): - log.warning('Interrupting ongoing upload') - raise self.AbortUpload('interrupting ongoing upload') + log.warning("Interrupting ongoing upload") + raise self.AbortUpload("interrupting ongoing upload") super().report_transferred(bytes_transferred) def get_metadata(self, bucket: str, key: str) -> typing.Tuple[str, int]: @@ -165,18 +172,18 @@ class S3Transferrer(transfer.FileTransferer): """ import botocore.exceptions - log.debug('Getting metadata of %s/%s', bucket, key) + log.debug("Getting metadata of %s/%s", bucket, key) try: info = self.client.head_object(Bucket=bucket, Key=key) except botocore.exceptions.ClientError as ex: - error_code = ex.response.get('Error').get('Code', 'Unknown') + error_code = ex.response.get("Error").get("Code", "Unknown") # error_code already is a string, but this makes the code forward # compatible with a time where they use integer codes. - if str(error_code) == '404': - return '', -1 - raise ValueError('error response:' % ex.response) from None + if str(error_code) == "404": + return "", -1 + raise ValueError("error response:" % ex.response) from None try: - return info['Metadata']['md5'], info['ContentLength'] + return info["Metadata"]["md5"], info["ContentLength"] except KeyError: - return '', -1 + return "", -1 diff --git a/pack/shaman/__init__.py b/pack/shaman/__init__.py index 21543a3..3672dbe 100644 --- a/pack/shaman/__init__.py +++ b/pack/shaman/__init__.py @@ -17,7 +17,12 @@ # ***** END GPL LICENCE BLOCK ***** # # (c) 2019, Blender Foundation - Sybren A. Stüvel -"""Shaman Client interface.""" +"""Shaman Client interface. + +Note that this supports the Shaman API of Flamenco Manager 2.x. Support for +Flamenco 3.x will be implemented in a new Flamenco Blender add-on, and not in +BAT itself. +""" import logging import os import pathlib @@ -38,13 +43,15 @@ log = logging.getLogger(__name__) class ShamanPacker(bat_pack.Packer): """Creates BAT Packs on a Shaman server.""" - def __init__(self, - bfile: pathlib.Path, - project: pathlib.Path, - target: str, - endpoint: str, - checkout_id: str, - **kwargs) -> None: + def __init__( + self, + bfile: pathlib.Path, + project: pathlib.Path, + target: str, + endpoint: str, + checkout_id: str, + **kwargs + ) -> None: """Constructor :param target: mock target '/' to construct project-relative paths. @@ -53,18 +60,20 @@ class ShamanPacker(bat_pack.Packer): super().__init__(bfile, project, target, **kwargs) self.checkout_id = checkout_id self.shaman_endpoint = endpoint - self._checkout_location = '' + self._checkout_location = "" def _get_auth_token(self) -> str: # TODO: get a token from the Flamenco Server. 
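In practice the token can simply be supplied through the environment before packing to a Shaman target, e.g. by running the (assumed) `bat` entry point as SHAMAN_JWT_TOKEN='<token>' bat pack scene.blend 'shaman://shaman.example.com/#myjob'; the hostname, checkout ID and blend file name here are made-up examples. Without the variable, the code below falls back to requesting a token from the server's get-token endpoint.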
- token_from_env = os.environ.get('SHAMAN_JWT_TOKEN') + token_from_env = os.environ.get("SHAMAN_JWT_TOKEN") if token_from_env: return token_from_env - log.warning('Using temporary hack to get auth token from Shaman, ' - 'set SHAMAN_JTW_TOKEN to prevent') - unauth_shaman = ShamanClient('', self.shaman_endpoint) - resp = unauth_shaman.get('get-token', timeout=10) + log.warning( + "Using temporary hack to get auth token from Shaman, " + "set SHAMAN_JTW_TOKEN to prevent" + ) + unauth_shaman = ShamanClient("", self.shaman_endpoint) + resp = unauth_shaman.get("get-token", timeout=10) resp.raise_for_status() return resp.text @@ -72,13 +81,17 @@ class ShamanPacker(bat_pack.Packer): # TODO: pass self._get_auth_token itself, so that the Transferer will be able to # decide when to get this token (and how many times). auth_token = self._get_auth_token() - return ShamanTransferrer(auth_token, self.project, self.shaman_endpoint, self.checkout_id) + return ShamanTransferrer( + auth_token, self.project, self.shaman_endpoint, self.checkout_id + ) def _make_target_path(self, target: str) -> pathlib.PurePath: - return pathlib.PurePosixPath('/') + return pathlib.PurePosixPath("/") def _on_file_transfer_finished(self, *, file_transfer_completed: bool): - super()._on_file_transfer_finished(file_transfer_completed=file_transfer_completed) + super()._on_file_transfer_finished( + file_transfer_completed=file_transfer_completed + ) assert isinstance(self._file_transferer, ShamanTransferrer) self._checkout_location = self._file_transferer.checkout_location @@ -104,7 +117,7 @@ class ShamanPacker(bat_pack.Packer): try: super().execute() except requests.exceptions.ConnectionError as ex: - log.exception('Error communicating with Shaman') + log.exception("Error communicating with Shaman") self.abort(str(ex)) self._check_aborted() @@ -114,17 +127,19 @@ def parse_endpoint(shaman_url: str) -> typing.Tuple[str, str]: urlparts = urllib.parse.urlparse(str(shaman_url)) - if urlparts.scheme in {'shaman', 'shaman+https'}: - scheme = 'https' - elif urlparts.scheme == 'shaman+http': - scheme = 'http' + if urlparts.scheme in {"shaman", "shaman+https"}: + scheme = "https" + elif urlparts.scheme == "shaman+http": + scheme = "http" else: - raise ValueError('Invalid scheme %r, choose shaman:// or shaman+http://', urlparts.scheme) + raise ValueError( + "Invalid scheme %r, choose shaman:// or shaman+http://", urlparts.scheme + ) checkout_id = urllib.parse.unquote(urlparts.fragment) - path = urlparts.path or '/' - new_urlparts = (scheme, urlparts.netloc, path, *urlparts[3:-1], '') + path = urlparts.path or "/" + new_urlparts = (scheme, urlparts.netloc, path, *urlparts[3:-1], "") endpoint = urllib.parse.urlunparse(new_urlparts) return endpoint, checkout_id diff --git a/pack/shaman/cache.py b/pack/shaman/cache.py index fc08257..27e29ea 100644 --- a/pack/shaman/cache.py +++ b/pack/shaman/cache.py @@ -30,7 +30,7 @@ from pathlib import Path from . import time_tracker -CACHE_ROOT = Path().home() / '.cache/shaman-client/shasums' +CACHE_ROOT = Path().home() / ".cache/shaman-client/shasums" MAX_CACHE_FILES_AGE_SECS = 3600 * 24 * 60 # 60 days log = logging.getLogger(__name__) @@ -53,7 +53,7 @@ def find_files(root: Path) -> typing.Iterable[Path]: # Ignore hidden files/dirs; these can be things like '.svn' or '.git', # which shouldn't be sent to Shaman. 
- if path.name.startswith('.'): + if path.name.startswith("."): continue if path.is_dir(): @@ -76,10 +76,10 @@ def compute_checksum(filepath: Path) -> str: """Compute the SHA256 checksum for the given file.""" blocksize = 32 * 1024 - log.debug('Computing checksum of %s', filepath) - with time_tracker.track_time(TimeInfo, 'computing_checksums'): + log.debug("Computing checksum of %s", filepath) + with time_tracker.track_time(TimeInfo, "computing_checksums"): hasher = hashlib.sha256() - with filepath.open('rb') as infile: + with filepath.open("rb") as infile: while True: block = infile.read(blocksize) if not block: @@ -98,7 +98,9 @@ def _cache_path(filepath: Path) -> Path: # Reverse the directory, because most variation is in the last bytes. rev_dir = str(filepath.parent)[::-1] encoded_path = filepath.stem + rev_dir + filepath.suffix - cache_key = base64.urlsafe_b64encode(encoded_path.encode(fs_encoding)).decode().rstrip('=') + cache_key = ( + base64.urlsafe_b64encode(encoded_path.encode(fs_encoding)).decode().rstrip("=") + ) cache_path = CACHE_ROOT / cache_key[:10] / cache_key[10:] return cache_path @@ -111,42 +113,44 @@ def compute_cached_checksum(filepath: Path) -> str: skip the actual SHA256 computation. """ - with time_tracker.track_time(TimeInfo, 'checksum_cache_handling'): + with time_tracker.track_time(TimeInfo, "checksum_cache_handling"): current_stat = filepath.stat() cache_path = _cache_path(filepath) try: - with cache_path.open('r') as cache_file: + with cache_path.open("r") as cache_file: payload = json.load(cache_file) except (OSError, ValueError): # File may not exist, or have invalid contents. pass else: - checksum = payload.get('checksum', '') - cached_mtime = payload.get('file_mtime', 0.0) - cached_size = payload.get('file_size', -1) + checksum = payload.get("checksum", "") + cached_mtime = payload.get("file_mtime", 0.0) + cached_size = payload.get("file_size", -1) - if (checksum - and current_stat.st_size == cached_size - and abs(cached_mtime - current_stat.st_mtime) < 0.01): + if ( + checksum + and current_stat.st_size == cached_size + and abs(cached_mtime - current_stat.st_mtime) < 0.01 + ): cache_path.touch() return checksum checksum = compute_checksum(filepath) - with time_tracker.track_time(TimeInfo, 'checksum_cache_handling'): + with time_tracker.track_time(TimeInfo, "checksum_cache_handling"): payload = { - 'checksum': checksum, - 'file_mtime': current_stat.st_mtime, - 'file_size': current_stat.st_size, + "checksum": checksum, + "file_mtime": current_stat.st_mtime, + "file_size": current_stat.st_size, } try: cache_path.parent.mkdir(parents=True, exist_ok=True) - with cache_path.open('w') as cache_file: + with cache_path.open("w") as cache_file: json.dump(payload, cache_file) except IOError as ex: - log.warning('Unable to write checksum cache file %s: %s', cache_path, ex) + log.warning("Unable to write checksum cache file %s: %s", cache_path, ex) return checksum @@ -157,7 +161,7 @@ def cleanup_cache() -> None: if not CACHE_ROOT.exists(): return - with time_tracker.track_time(TimeInfo, 'checksum_cache_handling'): + with time_tracker.track_time(TimeInfo, "checksum_cache_handling"): queue = deque([CACHE_ROOT]) rmdir_queue = [] @@ -194,4 +198,8 @@ def cleanup_cache() -> None: pass if num_removed_dirs or num_removed_files: - log.info('Cache Cleanup: removed %d dirs and %d files', num_removed_dirs, num_removed_files) + log.info( + "Cache Cleanup: removed %d dirs and %d files", + num_removed_dirs, + num_removed_files, + ) diff --git a/pack/shaman/client.py 
b/pack/shaman/client.py index 7a51ec0..dc245db 100644 --- a/pack/shaman/client.py +++ b/pack/shaman/client.py @@ -37,14 +37,14 @@ class ShamanClient: ) http_adapter = requests.adapters.HTTPAdapter(max_retries=retries) self._session = requests.session() - self._session.mount('https://', http_adapter) - self._session.mount('http://', http_adapter) + self._session.mount("https://", http_adapter) + self._session.mount("http://", http_adapter) if auth_token: - self._session.headers['Authorization'] = 'Bearer ' + auth_token + self._session.headers["Authorization"] = "Bearer " + auth_token def request(self, method: str, url: str, **kwargs) -> requests.Response: - kwargs.setdefault('timeout', 300) + kwargs.setdefault("timeout", 300) full_url = urllib.parse.urljoin(self._base_url, url) return self._session.request(method, full_url, **kwargs) @@ -56,8 +56,8 @@ class ShamanClient: :rtype: requests.Response """ - kwargs.setdefault('allow_redirects', True) - return self.request('GET', url, **kwargs) + kwargs.setdefault("allow_redirects", True) + return self.request("GET", url, **kwargs) def options(self, url, **kwargs): r"""Sends a OPTIONS request. Returns :class:`Response` object. @@ -67,8 +67,8 @@ class ShamanClient: :rtype: requests.Response """ - kwargs.setdefault('allow_redirects', True) - return self.request('OPTIONS', url, **kwargs) + kwargs.setdefault("allow_redirects", True) + return self.request("OPTIONS", url, **kwargs) def head(self, url, **kwargs): r"""Sends a HEAD request. Returns :class:`Response` object. @@ -78,8 +78,8 @@ class ShamanClient: :rtype: requests.Response """ - kwargs.setdefault('allow_redirects', False) - return self.request('HEAD', url, **kwargs) + kwargs.setdefault("allow_redirects", False) + return self.request("HEAD", url, **kwargs) def post(self, url, data=None, json=None, **kwargs): r"""Sends a POST request. Returns :class:`Response` object. @@ -92,7 +92,7 @@ class ShamanClient: :rtype: requests.Response """ - return self.request('POST', url, data=data, json=json, **kwargs) + return self.request("POST", url, data=data, json=json, **kwargs) def put(self, url, data=None, **kwargs): r"""Sends a PUT request. Returns :class:`Response` object. @@ -104,7 +104,7 @@ class ShamanClient: :rtype: requests.Response """ - return self.request('PUT', url, data=data, **kwargs) + return self.request("PUT", url, data=data, **kwargs) def patch(self, url, data=None, **kwargs): r"""Sends a PATCH request. Returns :class:`Response` object. @@ -116,7 +116,7 @@ class ShamanClient: :rtype: requests.Response """ - return self.request('PATCH', url, data=data, **kwargs) + return self.request("PATCH", url, data=data, **kwargs) def delete(self, url, **kwargs): r"""Sends a DELETE request. Returns :class:`Response` object. @@ -126,4 +126,4 @@ class ShamanClient: :rtype: requests.Response """ - return self.request('DELETE', url, **kwargs) + return self.request("DELETE", url, **kwargs) diff --git a/pack/shaman/transfer.py b/pack/shaman/transfer.py index 20e6821..cd5a6cd 100644 --- a/pack/shaman/transfer.py +++ b/pack/shaman/transfer.py @@ -48,9 +48,15 @@ class ShamanTransferrer(bat_transfer.FileTransferer): class AbortUpload(Exception): """Raised from the upload callback to abort an upload.""" - def __init__(self, auth_token: str, project_root: pathlib.Path, - shaman_endpoint: str, checkout_id: str) -> None: + def __init__( + self, + auth_token: str, + project_root: pathlib.Path, + shaman_endpoint: str, + checkout_id: str, + ) -> None: from . 
import client + super().__init__() self.client = client.ShamanClient(auth_token, shaman_endpoint) self.project_root = project_root @@ -63,7 +69,7 @@ class ShamanTransferrer(bat_transfer.FileTransferer): # checkout. This can then be combined with the project-relative path # of the to-be-rendered blend file (e.g. the one 'bat pack' was pointed # at). - self._checkout_location = '' + self._checkout_location = "" self.uploaded_files = 0 self.uploaded_bytes = 0 @@ -76,24 +82,32 @@ class ShamanTransferrer(bat_transfer.FileTransferer): # Construct the Shaman Checkout Definition file. # This blocks until we know the entire list of files to transfer. - definition_file, allowed_relpaths, delete_when_done = self._create_checkout_definition() + ( + definition_file, + allowed_relpaths, + delete_when_done, + ) = self._create_checkout_definition() if not definition_file: # An error has already been logged. return - self.log.info('Created checkout definition file of %d KiB', - len(definition_file) // 1024) - self.log.info('Feeding %d files to the Shaman', len(self._file_info)) + self.log.info( + "Created checkout definition file of %d KiB", + len(definition_file) // 1024, + ) + self.log.info("Feeding %d files to the Shaman", len(self._file_info)) if self.log.isEnabledFor(logging.INFO): for path in self._file_info: - self.log.info(' - %s', path) + self.log.info(" - %s", path) # Try to upload all the files. failed_paths = set() # type: typing.Set[str] max_tries = 50 for try_index in range(max_tries): # Send the file to the Shaman and see what we still need to send there. - to_upload = self._send_checkout_def_to_shaman(definition_file, allowed_relpaths) + to_upload = self._send_checkout_def_to_shaman( + definition_file, allowed_relpaths + ) if to_upload is None: # An error has already been logged. return @@ -102,7 +116,7 @@ class ShamanTransferrer(bat_transfer.FileTransferer): break # Send the files that still need to be sent. - self.log.info('Upload attempt %d', try_index + 1) + self.log.info("Upload attempt %d", try_index + 1) failed_paths = self._upload_files(to_upload) if not failed_paths: break @@ -113,11 +127,13 @@ class ShamanTransferrer(bat_transfer.FileTransferer): # file to the Shaman and obtain a new list of files to upload. if failed_paths: - self.log.error('Aborting upload due to too many failures') - self.error_set('Giving up after %d attempts to upload the files' % max_tries) + self.log.error("Aborting upload due to too many failures") + self.error_set( + "Giving up after %d attempts to upload the files" % max_tries + ) return - self.log.info('All files uploaded succesfully') + self.log.info("All files uploaded succesfully") self._request_checkout(definition_file) # Delete the files that were supposed to be moved. @@ -127,12 +143,13 @@ class ShamanTransferrer(bat_transfer.FileTransferer): except Exception as ex: # We have to catch exceptions in a broad way, as this is running in # a separate thread, and exceptions won't otherwise be seen. 
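A minimal sketch (not part of the patch) of the checkout-definition format built by _create_checkout_definition() in the hunks below: one plain-text line per file of the form "<checksum> <filesize> <relpath>", newline-joined, which is later POSTed to the Shaman's checkout/requirements endpoint. Checksums, sizes and paths here are made up:

    # One line per file: "<checksum> <filesize> <relpath>", newline-joined.
    # Values below are placeholders, not real SHA256 sums.
    definition_lines = [
        b"9f86d081884c7d65 1024 textures/wood.jpg",
        b"60303ae22b998861 20480 libs/props.blend",
    ]
    definition_file = b"\n".join(definition_lines)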
- self.log.exception('Error transferring files to Shaman') - self.error_set('Unexpected exception transferring files to Shaman: %s' % ex) + self.log.exception("Error transferring files to Shaman") + self.error_set("Unexpected exception transferring files to Shaman: %s" % ex) # noinspection PyBroadException - def _create_checkout_definition(self) \ - -> typing.Tuple[bytes, typing.Set[str], typing.List[pathlib.Path]]: + def _create_checkout_definition( + self, + ) -> typing.Tuple[bytes, typing.Set[str], typing.List[pathlib.Path]]: """Create the checkout definition file for this BAT pack. :returns: the checkout definition (as bytes), a set of paths in that file, @@ -162,8 +179,8 @@ class ShamanTransferrer(bat_transfer.FileTransferer): filesize=filesize, abspath=src, ) - line = '%s %s %s' % (checksum, filesize, relpath) - definition_lines.append(line.encode('utf8')) + line = "%s %s %s" % (checksum, filesize, relpath) + definition_lines.append(line.encode("utf8")) relpaths.add(relpath) if act == bat_transfer.Action.MOVE: @@ -171,7 +188,7 @@ class ShamanTransferrer(bat_transfer.FileTransferer): except Exception: # We have to catch exceptions in a broad way, as this is running in # a separate thread, and exceptions won't otherwise be seen. - msg = 'Error transferring %s to %s' % (src, dst) + msg = "Error transferring %s to %s" % (src, dst) self.log.exception(msg) # Put the files to copy back into the queue, and abort. This allows # the main thread to inspect the queue and see which files were not @@ -179,35 +196,39 @@ class ShamanTransferrer(bat_transfer.FileTransferer): # be reported there. self.queue.put((src, dst, act)) self.error_set(msg) - return b'', set(), delete_when_done + return b"", set(), delete_when_done cache.cleanup_cache() - return b'\n'.join(definition_lines), relpaths, delete_when_done + return b"\n".join(definition_lines), relpaths, delete_when_done - def _send_checkout_def_to_shaman(self, definition_file: bytes, - allowed_relpaths: typing.Set[str]) \ - -> typing.Optional[collections.deque]: + def _send_checkout_def_to_shaman( + self, definition_file: bytes, allowed_relpaths: typing.Set[str] + ) -> typing.Optional[collections.deque]: """Send the checkout definition file to the Shaman. :return: An iterable of paths (relative to the project root) that still need to be uploaded, or None if there was an error. 
""" - resp = self.client.post('checkout/requirements', data=definition_file, stream=True, - headers={'Content-Type': 'text/plain'}, - timeout=15) + resp = self.client.post( + "checkout/requirements", + data=definition_file, + stream=True, + headers={"Content-Type": "text/plain"}, + timeout=15, + ) if resp.status_code >= 300: - msg = 'Error from Shaman, code %d: %s' % (resp.status_code, resp.text) + msg = "Error from Shaman, code %d: %s" % (resp.status_code, resp.text) self.log.error(msg) self.error_set(msg) return None to_upload = collections.deque() # type: collections.deque for line in resp.iter_lines(): - response, path = line.decode().split(' ', 1) - self.log.debug(' %s: %s', response, path) + response, path = line.decode().split(" ", 1) + self.log.debug(" %s: %s", response, path) if path not in allowed_relpaths: - msg = 'Shaman requested path we did not intend to upload: %r' % path + msg = "Shaman requested path we did not intend to upload: %r" % path self.log.error(msg) self.error_set(msg) return None @@ -216,13 +237,13 @@ class ShamanTransferrer(bat_transfer.FileTransferer): to_upload.appendleft(path) elif response == response_already_uploading: to_upload.append(path) - elif response == 'ERROR': - msg = 'Error from Shaman: %s' % path + elif response == "ERROR": + msg = "Error from Shaman: %s" % path self.log.error(msg) self.error_set(msg) return None else: - msg = 'Unknown response from Shaman for path %r: %r' % (path, response) + msg = "Unknown response from Shaman for path %r: %r" % (path, response) self.log.error(msg) self.error_set(msg) return None @@ -240,7 +261,9 @@ class ShamanTransferrer(bat_transfer.FileTransferer): def defer(some_path: str): nonlocal to_upload - self.log.info(' %s deferred (already being uploaded by someone else)', some_path) + self.log.info( + " %s deferred (already being uploaded by someone else)", some_path + ) deferred_paths.add(some_path) # Instead of deferring this one file, randomize the files to upload. @@ -251,35 +274,41 @@ class ShamanTransferrer(bat_transfer.FileTransferer): to_upload = collections.deque(all_files) if not to_upload: - self.log.info('All %d files are at the Shaman already', len(self._file_info)) + self.log.info( + "All %d files are at the Shaman already", len(self._file_info) + ) self.report_transferred(0) return failed_paths - self.log.info('Going to upload %d of %d files', len(to_upload), len(self._file_info)) + self.log.info( + "Going to upload %d of %d files", len(to_upload), len(self._file_info) + ) while to_upload: # After too many failures, just retry to get a fresh set of files to upload. if len(failed_paths) > MAX_FAILED_PATHS: - self.log.info('Too many failures, going to abort this iteration') + self.log.info("Too many failures, going to abort this iteration") failed_paths.update(to_upload) return failed_paths path = to_upload.popleft() fileinfo = self._file_info[path] - self.log.info(' %s', path) + self.log.info(" %s", path) headers = { - 'X-Shaman-Original-Filename': path, + "X-Shaman-Original-Filename": path, } # Let the Shaman know whether we can defer uploading this file or not. 
- can_defer = (len(deferred_paths) < MAX_DEFERRED_PATHS - and path not in deferred_paths - and len(to_upload)) + can_defer = ( + len(deferred_paths) < MAX_DEFERRED_PATHS + and path not in deferred_paths + and len(to_upload) + ) if can_defer: - headers['X-Shaman-Can-Defer-Upload'] = 'true' + headers["X-Shaman-Can-Defer-Upload"] = "true" - url = 'files/%s/%d' % (fileinfo.checksum, fileinfo.filesize) + url = "files/%s/%d" % (fileinfo.checksum, fileinfo.filesize) try: - with fileinfo.abspath.open('rb') as infile: + with fileinfo.abspath.open("rb") as infile: resp = self.client.post(url, data=infile, headers=headers) except requests.ConnectionError as ex: @@ -290,7 +319,9 @@ class ShamanTransferrer(bat_transfer.FileTransferer): # connection. defer(path) else: - self.log.info(' %s could not be uploaded, might retry later: %s', path, ex) + self.log.info( + " %s could not be uploaded, might retry later: %s", path, ex + ) failed_paths.add(path) continue @@ -301,12 +332,15 @@ class ShamanTransferrer(bat_transfer.FileTransferer): if can_defer: defer(path) else: - self.log.info(' %s skipped (already existed on the server)', path) + self.log.info(" %s skipped (already existed on the server)", path) continue if resp.status_code >= 300: - msg = 'Error from Shaman uploading %s, code %d: %s' % ( - fileinfo.abspath, resp.status_code, resp.text) + msg = "Error from Shaman uploading %s, code %d: %s" % ( + fileinfo.abspath, + resp.status_code, + resp.text, + ) self.log.error(msg) self.error_set(msg) return failed_paths @@ -318,42 +352,53 @@ class ShamanTransferrer(bat_transfer.FileTransferer): self.report_transferred(file_size) if not failed_paths: - self.log.info('Done uploading %d bytes in %d files', - self.uploaded_bytes, self.uploaded_files) + self.log.info( + "Done uploading %d bytes in %d files", + self.uploaded_bytes, + self.uploaded_files, + ) else: - self.log.info('Uploaded %d bytes in %d files so far', - self.uploaded_bytes, self.uploaded_files) + self.log.info( + "Uploaded %d bytes in %d files so far", + self.uploaded_bytes, + self.uploaded_files, + ) return failed_paths def report_transferred(self, bytes_transferred: int): if self._abort.is_set(): - self.log.warning('Interrupting ongoing upload') - raise self.AbortUpload('interrupting ongoing upload') + self.log.warning("Interrupting ongoing upload") + raise self.AbortUpload("interrupting ongoing upload") super().report_transferred(bytes_transferred) def _request_checkout(self, definition_file: bytes): """Ask the Shaman to create a checkout of this BAT pack.""" if not self.checkout_id: - self.log.warning('NOT requesting checkout at Shaman') + self.log.warning("NOT requesting checkout at Shaman") return - self.log.info('Requesting checkout at Shaman for checkout_id=%r', self.checkout_id) - resp = self.client.post('checkout/create/%s' % self.checkout_id, data=definition_file, - headers={'Content-Type': 'text/plain'}) + self.log.info( + "Requesting checkout at Shaman for checkout_id=%r", self.checkout_id + ) + resp = self.client.post( + "checkout/create/%s" % self.checkout_id, + data=definition_file, + headers={"Content-Type": "text/plain"}, + ) if resp.status_code >= 300: - msg = 'Error from Shaman, code %d: %s' % (resp.status_code, resp.text) + msg = "Error from Shaman, code %d: %s" % (resp.status_code, resp.text) self.log.error(msg) self.error_set(msg) return self._checkout_location = resp.text.strip() - self.log.info('Response from Shaman, code %d: %s', resp.status_code, resp.text) + self.log.info("Response from Shaman, code %d: %s", 
resp.status_code, resp.text) @property def checkout_location(self) -> str: """Returns the checkout location, or '' if no checkout was made.""" if not self._checkout_location: - return '' + return "" return self._checkout_location diff --git a/pack/transfer.py b/pack/transfer.py index ad26846..82a7125 100644 --- a/pack/transfer.py +++ b/pack/transfer.py @@ -56,7 +56,7 @@ class FileTransferer(threading.Thread, metaclass=abc.ABCMeta): def __init__(self) -> None: super().__init__() - self.log = log.getChild('FileTransferer') + self.log = log.getChild("FileTransferer") # For copying in a different process. By using a priority queue the files # are automatically sorted alphabetically, which means we go through all files @@ -67,13 +67,15 @@ class FileTransferer(threading.Thread, metaclass=abc.ABCMeta): # maxsize=100 is just a guess as to a reasonable upper limit. When this limit # is reached, the main thread will simply block while waiting for this thread # to finish copying a file. - self.queue = queue.PriorityQueue(maxsize=100) # type: queue.PriorityQueue[QueueItem] + self.queue = queue.PriorityQueue( + maxsize=100 + ) # type: queue.PriorityQueue[QueueItem] self.done = threading.Event() self._abort = threading.Event() # Indicates user-requested abort self.__error_mutex = threading.Lock() self.__error = threading.Event() # Indicates abort due to some error - self.__error_message = '' + self.__error_message = "" # Instantiate a dummy progress callback so that we can call it # without checking for None all the time. @@ -87,8 +89,12 @@ class FileTransferer(threading.Thread, metaclass=abc.ABCMeta): def queue_copy(self, src: pathlib.Path, dst: pathlib.PurePath): """Queue a copy action from 'src' to 'dst'.""" - assert not self.done.is_set(), 'Queueing not allowed after done_and_join() was called' - assert not self._abort.is_set(), 'Queueing not allowed after abort_and_join() was called' + assert ( + not self.done.is_set() + ), "Queueing not allowed after done_and_join() was called" + assert ( + not self._abort.is_set() + ), "Queueing not allowed after abort_and_join() was called" if self.__error.is_set(): return self.queue.put((src, dst, Action.COPY)) @@ -96,8 +102,12 @@ class FileTransferer(threading.Thread, metaclass=abc.ABCMeta): def queue_move(self, src: pathlib.Path, dst: pathlib.PurePath): """Queue a move action from 'src' to 'dst'.""" - assert not self.done.is_set(), 'Queueing not allowed after done_and_join() was called' - assert not self._abort.is_set(), 'Queueing not allowed after abort_and_join() was called' + assert ( + not self.done.is_set() + ), "Queueing not allowed after done_and_join() was called" + assert ( + not self._abort.is_set() + ), "Queueing not allowed after abort_and_join() was called" if self.__error.is_set(): return self.queue.put((src, dst, Action.MOVE)) @@ -107,7 +117,9 @@ class FileTransferer(threading.Thread, metaclass=abc.ABCMeta): """Report transfer of `block_size` bytes.""" self.total_transferred_bytes += bytes_transferred - self.progress_cb.transfer_progress(self.total_queued_bytes, self.total_transferred_bytes) + self.progress_cb.transfer_progress( + self.total_queued_bytes, self.total_transferred_bytes + ) def done_and_join(self) -> None: """Indicate all files have been queued, and wait until done. 
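A small sketch of how the queue protocol above is consumed: the main thread calls queue_copy()/queue_move() and finally done_and_join(), while the transfer thread drains (src, dst, Action) tuples via iter_queue(). The import path, the toy shutil.copy() body, the example paths, and the assumption that run() is the only method a transferrer must implement are all illustrative; real transferrers such as ZipTransferrer do their own work in run():

    import pathlib
    import shutil

    from blender_asset_tracer.pack import transfer  # assumed import path

    class CopyTransferrer(transfer.FileTransferer):
        """Toy transferrer that just copies queued files one by one."""

        def run(self) -> None:
            for src, dst, act in self.iter_queue():
                dst_path = pathlib.Path(dst)
                dst_path.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy(str(src), str(dst_path))
                self.report_transferred(src.stat().st_size)
                if act == transfer.Action.MOVE:
                    self.delete_file(src)

    xfer = CopyTransferrer()
    xfer.start()
    # Hypothetical paths; queue_copy() expects (pathlib.Path, pathlib.PurePath).
    xfer.queue_copy(pathlib.Path("/tmp/tex.png"), pathlib.PurePath("/tmp/pack/tex.png"))
    xfer.done_and_join()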
@@ -128,7 +140,8 @@ class FileTransferer(threading.Thread, metaclass=abc.ABCMeta): assert files_remaining raise FileTransferError( "%d files couldn't be transferred" % len(files_remaining), - files_remaining) + files_remaining, + ) def _files_remaining(self) -> typing.List[pathlib.Path]: """Source files that were queued but not transferred.""" @@ -140,7 +153,7 @@ class FileTransferer(threading.Thread, metaclass=abc.ABCMeta): def abort(self) -> None: """Abort the file transfer, immediately returns.""" - log.info('Aborting') + log.info("Aborting") self._abort.set() def abort_and_join(self) -> None: @@ -152,8 +165,11 @@ class FileTransferer(threading.Thread, metaclass=abc.ABCMeta): files_remaining = self._files_remaining() if not files_remaining: return - log.warning("%d files couldn't be transferred, starting with %s", - len(files_remaining), files_remaining[0]) + log.warning( + "%d files couldn't be transferred, starting with %s", + len(files_remaining), + files_remaining[0], + ) def iter_queue(self) -> typing.Iterable[QueueItem]: """Generator, yield queued items until the work is done.""" @@ -176,13 +192,13 @@ class FileTransferer(threading.Thread, metaclass=abc.ABCMeta): if timeout: run_until = time.time() + timeout else: - run_until = float('inf') + run_until = float("inf") # We can't simply block the thread, we have to keep watching the # progress queue. while self.is_alive(): if time.time() > run_until: - self.log.warning('Timeout while waiting for transfer to finish') + self.log.warning("Timeout while waiting for transfer to finish") return self.progress_cb.flush(timeout=0.5) @@ -192,11 +208,11 @@ class FileTransferer(threading.Thread, metaclass=abc.ABCMeta): def delete_file(self, path: pathlib.Path): """Deletes a file, only logging a warning if deletion fails.""" - log.debug('Deleting %s, file has been transferred', path) + log.debug("Deleting %s, file has been transferred", path) try: path.unlink() except IOError as ex: - log.warning('Unable to delete %s: %s', path, ex) + log.warning("Unable to delete %s: %s", path, ex) @property def has_error(self) -> bool: @@ -217,5 +233,5 @@ class FileTransferer(threading.Thread, metaclass=abc.ABCMeta): """Retrieve the error messsage, or an empty string if no error occurred.""" with self.__error_mutex: if not self.__error.is_set(): - return '' + return "" return self.__error_message diff --git a/pack/zipped.py b/pack/zipped.py index 3042891..be1bafa 100644 --- a/pack/zipped.py +++ b/pack/zipped.py @@ -30,7 +30,7 @@ from . import Packer, transfer log = logging.getLogger(__name__) # Suffixes to store uncompressed in the zip. -STORE_ONLY = {'.jpg', '.jpeg', '.exr'} +STORE_ONLY = {".jpg", ".jpeg", ".exr"} class ZipPacker(Packer): @@ -58,9 +58,9 @@ class ZipTransferrer(transfer.FileTransferer): zippath = self.zippath.absolute() - with zipfile.ZipFile(str(zippath), 'w') as outzip: + with zipfile.ZipFile(str(zippath), "w") as outzip: for src, dst, act in self.iter_queue(): - assert src.is_absolute(), 'expecting only absolute paths, not %r' % src + assert src.is_absolute(), "expecting only absolute paths, not %r" % src dst = pathlib.Path(dst).absolute() try: @@ -69,18 +69,20 @@ class ZipTransferrer(transfer.FileTransferer): # Don't bother trying to compress already-compressed files. 
if src.suffix.lower() in STORE_ONLY: compression = zipfile.ZIP_STORED - log.debug('ZIP %s -> %s (uncompressed)', src, relpath) + log.debug("ZIP %s -> %s (uncompressed)", src, relpath) else: compression = zipfile.ZIP_DEFLATED - log.debug('ZIP %s -> %s', src, relpath) - outzip.write(str(src), arcname=str(relpath), compress_type=compression) + log.debug("ZIP %s -> %s", src, relpath) + outzip.write( + str(src), arcname=str(relpath), compress_type=compression + ) if act == transfer.Action.MOVE: self.delete_file(src) except Exception: # We have to catch exceptions in a broad way, as this is running in # a separate thread, and exceptions won't otherwise be seen. - log.exception('Error transferring %s to %s', src, dst) + log.exception("Error transferring %s to %s", src, dst) # Put the files to copy back into the queue, and abort. This allows # the main thread to inspect the queue and see which files were not # copied. The one we just failed (due to this exception) should also diff --git a/trace/__init__.py b/trace/__init__.py index c574f09..f88f5f3 100644 --- a/trace/__init__.py +++ b/trace/__init__.py @@ -28,28 +28,31 @@ log = logging.getLogger(__name__) codes_to_skip = { # These blocks never have external assets: - b'ID', b'WM', b'SN', - + b"ID", + b"WM", + b"SN", # These blocks are skipped for now, until we have proof they point to # assets otherwise missed: - b'GR', b'WO', b'BR', b'LS', + b"GR", + b"WO", + b"BR", + b"LS", } -def deps(bfilepath: pathlib.Path, progress_cb: typing.Optional[progress.Callback] = None) \ - -> typing.Iterator[result.BlockUsage]: +def deps( + bfilepath: pathlib.Path, progress_cb: typing.Optional[progress.Callback] = None +) -> typing.Iterator[result.BlockUsage]: """Open the blend file and report its dependencies. :param bfilepath: File to open. :param progress_cb: Progress callback object. """ - log.info('opening: %s', bfilepath) - bfile = blendfile.open_cached(bfilepath) - bi = file2blocks.BlockIterator() if progress_cb: bi.progress_cb = progress_cb + bfile = bi.open_blendfile(bfilepath) # Remember which block usages we've reported already, without keeping the # blocks themselves in memory. 
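A small usage sketch of the deps() generator shown in the hunk above: it opens the blend file through the BlockIterator and yields one BlockUsage per externally referenced asset. The import path and the blend-file path are assumptions for illustration:

    import pathlib

    from blender_asset_tracer import trace  # assumed import path

    blend_path = pathlib.Path("/projects/shot_010/shot_010.blend")  # hypothetical file
    for usage in trace.deps(blend_path):
        # Each item is a trace.result.BlockUsage; asset_path is a
        # bpathlib.BlendPath (blend-style, possibly '//'-relative).
        print(usage.block_name, usage.asset_path)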
@@ -64,8 +67,9 @@ def deps(bfilepath: pathlib.Path, progress_cb: typing.Optional[progress.Callback yield block_usage -def asset_holding_blocks(blocks: typing.Iterable[blendfile.BlendFileBlock]) \ - -> typing.Iterator[blendfile.BlendFileBlock]: +def asset_holding_blocks( + blocks: typing.Iterable[blendfile.BlendFileBlock], +) -> typing.Iterator[blendfile.BlendFileBlock]: """Generator, yield data blocks that could reference external assets.""" for block in blocks: assert isinstance(block, blendfile.BlendFileBlock) diff --git a/trace/blocks2assets.py b/trace/blocks2assets.py index b58a8c8..962f82a 100644 --- a/trace/blocks2assets.py +++ b/trace/blocks2assets.py @@ -39,17 +39,17 @@ _funcs_for_code = {} # type: typing.Dict[bytes, typing.Callable] def iter_assets(block: blendfile.BlendFileBlock) -> typing.Iterator[result.BlockUsage]: """Generator, yield the assets used by this data block.""" - assert block.code != b'DATA' + assert block.code != b"DATA" try: block_reader = _funcs_for_code[block.code] except KeyError: if block.code not in _warned_about_types: - log.debug('No reader implemented for block type %r', block.code.decode()) + log.debug("No reader implemented for block type %r", block.code.decode()) _warned_about_types.add(block.code) return - log.debug('Tracing block %r', block) + log.debug("Tracing block %r", block) yield from block_reader(block) @@ -70,8 +70,8 @@ def skip_packed(wrapped): @functools.wraps(wrapped) def wrapper(block: blendfile.BlendFileBlock, *args, **kwargs): - if block.get(b'packedfile', default=False): - log.debug('Datablock %r is packed; skipping', block.id_name) + if block.get(b"packedfile", default=False): + log.debug("Datablock %r is packed; skipping", block.id_name) return yield from wrapped(block, *args, **kwargs) @@ -79,40 +79,38 @@ def skip_packed(wrapped): return wrapper -@dna_code('CF') +@dna_code("CF") def cache_file(block: blendfile.BlendFileBlock) -> typing.Iterator[result.BlockUsage]: """Cache file data blocks.""" - path, field = block.get(b'filepath', return_field=True) + path, field = block.get(b"filepath", return_field=True) yield result.BlockUsage(block, path, path_full_field=field) -@dna_code('IM') +@dna_code("IM") @skip_packed def image(block: blendfile.BlendFileBlock) -> typing.Iterator[result.BlockUsage]: """Image data blocks.""" # old files miss this - image_source = block.get(b'source', default=cdefs.IMA_SRC_FILE) - #print('------image_source: ', image_source) - #if image_source not in {cdefs.IMA_SRC_FILE, cdefs.IMA_SRC_SEQUENCE, cdefs.IMA_SRC_MOVIE}: - # return - if image_source not in {cdefs.IMA_SRC_FILE, cdefs.IMA_SRC_SEQUENCE, cdefs.IMA_SRC_MOVIE, cdefs.IMA_SRC_TILED}: + image_source = block.get(b"source", default=cdefs.IMA_SRC_FILE) + if image_source not in { + cdefs.IMA_SRC_FILE, + cdefs.IMA_SRC_SEQUENCE, + cdefs.IMA_SRC_MOVIE, + cdefs.IMA_SRC_TILED, + }: + log.debug("skiping image source type %s", image_source) return - pathname, field = block.get(b'name', return_field=True) - #is_sequence = image_source == cdefs.IMA_SRC_SEQUENCE - if image_source in {cdefs.IMA_SRC_SEQUENCE, cdefs.IMA_SRC_TILED}: - is_sequence = True - else: - is_sequence = False + pathname, field = block.get(b"name", return_field=True) + is_sequence = image_source in {cdefs.IMA_SRC_SEQUENCE, cdefs.IMA_SRC_TILED} - #print('is_sequence: ', is_sequence) yield result.BlockUsage(block, pathname, is_sequence, path_full_field=field) -@dna_code('LI') +@dna_code("LI") def library(block: blendfile.BlendFileBlock) -> typing.Iterator[result.BlockUsage]: """Library data blocks.""" 
- path, field = block.get(b'name', return_field=True) + path, field = block.get(b"name", return_field=True) yield result.BlockUsage(block, path, path_full_field=field) # The 'filepath' also points to the blend file. However, this is set to the @@ -120,37 +118,37 @@ def library(block: blendfile.BlendFileBlock) -> typing.Iterator[result.BlockUsag # is thus not a property we have to report or rewrite. -@dna_code('ME') +@dna_code("ME") def mesh(block: blendfile.BlendFileBlock) -> typing.Iterator[result.BlockUsage]: """Mesh data blocks.""" - block_external = block.get_pointer((b'ldata', b'external'), None) + block_external = block.get_pointer((b"ldata", b"external"), None) if block_external is None: - block_external = block.get_pointer((b'fdata', b'external'), None) + block_external = block.get_pointer((b"fdata", b"external"), None) if block_external is None: return - path, field = block_external.get(b'filename', return_field=True) + path, field = block_external.get(b"filename", return_field=True) yield result.BlockUsage(block, path, path_full_field=field) -@dna_code('MC') +@dna_code("MC") def movie_clip(block: blendfile.BlendFileBlock) -> typing.Iterator[result.BlockUsage]: """MovieClip data blocks.""" - path, field = block.get(b'name', return_field=True) + path, field = block.get(b"name", return_field=True) # TODO: The assumption that this is not a sequence may not be true for all modifiers. yield result.BlockUsage(block, path, is_sequence=False, path_full_field=field) -@dna_code('OB') +@dna_code("OB") def object_block(block: blendfile.BlendFileBlock) -> typing.Iterator[result.BlockUsage]: """Object data blocks.""" ctx = modifier_walkers.ModifierContext(owner=block) # 'ob->modifiers[...].filepath' for mod_idx, block_mod in enumerate(iterators.modifiers(block)): - block_name = b'%s.modifiers[%d]' % (block.id_name, mod_idx) - mod_type = block_mod[b'modifier', b'type'] - log.debug('Tracing modifier %s, type=%d', block_name.decode(), mod_type) + block_name = b"%s.modifiers[%d]" % (block.id_name, mod_idx) + mod_type = block_mod[b"modifier", b"type"] + log.debug("Tracing modifier %s, type=%d", block_name.decode(), mod_type) try: mod_handler = modifier_walkers.modifier_handlers[mod_type] @@ -159,52 +157,59 @@ def object_block(block: blendfile.BlendFileBlock) -> typing.Iterator[result.Bloc yield from mod_handler(ctx, block_mod, block_name) -@dna_code('SC') +@dna_code("SC") def scene(block: blendfile.BlendFileBlock) -> typing.Iterator[result.BlockUsage]: """Scene data blocks.""" # Sequence editor is the only interesting bit. 
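In the scene() handler here, a sequencer strip's directory (strip->dir) and base name (stripdata->name) are stored separately and recombined with BlendPath's '/' operator, as shown just below. A tiny sketch of that join; the byte strings are made-up values:

    from blender_asset_tracer import bpathlib

    dirname = b"//footage/shot_010"   # hypothetical strip->dir value
    basename = b"frame_0001.exr"      # hypothetical stripdata->name value
    # Same join as in scene(): yields a blend-style ('//'-relative) path.
    asset_path = bpathlib.BlendPath(dirname) / basename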
- block_ed = block.get_pointer(b'ed') + block_ed = block.get_pointer(b"ed") if block_ed is None: return - single_asset_types = {cdefs.SEQ_TYPE_MOVIE, cdefs.SEQ_TYPE_SOUND_RAM, cdefs.SEQ_TYPE_SOUND_HD} + single_asset_types = { + cdefs.SEQ_TYPE_MOVIE, + cdefs.SEQ_TYPE_SOUND_RAM, + cdefs.SEQ_TYPE_SOUND_HD, + } asset_types = single_asset_types.union({cdefs.SEQ_TYPE_IMAGE}) for seq, seq_type in iterators.sequencer_strips(block_ed): if seq_type not in asset_types: continue - seq_strip = seq.get_pointer(b'strip') + seq_strip = seq.get_pointer(b"strip") if seq_strip is None: continue - seq_stripdata = seq_strip.get_pointer(b'stripdata') + seq_stripdata = seq_strip.get_pointer(b"stripdata") if seq_stripdata is None: continue - dirname, dn_field = seq_strip.get(b'dir', return_field=True) - basename, bn_field = seq_stripdata.get(b'name', return_field=True) + dirname, dn_field = seq_strip.get(b"dir", return_field=True) + basename, bn_field = seq_stripdata.get(b"name", return_field=True) asset_path = bpathlib.BlendPath(dirname) / basename is_sequence = seq_type not in single_asset_types - yield result.BlockUsage(seq_strip, asset_path, - is_sequence=is_sequence, - path_dir_field=dn_field, - path_base_field=bn_field) + yield result.BlockUsage( + seq_strip, + asset_path, + is_sequence=is_sequence, + path_dir_field=dn_field, + path_base_field=bn_field, + ) -@dna_code('SO') +@dna_code("SO") @skip_packed def sound(block: blendfile.BlendFileBlock) -> typing.Iterator[result.BlockUsage]: """Sound data blocks.""" - path, field = block.get(b'name', return_field=True) + path, field = block.get(b"name", return_field=True) yield result.BlockUsage(block, path, path_full_field=field) -@dna_code('VF') +@dna_code("VF") @skip_packed def vector_font(block: blendfile.BlendFileBlock) -> typing.Iterator[result.BlockUsage]: """Vector Font data blocks.""" - path, field = block.get(b'name', return_field=True) - if path == b'': # builtin font + path, field = block.get(b"name", return_field=True) + if path == b"": # builtin font return yield result.BlockUsage(block, path, path_full_field=field) diff --git a/trace/expanders.py b/trace/expanders.py index 1f66387..8b101d2 100644 --- a/trace/expanders.py +++ b/trace/expanders.py @@ -30,26 +30,36 @@ from blender_asset_tracer import blendfile, cdefs from blender_asset_tracer.blendfile import iterators # Don't warn about these types at all. -_warned_about_types = {b'LI', b'DATA'} +_warned_about_types = {b"LI", b"DATA"} _funcs_for_code = {} # type: typing.Dict[bytes, typing.Callable] log = logging.getLogger(__name__) -def expand_block(block: blendfile.BlendFileBlock) -> typing.Iterator[blendfile.BlendFileBlock]: +def expand_block( + block: blendfile.BlendFileBlock, +) -> typing.Iterator[blendfile.BlendFileBlock]: """Generator, yield the data blocks used by this data block.""" try: expander = _funcs_for_code[block.code] except KeyError: if block.code not in _warned_about_types: - log.debug('No expander implemented for block type %r', block.code.decode()) + log.debug("No expander implemented for block type %r", block.code.decode()) _warned_about_types.add(block.code) return - log.debug('Expanding block %r', block) - # Filter out falsy blocks, i.e. None values. - # Allowing expanders to yield None makes them more consise. - yield from filter(None, expander(block)) + log.debug("Expanding block %r", block) + for dependency in expander(block): + if not dependency: + # Filter out falsy blocks, i.e. None values. + # Allowing expanders to yield None makes them more consise. 
+ continue + if dependency.code == b"DATA": + log.warn( + "expander yielded block %s which will be ignored in later iteration", + dependency, + ) + yield dependency def dna_code(block_code: str): @@ -65,83 +75,131 @@ def dna_code(block_code: str): def _expand_generic_material(block: blendfile.BlendFileBlock): - array_len = block.get(b'totcol') - yield from block.iter_array_of_pointers(b'mat', array_len) + array_len = block.get(b"totcol") + yield from block.iter_array_of_pointers(b"mat", array_len) def _expand_generic_mtex(block: blendfile.BlendFileBlock): - if not block.dna_type.has_field(b'mtex'): + if not block.dna_type.has_field(b"mtex"): # mtex was removed in Blender 2.8 return - for mtex in block.iter_fixed_array_of_pointers(b'mtex'): - yield mtex.get_pointer(b'tex') - yield mtex.get_pointer(b'object') + for mtex in block.iter_fixed_array_of_pointers(b"mtex"): + yield mtex.get_pointer(b"tex") + yield mtex.get_pointer(b"object") def _expand_generic_nodetree(block: blendfile.BlendFileBlock): - assert block.dna_type.dna_type_id == b'bNodeTree' + assert block.dna_type.dna_type_id == b"bNodeTree" + + nodes = block.get_pointer((b"nodes", b"first")) + + # See DNA_node_types.h + socket_types_with_value_pointer = { + cdefs.SOCK_OBJECT, # bNodeSocketValueObject + cdefs.SOCK_IMAGE, # bNodeSocketValueImage + cdefs.SOCK_COLLECTION, # bNodeSocketValueCollection + cdefs.SOCK_TEXTURE, # bNodeSocketValueTexture + cdefs.SOCK_MATERIAL, # bNodeSocketValueMaterial + } - nodes = block.get_pointer((b'nodes', b'first')) for node in iterators.listbase(nodes): - if node[b'type'] == cdefs.CMP_NODE_R_LAYERS: + if node[b"type"] == cdefs.CMP_NODE_R_LAYERS: continue - yield node # The 'id' property points to whatever is used by the node # (like the image in an image texture node). - yield node.get_pointer(b'id') + yield node.get_pointer(b"id") + + # Default values of inputs can also point to ID datablocks. + inputs = node.get_pointer((b"inputs", b"first")) + for input in iterators.listbase(inputs): + if input[b"type"] not in socket_types_with_value_pointer: + continue + value_container = input.get_pointer(b"default_value") + if not value_container: + continue + value = value_container.get_pointer(b"value") + yield value + + +def _expand_generic_idprops(block: blendfile.BlendFileBlock): + """Yield ID datablocks and their libraries referenced from ID properties.""" + + # TODO(@sybren): this code is very crude, and happens to work on ID + # properties of Geometry Nodes modifiers, which is what it was written for. + # It should probably be rewritten to properly iterate over & recurse into + # all groups. 
+ settings_props = block.get_pointer((b"settings", b"properties")) + if not settings_props: + return + + subprops = settings_props.get_pointer((b"data", b"group", b"first")) + for idprop in iterators.listbase(subprops): + if idprop[b"type"] != cdefs.IDP_ID: + continue + id_datablock = idprop.get_pointer((b"data", b"pointer")) + if not id_datablock: + continue + yield id_datablock def _expand_generic_nodetree_id(block: blendfile.BlendFileBlock): - block_ntree = block.get_pointer(b'nodetree', None) + block_ntree = block.get_pointer(b"nodetree", None) if block_ntree is not None: yield from _expand_generic_nodetree(block_ntree) def _expand_generic_animdata(block: blendfile.BlendFileBlock): - block_adt = block.get_pointer(b'adt') + block_adt = block.get_pointer(b"adt") if block_adt: - yield block_adt.get_pointer(b'action') + yield block_adt.get_pointer(b"action") # TODO, NLA -@dna_code('AR') +@dna_code("AR") def _expand_armature(block: blendfile.BlendFileBlock): yield from _expand_generic_animdata(block) -@dna_code('CU') +@dna_code("CU") def _expand_curve(block: blendfile.BlendFileBlock): yield from _expand_generic_animdata(block) yield from _expand_generic_material(block) - for fieldname in (b'vfont', b'vfontb', b'vfonti', b'vfontbi', - b'bevobj', b'taperobj', b'textoncurve'): + for fieldname in ( + b"vfont", + b"vfontb", + b"vfonti", + b"vfontbi", + b"bevobj", + b"taperobj", + b"textoncurve", + ): yield block.get_pointer(fieldname) -@dna_code('GR') +@dna_code("GR") def _expand_group(block: blendfile.BlendFileBlock): - log.debug('Collection/group Block: %s (name=%s)', block, block.id_name) + log.debug("Collection/group Block: %s (name=%s)", block, block.id_name) - objects = block.get_pointer((b'gobject', b'first')) + objects = block.get_pointer((b"gobject", b"first")) for item in iterators.listbase(objects): - yield item.get_pointer(b'ob') + yield item.get_pointer(b"ob") # Recurse through child collections. try: - children = block.get_pointer((b'children', b'first')) + children = block.get_pointer((b"children", b"first")) except KeyError: # 'children' was introduced in Blender 2.8 collections pass else: for child in iterators.listbase(children): - subcoll = child.get_pointer(b'collection') + subcoll = child.get_pointer(b"collection") if subcoll is None: continue - if subcoll.dna_type_id == b'ID': + if subcoll.dna_type_id == b"ID": # This issue happened while recursing a linked-in 'Hidden' # collection in the Chimes set of the Spring project. 
Such # collections named 'Hidden' were apparently created while @@ -150,127 +208,132 @@ def _expand_group(block: blendfile.BlendFileBlock): yield subcoll continue - log.debug('recursing into child collection %s (name=%r, type=%r)', - subcoll, subcoll.id_name, subcoll.dna_type_name) + log.debug( + "recursing into child collection %s (name=%r, type=%r)", + subcoll, + subcoll.id_name, + subcoll.dna_type_name, + ) yield from _expand_group(subcoll) -@dna_code('LA') +@dna_code("LA") def _expand_lamp(block: blendfile.BlendFileBlock): yield from _expand_generic_animdata(block) yield from _expand_generic_nodetree_id(block) yield from _expand_generic_mtex(block) -@dna_code('MA') +@dna_code("MA") def _expand_material(block: blendfile.BlendFileBlock): yield from _expand_generic_animdata(block) yield from _expand_generic_nodetree_id(block) yield from _expand_generic_mtex(block) try: - yield block.get_pointer(b'group') + yield block.get_pointer(b"group") except KeyError: # Groups were removed from Blender 2.8 pass -@dna_code('MB') +@dna_code("MB") def _expand_metaball(block: blendfile.BlendFileBlock): yield from _expand_generic_animdata(block) yield from _expand_generic_material(block) -@dna_code('ME') +@dna_code("ME") def _expand_mesh(block: blendfile.BlendFileBlock): yield from _expand_generic_animdata(block) yield from _expand_generic_material(block) - yield block.get_pointer(b'texcomesh') + yield block.get_pointer(b"texcomesh") # TODO, TexFace? - it will be slow, we could simply ignore :S -@dna_code('NT') +@dna_code("NT") def _expand_node_tree(block: blendfile.BlendFileBlock): yield from _expand_generic_animdata(block) yield from _expand_generic_nodetree(block) -@dna_code('OB') +@dna_code("OB") def _expand_object(block: blendfile.BlendFileBlock): yield from _expand_generic_animdata(block) yield from _expand_generic_material(block) - yield block.get_pointer(b'data') + yield block.get_pointer(b"data") - if block[b'transflag'] & cdefs.OB_DUPLIGROUP: - yield block.get_pointer(b'dup_group') + if block[b"transflag"] & cdefs.OB_DUPLIGROUP: + yield block.get_pointer(b"dup_group") - yield block.get_pointer(b'proxy') - yield block.get_pointer(b'proxy_group') + yield block.get_pointer(b"proxy") + yield block.get_pointer(b"proxy_group") # 'ob->pose->chanbase[...].custom' - block_pose = block.get_pointer(b'pose') + block_pose = block.get_pointer(b"pose") if block_pose: - assert block_pose.dna_type.dna_type_id == b'bPose' + assert block_pose.dna_type.dna_type_id == b"bPose" # sdna_index_bPoseChannel = block_pose.file.sdna_index_from_id[b'bPoseChannel'] - channels = block_pose.get_pointer((b'chanbase', b'first')) + channels = block_pose.get_pointer((b"chanbase", b"first")) for pose_chan in iterators.listbase(channels): - yield pose_chan.get_pointer(b'custom') + yield pose_chan.get_pointer(b"custom") # Expand the objects 'ParticleSettings' via 'ob->particlesystem[...].part' # sdna_index_ParticleSystem = block.file.sdna_index_from_id.get(b'ParticleSystem') # if sdna_index_ParticleSystem is not None: - psystems = block.get_pointer((b'particlesystem', b'first')) + psystems = block.get_pointer((b"particlesystem", b"first")) for psystem in iterators.listbase(psystems): - yield psystem.get_pointer(b'part') + yield psystem.get_pointer(b"part") # Modifiers can also refer to other datablocks, which should also get expanded. for block_mod in iterators.modifiers(block): - mod_type = block_mod[b'modifier', b'type'] + mod_type = block_mod[b"modifier", b"type"] # Currently only node groups are supported. 
If the support should expand # to more types, something more intelligent than this should be made. if mod_type == cdefs.eModifierType_Nodes: - yield block_mod.get_pointer(b'node_group') + yield from _expand_generic_idprops(block_mod) + yield block_mod.get_pointer(b"node_group") -@dna_code('PA') +@dna_code("PA") def _expand_particle_settings(block: blendfile.BlendFileBlock): yield from _expand_generic_animdata(block) yield from _expand_generic_mtex(block) - block_ren_as = block[b'ren_as'] + block_ren_as = block[b"ren_as"] if block_ren_as == cdefs.PART_DRAW_GR: - yield block.get_pointer(b'dup_group') + yield block.get_pointer(b"dup_group") elif block_ren_as == cdefs.PART_DRAW_OB: - yield block.get_pointer(b'dup_ob') + yield block.get_pointer(b"dup_ob") -@dna_code('SC') +@dna_code("SC") def _expand_scene(block: blendfile.BlendFileBlock): yield from _expand_generic_animdata(block) yield from _expand_generic_nodetree_id(block) - yield block.get_pointer(b'camera') - yield block.get_pointer(b'world') - yield block.get_pointer(b'set', default=None) - yield block.get_pointer(b'clip', default=None) + yield block.get_pointer(b"camera") + yield block.get_pointer(b"world") + yield block.get_pointer(b"set", default=None) + yield block.get_pointer(b"clip", default=None) # sdna_index_Base = block.file.sdna_index_from_id[b'Base'] # for item in bf_utils.iter_ListBase(block.get_pointer((b'base', b'first'))): # yield item.get_pointer(b'object', sdna_index_refine=sdna_index_Base) - bases = block.get_pointer((b'base', b'first')) + bases = block.get_pointer((b"base", b"first")) for base in iterators.listbase(bases): - yield base.get_pointer(b'object') + yield base.get_pointer(b"object") # Sequence Editor - block_ed = block.get_pointer(b'ed') + block_ed = block.get_pointer(b"ed") if not block_ed: return strip_type_to_field = { - cdefs.SEQ_TYPE_SCENE: b'scene', - cdefs.SEQ_TYPE_MOVIECLIP: b'clip', - cdefs.SEQ_TYPE_MASK: b'mask', - cdefs.SEQ_TYPE_SOUND_RAM: b'sound', + cdefs.SEQ_TYPE_SCENE: b"scene", + cdefs.SEQ_TYPE_MOVIECLIP: b"clip", + cdefs.SEQ_TYPE_MASK: b"mask", + cdefs.SEQ_TYPE_SOUND_RAM: b"sound", } for strip, strip_type in iterators.sequencer_strips(block_ed): try: @@ -280,14 +343,14 @@ def _expand_scene(block: blendfile.BlendFileBlock): yield strip.get_pointer(field_name) -@dna_code('TE') +@dna_code("TE") def _expand_texture(block: blendfile.BlendFileBlock): yield from _expand_generic_animdata(block) yield from _expand_generic_nodetree_id(block) - yield block.get_pointer(b'ima') + yield block.get_pointer(b"ima") -@dna_code('WO') +@dna_code("WO") def _expand_world(block: blendfile.BlendFileBlock): yield from _expand_generic_animdata(block) yield from _expand_generic_nodetree_id(block) diff --git a/trace/file2blocks.py b/trace/file2blocks.py index 944edfa..af5ff2e 100644 --- a/trace/file2blocks.py +++ b/trace/file2blocks.py @@ -65,14 +65,21 @@ class BlockIterator: self.progress_cb = progress.Callback() - def iter_blocks(self, - bfile: blendfile.BlendFile, - limit_to: typing.Set[blendfile.BlendFileBlock] = set(), - ) -> typing.Iterator[blendfile.BlendFileBlock]: + def open_blendfile(self, bfilepath: pathlib.Path) -> blendfile.BlendFile: + """Open a blend file, sending notification about this to the progress callback.""" + + log.info("opening: %s", bfilepath) + self.progress_cb.trace_blendfile(bfilepath) + return blendfile.open_cached(bfilepath) + + def iter_blocks( + self, + bfile: blendfile.BlendFile, + limit_to: typing.Set[blendfile.BlendFileBlock] = set(), + ) -> typing.Iterator[blendfile.BlendFileBlock]: 
"""Expand blocks with dependencies from other libraries.""" - self.progress_cb.trace_blendfile(bfile.filepath) - log.info('inspecting: %s', bfile.filepath) + log.info("inspecting: %s", bfile.filepath) if limit_to: self._queue_named_blocks(bfile, limit_to) else: @@ -94,14 +101,14 @@ class BlockIterator: if (bpath, block.addr_old) in self.blocks_yielded: continue - if block.code == b'ID': + if block.code == b"ID": # ID blocks represent linked-in assets. Those are the ones that # should be loaded from their own blend file and "expanded" to # the entire set of data blocks required to render them. We # defer the handling of those so that we can work with one # blend file at a time. - lib = block.get_pointer(b'lib') - lib_bpath = bpathlib.BlendPath(lib[b'name']).absolute(root_dir) + lib = block.get_pointer(b"lib") + lib_bpath = bpathlib.BlendPath(lib[b"name"]).absolute(root_dir) blocks_per_lib[lib_bpath].add(block) # The library block itself should also be reported, because it @@ -109,11 +116,7 @@ class BlockIterator: self.to_visit.put(lib) continue - if limit_to: - # We're limiting the blocks, so we have to expand them to make - # sure we don't miss anything. Otherwise we're yielding the - # entire file anyway, and no expansion is necessary. - self._queue_dependencies(block) + self._queue_dependencies(block) self.blocks_yielded.add((bpath, block.addr_old)) yield block @@ -125,27 +128,26 @@ class BlockIterator: for lib_bpath, idblocks in blocks_per_lib.items(): lib_path = bpathlib.make_absolute(lib_bpath.to_path()) - #assert lib_path.exists() if not lib_path.exists(): - log.warning('Library %s does not exist', lib_path) + log.warning("Library %s does not exist", lib_path) continue - log.debug('Expanding %d blocks in %s', len(idblocks), lib_path) - libfile = blendfile.open_cached(lib_path) + log.debug("Expanding %d blocks in %s", len(idblocks), lib_path) + libfile = self.open_blendfile(lib_path) yield from self.iter_blocks(libfile, idblocks) def _queue_all_blocks(self, bfile: blendfile.BlendFile): - log.debug('Queueing all blocks from file %s', bfile.filepath) + log.debug("Queueing all blocks from file %s", bfile.filepath) for block in bfile.blocks: # Don't bother visiting DATA blocks, as we won't know what # to do with them anyway. - if block.code == b'DATA': + if block.code == b"DATA": continue self.to_visit.put(block) - def _queue_named_blocks(self, - bfile: blendfile.BlendFile, - limit_to: typing.Set[blendfile.BlendFileBlock]): + def _queue_named_blocks( + self, bfile: blendfile.BlendFile, limit_to: typing.Set[blendfile.BlendFileBlock] + ): """Queue only the blocks referred to in limit_to. 
:param bfile: @@ -155,22 +157,25 @@ class BlockIterator: """ for to_find in limit_to: - assert to_find.code == b'ID' - name_to_find = to_find[b'name'] + assert to_find.code == b"ID" + name_to_find = to_find[b"name"] code = name_to_find[:2] - log.debug('Finding block %r with code %r', name_to_find, code) + log.debug("Finding block %r with code %r", name_to_find, code) same_code = bfile.find_blocks_from_code(code) for block in same_code: if block.id_name == name_to_find: - log.debug('Queueing %r from file %s', block, bfile.filepath) + log.debug("Queueing %r from file %s", block, bfile.filepath) self.to_visit.put(block) def _queue_dependencies(self, block: blendfile.BlendFileBlock): for block in expanders.expand_block(block): + assert isinstance(block, blendfile.BlendFileBlock), "unexpected %r" % block self.to_visit.put(block) -def iter_blocks(bfile: blendfile.BlendFile) -> typing.Iterator[blendfile.BlendFileBlock]: +def iter_blocks( + bfile: blendfile.BlendFile, +) -> typing.Iterator[blendfile.BlendFileBlock]: """Generator, yield all blocks in this file + required blocks in libs.""" bi = BlockIterator() yield from bi.iter_blocks(bfile) diff --git a/trace/file_sequence.py b/trace/file_sequence.py index 2c61ba3..f5d4344 100644 --- a/trace/file_sequence.py +++ b/trace/file_sequence.py @@ -39,13 +39,19 @@ def expand_sequence(path: pathlib.Path) -> typing.Iterator[pathlib.Path]: or the path of the first file in the sequence. """ - if '*' in str(path): # assume it is a glob + if "" in path.name: # UDIM tiles + # Change marker to a glob pattern, then let the glob case handle it. + # This assumes that all files that match the glob are actually UDIM + # tiles; this could cause some false-positives. + path = path.with_name(path.name.replace("", "*")) + + if "*" in str(path): # assume it is a glob import glob - log.debug('expanding glob %s', path) + + log.debug("expanding glob %s", path) for fname in sorted(glob.glob(str(path), recursive=True)): yield pathlib.Path(fname) return - if not path.exists(): raise DoesNotExist(path) @@ -53,9 +59,10 @@ def expand_sequence(path: pathlib.Path) -> typing.Iterator[pathlib.Path]: yield path return - log.debug('expanding file sequence %s', path) + log.debug("expanding file sequence %s", path) import string + stem_no_digits = path.stem.rstrip(string.digits) if stem_no_digits == path.stem: # Just a single file, no digits here. @@ -65,5 +72,5 @@ def expand_sequence(path: pathlib.Path) -> typing.Iterator[pathlib.Path]: # Return everything start starts with 'stem_no_digits' and ends with the # same suffix as the first file. This may result in more files than used # by Blender, but at least it shouldn't miss any. - pattern = '%s*%s' % (stem_no_digits, path.suffix) + pattern = "%s*%s" % (stem_no_digits, path.suffix) yield from sorted(path.parent.glob(pattern)) diff --git a/trace/modifier_walkers.py b/trace/modifier_walkers.py index 0e5f27a..12a0468 100644 --- a/trace/modifier_walkers.py +++ b/trace/modifier_walkers.py @@ -37,8 +37,9 @@ class ModifierContext: Currently just contains the object on which the modifier is defined. 
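One change worth calling out from the trace/file_sequence.py hunk above: expand_sequence() now recognises UDIM tile sets by looking for Blender's literal <UDIM> marker in the file name and rewriting it to a glob before falling through to the existing glob branch. A small sketch of that rewrite, using a made-up texture name:

    import pathlib

    path = pathlib.Path("/textures/wood.<UDIM>.png")  # hypothetical UDIM image path
    if "<UDIM>" in path.name:
        # Turn the UDIM marker into a glob pattern, as expand_sequence() does.
        path = path.with_name(path.name.replace("<UDIM>", "*"))
    assert path.name == "wood.*.png"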
""" + def __init__(self, owner: blendfile.BlendFileBlock) -> None: - assert owner.dna_type_name == 'Object' + assert owner.dna_type_name == "Object" self.owner = owner @@ -55,45 +56,56 @@ def mod_handler(dna_num: int): @mod_handler(cdefs.eModifierType_MeshCache) -def modifier_filepath(ctx: ModifierContext, modifier: blendfile.BlendFileBlock, block_name: bytes) \ - -> typing.Iterator[result.BlockUsage]: +def modifier_filepath( + ctx: ModifierContext, modifier: blendfile.BlendFileBlock, block_name: bytes +) -> typing.Iterator[result.BlockUsage]: """Just yield the 'filepath' field.""" - path, field = modifier.get(b'filepath', return_field=True) - yield result.BlockUsage(modifier, path, path_full_field=field, block_name=block_name) + path, field = modifier.get(b"filepath", return_field=True) + yield result.BlockUsage( + modifier, path, path_full_field=field, block_name=block_name + ) @mod_handler(cdefs.eModifierType_MeshSequenceCache) -def modifier_mesh_sequence_cache(ctx: ModifierContext, modifier: blendfile.BlendFileBlock, - block_name: bytes) -> typing.Iterator[result.BlockUsage]: +def modifier_mesh_sequence_cache( + ctx: ModifierContext, modifier: blendfile.BlendFileBlock, block_name: bytes +) -> typing.Iterator[result.BlockUsage]: """Yield the Alembic file(s) used by this modifier""" - cache_file = modifier.get_pointer(b'cache_file') + cache_file = modifier.get_pointer(b"cache_file") if cache_file is None: return - is_sequence = bool(cache_file[b'is_sequence']) + is_sequence = bool(cache_file[b"is_sequence"]) cache_block_name = cache_file.id_name assert cache_block_name is not None - path, field = cache_file.get(b'filepath', return_field=True) - yield result.BlockUsage(cache_file, path, path_full_field=field, - is_sequence=is_sequence, - block_name=cache_block_name) + path, field = cache_file.get(b"filepath", return_field=True) + yield result.BlockUsage( + cache_file, + path, + path_full_field=field, + is_sequence=is_sequence, + block_name=cache_block_name, + ) @mod_handler(cdefs.eModifierType_Ocean) -def modifier_ocean(ctx: ModifierContext, modifier: blendfile.BlendFileBlock, block_name: bytes) \ - -> typing.Iterator[result.BlockUsage]: - if not modifier[b'cached']: +def modifier_ocean( + ctx: ModifierContext, modifier: blendfile.BlendFileBlock, block_name: bytes +) -> typing.Iterator[result.BlockUsage]: + if not modifier[b"cached"]: return - path, field = modifier.get(b'cachepath', return_field=True) + path, field = modifier.get(b"cachepath", return_field=True) # The path indicates the directory containing the cached files. - yield result.BlockUsage(modifier, path, is_sequence=True, path_full_field=field, - block_name=block_name) + yield result.BlockUsage( + modifier, path, is_sequence=True, path_full_field=field, block_name=block_name + ) -def _get_texture(prop_name: bytes, dblock: blendfile.BlendFileBlock, block_name: bytes) \ - -> typing.Iterator[result.BlockUsage]: +def _get_texture( + prop_name: bytes, dblock: blendfile.BlendFileBlock, block_name: bytes +) -> typing.Iterator[result.BlockUsage]: """Yield block usages from a texture propery. Assumes dblock[prop_name] is a texture data block. 
@@ -102,13 +114,14 @@ def _get_texture(prop_name: bytes, dblock: blendfile.BlendFileBlock, block_name: return tx = dblock.get_pointer(prop_name) - yield from _get_image(b'ima', tx, block_name) + yield from _get_image(b"ima", tx, block_name) -def _get_image(prop_name: bytes, - dblock: typing.Optional[blendfile.BlendFileBlock], - block_name: bytes) \ - -> typing.Iterator[result.BlockUsage]: +def _get_image( + prop_name: bytes, + dblock: typing.Optional[blendfile.BlendFileBlock], + block_name: bytes, +) -> typing.Iterator[result.BlockUsage]: """Yield block usages from an image propery. Assumes dblock[prop_name] is an image data block. @@ -120,132 +133,187 @@ def _get_image(prop_name: bytes, ima = dblock.get_pointer(prop_name) except KeyError as ex: # No such property, just return. - log.debug('_get_image() called with non-existing property name: %s', ex) + log.debug("_get_image() called with non-existing property name: %s", ex) return if not ima: return - path, field = ima.get(b'name', return_field=True) + path, field = ima.get(b"name", return_field=True) yield result.BlockUsage(ima, path, path_full_field=field, block_name=block_name) @mod_handler(cdefs.eModifierType_Displace) @mod_handler(cdefs.eModifierType_Wave) -def modifier_texture(ctx: ModifierContext, modifier: blendfile.BlendFileBlock, block_name: bytes) \ - -> typing.Iterator[result.BlockUsage]: - return _get_texture(b'texture', modifier, block_name) +def modifier_texture( + ctx: ModifierContext, modifier: blendfile.BlendFileBlock, block_name: bytes +) -> typing.Iterator[result.BlockUsage]: + return _get_texture(b"texture", modifier, block_name) @mod_handler(cdefs.eModifierType_WeightVGEdit) @mod_handler(cdefs.eModifierType_WeightVGMix) @mod_handler(cdefs.eModifierType_WeightVGProximity) -def modifier_mask_texture(ctx: ModifierContext, modifier: blendfile.BlendFileBlock, - block_name: bytes) \ - -> typing.Iterator[result.BlockUsage]: - return _get_texture(b'mask_texture', modifier, block_name) +def modifier_mask_texture( + ctx: ModifierContext, modifier: blendfile.BlendFileBlock, block_name: bytes +) -> typing.Iterator[result.BlockUsage]: + return _get_texture(b"mask_texture", modifier, block_name) @mod_handler(cdefs.eModifierType_UVProject) -def modifier_image(ctx: ModifierContext, modifier: blendfile.BlendFileBlock, block_name: bytes) \ - -> typing.Iterator[result.BlockUsage]: - yield from _get_image(b'image', modifier, block_name) +def modifier_image( + ctx: ModifierContext, modifier: blendfile.BlendFileBlock, block_name: bytes +) -> typing.Iterator[result.BlockUsage]: + yield from _get_image(b"image", modifier, block_name) -def _walk_point_cache(ctx: ModifierContext, - block_name: bytes, - bfile: blendfile.BlendFile, - pointcache: blendfile.BlendFileBlock, - extension: bytes): - flag = pointcache[b'flag'] +def _walk_point_cache( + ctx: ModifierContext, + block_name: bytes, + bfile: blendfile.BlendFile, + pointcache: blendfile.BlendFileBlock, + extension: bytes, +): + flag = pointcache[b"flag"] if flag & cdefs.PTCACHE_EXTERNAL: - path, field = pointcache.get(b'path', return_field=True) - log.info(' external cache at %s', path) + path, field = pointcache.get(b"path", return_field=True) + log.info(" external cache at %s", path) bpath = bpathlib.BlendPath(path) - yield result.BlockUsage(pointcache, bpath, path_full_field=field, - is_sequence=True, block_name=block_name) + yield result.BlockUsage( + pointcache, + bpath, + path_full_field=field, + is_sequence=True, + block_name=block_name, + ) elif flag & cdefs.PTCACHE_DISK_CACHE: # 
See ptcache_path() in pointcache.c - name, field = pointcache.get(b'name', return_field=True) + name, field = pointcache.get(b"name", return_field=True) if not name: # See ptcache_filename() in pointcache.c - idname = ctx.owner[b'id', b'name'] + idname = ctx.owner[b"id", b"name"] name = idname[2:].hex().upper().encode() - path = b'//%b%b/%b_*%b' % ( + path = b"//%b%b/%b_*%b" % ( cdefs.PTCACHE_PATH, bfile.filepath.stem.encode(), name, - extension) - log.info(' disk cache at %s', path) + extension, + ) + log.info(" disk cache at %s", path) bpath = bpathlib.BlendPath(path) - yield result.BlockUsage(pointcache, bpath, path_full_field=field, - is_sequence=True, block_name=block_name) + yield result.BlockUsage( + pointcache, + bpath, + path_full_field=field, + is_sequence=True, + block_name=block_name, + ) @mod_handler(cdefs.eModifierType_ParticleSystem) -def modifier_particle_system(ctx: ModifierContext, modifier: blendfile.BlendFileBlock, - block_name: bytes) \ - -> typing.Iterator[result.BlockUsage]: - psys = modifier.get_pointer(b'psys') +def modifier_particle_system( + ctx: ModifierContext, modifier: blendfile.BlendFileBlock, block_name: bytes +) -> typing.Iterator[result.BlockUsage]: + psys = modifier.get_pointer(b"psys") if psys is None: return - pointcache = psys.get_pointer(b'pointcache') + pointcache = psys.get_pointer(b"pointcache") if pointcache is None: return - yield from _walk_point_cache(ctx, block_name, modifier.bfile, pointcache, cdefs.PTCACHE_EXT) + yield from _walk_point_cache( + ctx, block_name, modifier.bfile, pointcache, cdefs.PTCACHE_EXT + ) @mod_handler(cdefs.eModifierType_Fluidsim) -def modifier_fluid_sim(ctx: ModifierContext, modifier: blendfile.BlendFileBlock, block_name: bytes) \ - -> typing.Iterator[result.BlockUsage]: - my_log = log.getChild('modifier_fluid_sim') +def modifier_fluid_sim( + ctx: ModifierContext, modifier: blendfile.BlendFileBlock, block_name: bytes +) -> typing.Iterator[result.BlockUsage]: + my_log = log.getChild("modifier_fluid_sim") - fss = modifier.get_pointer(b'fss') + fss = modifier.get_pointer(b"fss") if fss is None: - my_log.debug('Modifier %r (%r) has no fss', - modifier[b'modifier', b'name'], block_name) + my_log.debug( + "Modifier %r (%r) has no fss", modifier[b"modifier", b"name"], block_name + ) return - path, field = fss.get(b'surfdataPath', return_field=True) + path, field = fss.get(b"surfdataPath", return_field=True) # This may match more than is used by Blender, but at least it shouldn't # miss any files. 
# The 'fluidsurface' prefix is defined in source/blender/makesdna/DNA_object_fluidsim_types.h bpath = bpathlib.BlendPath(path) - yield result.BlockUsage(fss, bpath, path_full_field=field, - is_sequence=True, block_name=block_name) + yield result.BlockUsage( + fss, bpath, path_full_field=field, is_sequence=True, block_name=block_name + ) @mod_handler(cdefs.eModifierType_Smokesim) -def modifier_smoke_sim(ctx: ModifierContext, modifier: blendfile.BlendFileBlock, block_name: bytes) \ - -> typing.Iterator[result.BlockUsage]: - my_log = log.getChild('modifier_smoke_sim') +def modifier_smoke_sim( + ctx: ModifierContext, modifier: blendfile.BlendFileBlock, block_name: bytes +) -> typing.Iterator[result.BlockUsage]: + my_log = log.getChild("modifier_smoke_sim") - domain = modifier.get_pointer(b'domain') + domain = modifier.get_pointer(b"domain") if domain is None: - my_log.debug('Modifier %r (%r) has no domain', - modifier[b'modifier', b'name'], block_name) + my_log.debug( + "Modifier %r (%r) has no domain", modifier[b"modifier", b"name"], block_name + ) return - pointcache = domain.get_pointer(b'point_cache') + pointcache = domain.get_pointer(b"point_cache") if pointcache is None: return - format = domain.get(b'cache_file_format') + format = domain.get(b"cache_file_format") extensions = { cdefs.PTCACHE_FILE_PTCACHE: cdefs.PTCACHE_EXT, - cdefs.PTCACHE_FILE_OPENVDB: cdefs.PTCACHE_EXT_VDB + cdefs.PTCACHE_FILE_OPENVDB: cdefs.PTCACHE_EXT_VDB, } - yield from _walk_point_cache(ctx, block_name, modifier.bfile, pointcache, extensions[format]) + yield from _walk_point_cache( + ctx, block_name, modifier.bfile, pointcache, extensions[format] + ) + + +@mod_handler(cdefs.eModifierType_Fluid) +def modifier_fluid( + ctx: ModifierContext, modifier: blendfile.BlendFileBlock, block_name: bytes +) -> typing.Iterator[result.BlockUsage]: + my_log = log.getChild("modifier_fluid") + + domain = modifier.get_pointer(b"domain") + if domain is None: + my_log.debug( + "Modifier %r (%r) has no domain", modifier[b"modifier", b"name"], block_name + ) + return + + # See fluid_bake_startjob() in physics_fluid.c + path = domain[b"cache_directory"] + path, field = domain.get(b"cache_directory", return_field=True) + + log.info(" fluid cache at %s", path) + bpath = bpathlib.BlendPath(path) + yield result.BlockUsage( + domain, + bpath, + path_full_field=field, + is_sequence=True, + block_name=block_name, + ) @mod_handler(cdefs.eModifierType_Cloth) -def modifier_cloth(ctx: ModifierContext, modifier: blendfile.BlendFileBlock, block_name: bytes) \ - -> typing.Iterator[result.BlockUsage]: - pointcache = modifier.get_pointer(b'point_cache') +def modifier_cloth( + ctx: ModifierContext, modifier: blendfile.BlendFileBlock, block_name: bytes +) -> typing.Iterator[result.BlockUsage]: + pointcache = modifier.get_pointer(b"point_cache") if pointcache is None: return - yield from _walk_point_cache(ctx, block_name, modifier.bfile, pointcache, cdefs.PTCACHE_EXT) + yield from _walk_point_cache( + ctx, block_name, modifier.bfile, pointcache, cdefs.PTCACHE_EXT + ) diff --git a/trace/result.py b/trace/result.py index a769e8d..a8e4093 100644 --- a/trace/result.py +++ b/trace/result.py @@ -52,32 +52,37 @@ class BlockUsage: :ivar path_base_field: field containing the basename of this asset. 
""" - def __init__(self, - block: blendfile.BlendFileBlock, - asset_path: bpathlib.BlendPath, - is_sequence: bool = False, - path_full_field: dna.Field = None, - path_dir_field: dna.Field = None, - path_base_field: dna.Field = None, - block_name: bytes = b'', - ) -> None: + def __init__( + self, + block: blendfile.BlendFileBlock, + asset_path: bpathlib.BlendPath, + is_sequence: bool = False, + path_full_field: dna.Field = None, + path_dir_field: dna.Field = None, + path_base_field: dna.Field = None, + block_name: bytes = b"", + ) -> None: if block_name: self.block_name = block_name else: self.block_name = self.guess_block_name(block) assert isinstance(block, blendfile.BlendFileBlock) - assert isinstance(asset_path, (bytes, bpathlib.BlendPath)), \ - 'asset_path should be BlendPath, not %r' % type(asset_path) + assert isinstance( + asset_path, (bytes, bpathlib.BlendPath) + ), "asset_path should be BlendPath, not %r" % type(asset_path) if path_full_field is None: - assert isinstance(path_dir_field, dna.Field), \ - 'path_dir_field should be dna.Field, not %r' % type(path_dir_field) - assert isinstance(path_base_field, dna.Field), \ - 'path_base_field should be dna.Field, not %r' % type(path_base_field) + assert isinstance( + path_dir_field, dna.Field + ), "path_dir_field should be dna.Field, not %r" % type(path_dir_field) + assert isinstance( + path_base_field, dna.Field + ), "path_base_field should be dna.Field, not %r" % type(path_base_field) else: - assert isinstance(path_full_field, dna.Field), \ - 'path_full_field should be dna.Field, not %r' % type(path_full_field) + assert isinstance( + path_full_field, dna.Field + ), "path_full_field should be dna.Field, not %r" % type(path_full_field) if isinstance(asset_path, bytes): asset_path = bpathlib.BlendPath(asset_path) @@ -95,26 +100,30 @@ class BlockUsage: @staticmethod def guess_block_name(block: blendfile.BlendFileBlock) -> bytes: try: - return block[b'id', b'name'] + return block[b"id", b"name"] except KeyError: pass try: - return block[b'name'] + return block[b"name"] except KeyError: pass - return b'-unnamed-' + return b"-unnamed-" def __repr__(self): if self.path_full_field is None: - field_name = self.path_dir_field.name.name_full.decode() + \ - '/' + \ - self.path_base_field.name.name_full.decode() + field_name = ( + self.path_dir_field.name.name_full.decode() + + "/" + + self.path_base_field.name.name_full.decode() + ) else: field_name = self.path_full_field.name.name_full.decode() - return '' % ( - self.block_name, self.block.dna_type_name, - field_name, self.asset_path, - ' sequence' if self.is_sequence else '' + return "" % ( + self.block_name, + self.block.dna_type_name, + field_name, + self.asset_path, + " sequence" if self.is_sequence else "", ) def files(self) -> typing.Iterator[pathlib.Path]: @@ -130,7 +139,7 @@ class BlockUsage: path = self.__fspath__() if not self.is_sequence: if not path.exists(): - log.warning('Path %s does not exist for %s', path, self) + log.warning("Path %s does not exist for %s", path, self) return yield path return @@ -138,14 +147,18 @@ class BlockUsage: try: yield from file_sequence.expand_sequence(path) except file_sequence.DoesNotExist: - log.warning('Path %s does not exist for %s', path, self) + log.warning("Path %s does not exist for %s", path, self) def __fspath__(self) -> pathlib.Path: """Determine the absolute path of the asset on the filesystem.""" if self._abspath is None: bpath = self.block.bfile.abspath(self.asset_path) - log.info('Resolved %s rel to %s -> %s', - self.asset_path, 
self.block.bfile.filepath, bpath) + log.info( + "Resolved %s rel to %s -> %s", + self.asset_path, + self.block.bfile.filepath, + bpath, + ) as_path = pathlib.Path(bpath.to_path()) @@ -159,15 +172,19 @@ class BlockUsage: else: self._abspath = abs_parent / as_path.name - log.info('Resolving %s rel to %s -> %s', - self.asset_path, self.block.bfile.filepath, self._abspath) + log.info( + "Resolving %s rel to %s -> %s", + self.asset_path, + self.block.bfile.filepath, + self._abspath, + ) else: - log.info('Reusing abspath %s', self._abspath) + log.info("Reusing abspath %s", self._abspath) return self._abspath abspath = property(__fspath__) - def __lt__(self, other: 'BlockUsage'): + def __lt__(self, other: "BlockUsage"): """Allow sorting for repeatable and predictable unit tests.""" if not isinstance(other, BlockUsage): raise NotImplemented()