diff --git a/blender_asset_tracer/blendfile/__init__.py b/blender_asset_tracer/blendfile/__init__.py
index 37444de..f344176 100644
--- a/blender_asset_tracer/blendfile/__init__.py
+++ b/blender_asset_tracer/blendfile/__init__.py
@@ -45,7 +45,7 @@ GZIP_MAGIC = b'\x1f\x8b'
 _cached_bfiles = {}
 
 
-def open_cached(path: pathlib.Path, mode='rb') -> 'BlendFile':
+def open_cached(path: pathlib.Path, mode='rb', assert_cached=False) -> 'BlendFile':
     """Open a blend file, ensuring it is only opened once."""
     bfile_path = path.absolute().resolve()
     try:
@@ -53,6 +53,9 @@ def open_cached(path: pathlib.Path, mode='rb') -> 'BlendFile':
     except KeyError:
         pass
 
+    if assert_cached:
+        raise AssertionError('File %s was not cached' % bfile_path)
+
     bfile = BlendFile(path, mode=mode)
     _cached_bfiles[bfile_path] = bfile
     return bfile
@@ -60,12 +63,28 @@ def open_cached(path: pathlib.Path, mode='rb') -> 'BlendFile':
 
 @atexit.register
 def close_all_cached():
-    log.info('Closing all blend files')
+    if not _cached_bfiles:
+        # Don't even log anything when there is nothing to close
+        return
+
+    log.info('Closing %d cached blend files', len(_cached_bfiles))
     for bfile in _cached_bfiles.values():
         bfile.close()
     _cached_bfiles.clear()
 
 
+def _cache(path: pathlib.Path, bfile: 'BlendFile'):
+    """Add a BlendFile to the cache."""
+    bfile_path = path.absolute().resolve()
+    _cached_bfiles[bfile_path] = bfile
+
+
+def _uncache(path: pathlib.Path):
+    """Remove a BlendFile object from the cache."""
+    bfile_path = path.absolute().resolve()
+    _cached_bfiles.pop(bfile_path, None)
+
+
 class BlendFile:
     """Representation of a blend file.
 
@@ -86,16 +105,45 @@ class BlendFile:
         :param mode: see mode description of pathlib.Path.open()
         """
         self.filepath = path
+        self.raw_filepath = path
         self._is_modified = False
 
+        self._open_file(path, mode)
+
+        self.blocks = []  # BlendFileBlocks, in disk order.
+        self.code_index = collections.defaultdict(list)
+        self.structs = []
+        self.sdna_index_from_id = {}
+        self.block_from_addr = {}
+
+        try:
+            self.header = header.BlendFileHeader(self.fileobj, self.raw_filepath)
+            self.block_header_struct = self.header.create_block_header_struct()
+            self._load_blocks()
+        except Exception:
+            self.fileobj.close()
+            raise
+
+    def _open_file(self, path: pathlib.Path, mode: str):
+        """Open a blend file, decompressing if necessary.
+
+        This does not parse the blend file yet, just makes sure that
+        self.fileobj is opened and that self.filepath and self.raw_filepath
+        are set.
+
+        :raises exceptions.BlendFileError: when the blend file doesn't have the
+            correct magic bytes.
+        """
+
         fileobj = path.open(mode, buffering=FILE_BUFFER_SIZE)
         fileobj.seek(0, os.SEEK_SET)
-        magic = fileobj.read(len(BLENDFILE_MAGIC))
 
+        magic = fileobj.read(len(BLENDFILE_MAGIC))
         if magic == BLENDFILE_MAGIC:
             self.is_compressed = False
             self.raw_filepath = path
             self.fileobj = fileobj
+
         elif magic[:2] == GZIP_MAGIC:
             self.is_compressed = True
 
@@ -117,24 +165,11 @@ class BlendFile:
             self.raw_filepath = pathlib.Path(tmpfile.name)
             fileobj.close()
             self.fileobj = tmpfile
+
         elif magic != BLENDFILE_MAGIC:
             fileobj.close()
             raise exceptions.BlendFileError("File is not a blend file", path)
 
-        self.blocks = []  # BlendFileBlocks, in disk order.
-        self.code_index = collections.defaultdict(list)
-        self.structs = []
-        self.sdna_index_from_id = {}
-        self.block_from_addr = {}
-
-        try:
-            self.header = header.BlendFileHeader(self.fileobj, self.raw_filepath)
-            self.block_header_struct = self.header.create_block_header_struct()
-            self._load_blocks()
-        except Exception:
-            fileobj.close()
-            raise
-
     def _load_blocks(self):
         """Read the blend file to load its DNA structure to memory."""
         while True:
@@ -167,6 +202,20 @@ class BlendFile:
     def __exit__(self, exctype, excvalue, traceback):
         self.close()
 
+    def rebind(self, path: pathlib.Path, mode='rb'):
+        """Change which file is bound to this BlendFile.
+
+        This allows cloning a previously opened file, and rebinding it to reuse
+        the already-loaded DNA structs and data blocks.
+        """
+        log.debug('Rebinding %r to %s', self, path)
+
+        self.close()
+        _uncache(self.filepath)
+
+        self._open_file(path, mode=mode)
+        _cache(path, self)
+
     @property
     def is_modified(self) -> bool:
         return self._is_modified
diff --git a/blender_asset_tracer/pack/__init__.py b/blender_asset_tracer/pack/__init__.py
index 62557e8..64fd017 100644
--- a/blender_asset_tracer/pack/__init__.py
+++ b/blender_asset_tracer/pack/__init__.py
@@ -30,7 +30,7 @@ class Packer:
                  blendfile: pathlib.Path,
                  project: pathlib.Path,
                  target: pathlib.Path,
-                 noop: bool):
+                 noop=False):
         self.blendfile = blendfile
         self.project = project
         self.target = target
@@ -47,6 +47,8 @@ class Packer:
         self._rewrites = collections.defaultdict(list)
         self._packed_paths = {}  # from path in project to path in BAT Pack dir.
 
+        self._copy_cache_miss = self._copy_cache_hit = 0
+
     def strategise(self):
         """Determine what to do with the assets.
 
@@ -58,9 +60,11 @@ class Packer:
         # The blendfile that we pack is generally not its own dependency, so
         # we have to explicitly add it to the _packed_paths.
         bfile_path = self.blendfile.absolute()
-        self._packed_paths[bfile_path] = self.target / bfile_path.relative_to(self.project)
+        bfile_pp = self.target / bfile_path.relative_to(self.project)
+
         act = self._actions[bfile_path]
         act.path_action = PathAction.KEEP_PATH
+        act.new_path = self._packed_paths[bfile_path] = bfile_pp
 
         new_location_paths = set()
         for usage in tracer.deps(self.blendfile):
@@ -84,7 +88,8 @@ class Packer:
                 new_location_paths.add(asset_path)
             else:
                 log.info('%s can keep using %s', bfile_path, usage.asset_path)
-                self._packed_paths[asset_path] = self.target / asset_path.relative_to(self.project)
+                asset_pp = self.target / asset_path.relative_to(self.project)
+                act.new_path = self._packed_paths[asset_path] = asset_pp
 
         self._find_new_paths(new_location_paths)
         self._group_rewrites()
@@ -132,6 +137,7 @@ class Packer:
         self._copy_files_to_target()
         if not self.noop:
             self._rewrite_paths()
+        log.info('Copy cache: %d hits / %d misses', self._copy_cache_hit, self._copy_cache_miss)
 
     def _copy_files_to_target(self):
         """Copy all assets to the target directoy.
@@ -157,40 +163,43 @@ class Packer:
 
             log.info('Rewriting %s', bfile_pp)
 
-            with blendfile.BlendFile(bfile_pp, 'rb+') as bfile:
-                for usage in rewrites:
-                    assert isinstance(usage, result.BlockUsage)
-                    asset_pp = self._packed_paths[usage.abspath]
-                    assert isinstance(asset_pp, pathlib.Path)
+            # The original blend file will have been cached, so we can use it
+            # to avoid re-parsing all data blocks in the to-be-rewritten file.
+            bfile = blendfile.open_cached(bfile_path, assert_cached=True)
+            bfile.rebind(bfile_pp, mode='rb+')
 
-                    log.debug(' - %s is packed at %s', usage.asset_path, asset_pp)
-                    relpath = bpathlib.BlendPath.mkrelative(asset_pp, bfile_pp)
-                    if relpath == usage.asset_path:
-                        log.info(' - %s remained at %s', usage.asset_path, relpath)
-                        continue
+            for usage in rewrites:
+                assert isinstance(usage, result.BlockUsage)
+                asset_pp = self._packed_paths[usage.abspath]
+                assert isinstance(asset_pp, pathlib.Path)
 
-                    log.info(' - %s moved to %s', usage.asset_path, relpath)
+                log.debug(' - %s is packed at %s', usage.asset_path, asset_pp)
+                relpath = bpathlib.BlendPath.mkrelative(asset_pp, bfile_pp)
+                if relpath == usage.asset_path:
+                    log.info(' - %s remained at %s', usage.asset_path, relpath)
+                    continue
 
-                    # Find the same block in the newly copied file.
-                    block = bfile.dereference_pointer(usage.block.addr_old)
-                    if usage.path_full_field is None:
-                        log.info(' - updating field %s of block %s',
-                                 usage.path_dir_field.name.name_only, block)
-                        reldir = bpathlib.BlendPath.mkrelative(asset_pp.parent, bfile_pp)
-                        written = block.set(usage.path_dir_field.name.name_only, reldir)
-                        log.info(' - written %d bytes', written)
+                log.info(' - %s moved to %s', usage.asset_path, relpath)
 
-                        # BIG FAT ASSUMPTION that the filename (e.g. basename
-                        # without path) does not change. This makes things much
-                        # easier, as in the sequence editor the directory and
-                        # filename fields are in different blocks. See the
-                        # blocks2assets.scene() function for the implementation.
-                    else:
-                        log.info(' - updating field %s of block %s',
-                                 usage.path_full_field.name.name_only, block)
-                        written = block.set(usage.path_full_field.name.name_only, relpath)
-                        log.info(' - written %d bytes', written)
-                bfile.fileobj.flush()
+                # Find the same block in the newly copied file.
+                block = bfile.dereference_pointer(usage.block.addr_old)
+                if usage.path_full_field is None:
+                    log.info(' - updating field %s of block %s',
+                             usage.path_dir_field.name.name_only, block)
+                    reldir = bpathlib.BlendPath.mkrelative(asset_pp.parent, bfile_pp)
+                    written = block.set(usage.path_dir_field.name.name_only, reldir)
+                    log.info(' - written %d bytes', written)
+
+                    # BIG FAT ASSUMPTION that the filename (e.g. basename
+                    # without path) does not change. This makes things much
+                    # easier, as in the sequence editor the directory and
+                    # filename fields are in different blocks. See the
+                    # blocks2assets.scene() function for the implementation.
+                else:
+                    log.info(' - updating field %s of block %s',
+                             usage.path_full_field.name.name_only, block)
+                    written = block.set(usage.path_full_field.name.name_only, relpath)
+                    log.info(' - written %d bytes', written)
 
     def _copy_asset_and_deps(self, asset_path: pathlib.Path, action: AssetAction):
         log.info('Copying %s and dependencies', asset_path)
@@ -199,10 +208,9 @@ class Packer:
         packed_path = self._packed_paths[asset_path]
         self._copy_to_target(asset_path, packed_path)
 
-        # Copy its dependencies.
+        # Copy its sequence dependencies.
         for usage in action.usages:
             if not usage.is_sequence:
-                self._copy_to_target(usage.abspath, packed_path)
                 continue
 
             first_pp = self._packed_paths[usage.abspath]
@@ -216,10 +224,10 @@ class Packer:
                 packed_path = packed_base_dir / file_path.name
                 self._copy_to_target(file_path, packed_path)
 
-    def _copy_to_target(self, asset_path: pathlib.Path, target: pathlib.Path):
-        if self._is_already_copied(asset_path):
-            return
+            # Assumption: all data blocks using this asset use it the same way.
+            break
 
+    def _copy_to_target(self, asset_path: pathlib.Path, target: pathlib.Path):
         print('%s → %s' % (asset_path, target))
         if self.noop:
             return
@@ -227,18 +235,3 @@ class Packer:
         target.parent.mkdir(parents=True, exist_ok=True)
         # TODO(Sybren): when we target Py 3.6+, remove the str() calls.
         shutil.copyfile(str(asset_path), str(target))
-
-    def _is_already_copied(self, asset_path: pathlib.Path) -> bool:
-        try:
-            asset_path = asset_path.resolve()
-        except FileNotFoundError:
-            log.error('Dependency %s does not exist', asset_path)
-            return True
-
-        if asset_path in self._already_copied:
-            log.debug('Already copied %s', asset_path)
-            return True
-
-        # Assume the copy will happen soon.
-        self._already_copied.add(asset_path)
-        return False
diff --git a/tests/abstract_test.py b/tests/abstract_test.py
index d9f4f60..378c38f 100644
--- a/tests/abstract_test.py
+++ b/tests/abstract_test.py
@@ -2,6 +2,8 @@ import logging
 import pathlib
 import unittest
 
+from blender_asset_tracer import blendfile
+
 
 class AbstractBlendFileTest(unittest.TestCase):
     @classmethod
@@ -15,5 +17,5 @@ class AbstractBlendFileTest(unittest.TestCase):
         self.bf = None
 
     def tearDown(self):
-        if self.bf:
-            self.bf.close()
+        self.bf = None
+        blendfile.close_all_cached()