Speed up rewriting paths by reusing cached DNA structs & blocks

This commit is contained in:
Sybren A. Stüvel 2018-03-08 10:59:45 +01:00
parent 595f8cb0a6
commit ccdccd69cf
3 changed files with 116 additions and 72 deletions

View File

@@ -45,7 +45,7 @@ GZIP_MAGIC = b'\x1f\x8b'
_cached_bfiles = {} _cached_bfiles = {}
def open_cached(path: pathlib.Path, mode='rb') -> 'BlendFile': def open_cached(path: pathlib.Path, mode='rb', assert_cached=False) -> 'BlendFile':
"""Open a blend file, ensuring it is only opened once.""" """Open a blend file, ensuring it is only opened once."""
bfile_path = path.absolute().resolve() bfile_path = path.absolute().resolve()
try: try:
@@ -53,6 +53,9 @@ def open_cached(path: pathlib.Path, mode='rb') -> 'BlendFile':
except KeyError: except KeyError:
pass pass
if assert_cached:
raise AssertionError('File %s was not cached' % bfile_path)
bfile = BlendFile(path, mode=mode) bfile = BlendFile(path, mode=mode)
_cached_bfiles[bfile_path] = bfile _cached_bfiles[bfile_path] = bfile
return bfile return bfile
@@ -60,12 +63,28 @@ def open_cached(path: pathlib.Path, mode='rb') -> 'BlendFile':
@atexit.register @atexit.register
def close_all_cached(): def close_all_cached():
log.info('Closing all blend files') if not _cached_bfiles:
# Don't even log anything when there is nothing to close
return
log.info('Closing %d cached blend files', len(_cached_bfiles))
for bfile in _cached_bfiles.values(): for bfile in _cached_bfiles.values():
bfile.close() bfile.close()
_cached_bfiles.clear() _cached_bfiles.clear()
def _cache(path: pathlib.Path, bfile: 'BlendFile'):
"""Add a BlendFile to the cache."""
bfile_path = path.absolute().resolve()
_cached_bfiles[bfile_path] = bfile
def _uncache(path: pathlib.Path):
"""Remove a BlendFile object from the cache."""
bfile_path = path.absolute().resolve()
_cached_bfiles.pop(bfile_path, None)
class BlendFile: class BlendFile:
"""Representation of a blend file. """Representation of a blend file.
@@ -86,16 +105,45 @@ class BlendFile:
:param mode: see mode description of pathlib.Path.open() :param mode: see mode description of pathlib.Path.open()
""" """
self.filepath = path self.filepath = path
self.raw_filepath = path
self._is_modified = False self._is_modified = False
self._open_file(path, mode)
self.blocks = [] # BlendFileBlocks, in disk order.
self.code_index = collections.defaultdict(list)
self.structs = []
self.sdna_index_from_id = {}
self.block_from_addr = {}
try:
self.header = header.BlendFileHeader(self.fileobj, self.raw_filepath)
self.block_header_struct = self.header.create_block_header_struct()
self._load_blocks()
except Exception:
self.fileobj.close()
raise
def _open_file(self, path: pathlib.Path, mode: str):
"""Open a blend file, decompressing if necessary.
This does not parse the blend file yet, just makes sure that
self.fileobj is opened and that self.filepath and self.raw_filepath
are set.
:raises exceptions.BlendFileError: when the blend file doesn't have the
correct magic bytes.
"""
fileobj = path.open(mode, buffering=FILE_BUFFER_SIZE) fileobj = path.open(mode, buffering=FILE_BUFFER_SIZE)
fileobj.seek(0, os.SEEK_SET) fileobj.seek(0, os.SEEK_SET)
magic = fileobj.read(len(BLENDFILE_MAGIC))
magic = fileobj.read(len(BLENDFILE_MAGIC))
if magic == BLENDFILE_MAGIC: if magic == BLENDFILE_MAGIC:
self.is_compressed = False self.is_compressed = False
self.raw_filepath = path self.raw_filepath = path
self.fileobj = fileobj self.fileobj = fileobj
elif magic[:2] == GZIP_MAGIC: elif magic[:2] == GZIP_MAGIC:
self.is_compressed = True self.is_compressed = True
@@ -117,24 +165,11 @@ class BlendFile:
self.raw_filepath = pathlib.Path(tmpfile.name) self.raw_filepath = pathlib.Path(tmpfile.name)
fileobj.close() fileobj.close()
self.fileobj = tmpfile self.fileobj = tmpfile
elif magic != BLENDFILE_MAGIC: elif magic != BLENDFILE_MAGIC:
fileobj.close() fileobj.close()
raise exceptions.BlendFileError("File is not a blend file", path) raise exceptions.BlendFileError("File is not a blend file", path)
self.blocks = [] # BlendFileBlocks, in disk order.
self.code_index = collections.defaultdict(list)
self.structs = []
self.sdna_index_from_id = {}
self.block_from_addr = {}
try:
self.header = header.BlendFileHeader(self.fileobj, self.raw_filepath)
self.block_header_struct = self.header.create_block_header_struct()
self._load_blocks()
except Exception:
fileobj.close()
raise
def _load_blocks(self): def _load_blocks(self):
"""Read the blend file to load its DNA structure to memory.""" """Read the blend file to load its DNA structure to memory."""
while True: while True:
@@ -167,6 +202,20 @@ class BlendFile:
def __exit__(self, exctype, excvalue, traceback): def __exit__(self, exctype, excvalue, traceback):
self.close() self.close()
def rebind(self, path: pathlib.Path, mode='rb'):
"""Change which file is bound to this BlendFile.
This allows cloning a previously opened file, and rebinding it to reuse
the already-loaded DNA structs and data blocks.
"""
log.debug('Rebinding %r to %s', self, path)
self.close()
_uncache(self.filepath)
self._open_file(path, mode=mode)
_cache(path, self)
@property @property
def is_modified(self) -> bool: def is_modified(self) -> bool:
return self._is_modified return self._is_modified

View File

@@ -30,7 +30,7 @@ class Packer:
blendfile: pathlib.Path, blendfile: pathlib.Path,
project: pathlib.Path, project: pathlib.Path,
target: pathlib.Path, target: pathlib.Path,
noop: bool): noop=False):
self.blendfile = blendfile self.blendfile = blendfile
self.project = project self.project = project
self.target = target self.target = target
@@ -47,6 +47,8 @@ class Packer:
self._rewrites = collections.defaultdict(list) self._rewrites = collections.defaultdict(list)
self._packed_paths = {} # from path in project to path in BAT Pack dir. self._packed_paths = {} # from path in project to path in BAT Pack dir.
self._copy_cache_miss = self._copy_cache_hit = 0
def strategise(self): def strategise(self):
"""Determine what to do with the assets. """Determine what to do with the assets.
@@ -58,9 +60,11 @@ class Packer:
# The blendfile that we pack is generally not its own dependency, so # The blendfile that we pack is generally not its own dependency, so
# we have to explicitly add it to the _packed_paths. # we have to explicitly add it to the _packed_paths.
bfile_path = self.blendfile.absolute() bfile_path = self.blendfile.absolute()
self._packed_paths[bfile_path] = self.target / bfile_path.relative_to(self.project) bfile_pp = self.target / bfile_path.relative_to(self.project)
act = self._actions[bfile_path] act = self._actions[bfile_path]
act.path_action = PathAction.KEEP_PATH act.path_action = PathAction.KEEP_PATH
act.new_path = self._packed_paths[bfile_path] = bfile_pp
new_location_paths = set() new_location_paths = set()
for usage in tracer.deps(self.blendfile): for usage in tracer.deps(self.blendfile):
@@ -84,7 +88,8 @@ class Packer:
new_location_paths.add(asset_path) new_location_paths.add(asset_path)
else: else:
log.info('%s can keep using %s', bfile_path, usage.asset_path) log.info('%s can keep using %s', bfile_path, usage.asset_path)
self._packed_paths[asset_path] = self.target / asset_path.relative_to(self.project) asset_pp = self.target / asset_path.relative_to(self.project)
act.new_path = self._packed_paths[asset_path] = asset_pp
self._find_new_paths(new_location_paths) self._find_new_paths(new_location_paths)
self._group_rewrites() self._group_rewrites()
@@ -132,6 +137,7 @@ class Packer:
self._copy_files_to_target() self._copy_files_to_target()
if not self.noop: if not self.noop:
self._rewrite_paths() self._rewrite_paths()
log.info('Copy cache: %d hits / %d misses', self._copy_cache_miss, self._copy_cache_hit)
def _copy_files_to_target(self): def _copy_files_to_target(self):
"""Copy all assets to the target directoy. """Copy all assets to the target directoy.
@@ -157,7 +163,11 @@ class Packer:
log.info('Rewriting %s', bfile_pp) log.info('Rewriting %s', bfile_pp)
with blendfile.BlendFile(bfile_pp, 'rb+') as bfile: # The original blend file will have been cached, so we can use it
# to avoid re-parsing all data blocks in the to-be-rewritten file.
bfile = blendfile.open_cached(bfile_path, assert_cached=True)
bfile.rebind(bfile_pp, mode='rb+')
for usage in rewrites: for usage in rewrites:
assert isinstance(usage, result.BlockUsage) assert isinstance(usage, result.BlockUsage)
asset_pp = self._packed_paths[usage.abspath] asset_pp = self._packed_paths[usage.abspath]
@@ -190,7 +200,6 @@ class Packer:
usage.path_full_field.name.name_only, block) usage.path_full_field.name.name_only, block)
written = block.set(usage.path_full_field.name.name_only, relpath) written = block.set(usage.path_full_field.name.name_only, relpath)
log.info(' - written %d bytes', written) log.info(' - written %d bytes', written)
bfile.fileobj.flush()
def _copy_asset_and_deps(self, asset_path: pathlib.Path, action: AssetAction): def _copy_asset_and_deps(self, asset_path: pathlib.Path, action: AssetAction):
log.info('Copying %s and dependencies', asset_path) log.info('Copying %s and dependencies', asset_path)
@@ -199,10 +208,9 @@ class Packer:
packed_path = self._packed_paths[asset_path] packed_path = self._packed_paths[asset_path]
self._copy_to_target(asset_path, packed_path) self._copy_to_target(asset_path, packed_path)
# Copy its dependencies. # Copy its sequence dependencies.
for usage in action.usages: for usage in action.usages:
if not usage.is_sequence: if not usage.is_sequence:
self._copy_to_target(usage.abspath, packed_path)
continue continue
first_pp = self._packed_paths[usage.abspath] first_pp = self._packed_paths[usage.abspath]
@@ -216,10 +224,10 @@ class Packer:
packed_path = packed_base_dir / file_path.name packed_path = packed_base_dir / file_path.name
self._copy_to_target(file_path, packed_path) self._copy_to_target(file_path, packed_path)
def _copy_to_target(self, asset_path: pathlib.Path, target: pathlib.Path): # Assumption: all data blocks using this asset use it the same way.
if self._is_already_copied(asset_path): break
return
def _copy_to_target(self, asset_path: pathlib.Path, target: pathlib.Path):
print('%s%s' % (asset_path, target)) print('%s%s' % (asset_path, target))
if self.noop: if self.noop:
return return
@@ -227,18 +235,3 @@ class Packer:
target.parent.mkdir(parents=True, exist_ok=True) target.parent.mkdir(parents=True, exist_ok=True)
# TODO(Sybren): when we target Py 3.6+, remove the str() calls. # TODO(Sybren): when we target Py 3.6+, remove the str() calls.
shutil.copyfile(str(asset_path), str(target)) shutil.copyfile(str(asset_path), str(target))
def _is_already_copied(self, asset_path: pathlib.Path) -> bool:
try:
asset_path = asset_path.resolve()
except FileNotFoundError:
log.error('Dependency %s does not exist', asset_path)
return True
if asset_path in self._already_copied:
log.debug('Already copied %s', asset_path)
return True
# Assume the copy will happen soon.
self._already_copied.add(asset_path)
return False

View File

@@ -2,6 +2,8 @@ import logging
import pathlib import pathlib
import unittest import unittest
from blender_asset_tracer import blendfile
class AbstractBlendFileTest(unittest.TestCase): class AbstractBlendFileTest(unittest.TestCase):
@classmethod @classmethod
@@ -15,5 +17,5 @@ class AbstractBlendFileTest(unittest.TestCase):
self.bf = None self.bf = None
def tearDown(self): def tearDown(self):
if self.bf: self.bf = None
self.bf.close() blendfile.close_all_cached()