Speed up rewriting paths by reusing cached DNA structs & blocks

This commit is contained in:
Sybren A. Stüvel 2018-03-08 10:59:45 +01:00
parent 595f8cb0a6
commit ccdccd69cf
3 changed files with 116 additions and 72 deletions

View File

@@ -45,7 +45,7 @@ GZIP_MAGIC = b'\x1f\x8b'
_cached_bfiles = {} _cached_bfiles = {}
def open_cached(path: pathlib.Path, mode='rb') -> 'BlendFile': def open_cached(path: pathlib.Path, mode='rb', assert_cached=False) -> 'BlendFile':
"""Open a blend file, ensuring it is only opened once.""" """Open a blend file, ensuring it is only opened once."""
bfile_path = path.absolute().resolve() bfile_path = path.absolute().resolve()
try: try:
@@ -53,6 +53,9 @@ def open_cached(path: pathlib.Path, mode='rb') -> 'BlendFile':
except KeyError: except KeyError:
pass pass
if assert_cached:
raise AssertionError('File %s was not cached' % bfile_path)
bfile = BlendFile(path, mode=mode) bfile = BlendFile(path, mode=mode)
_cached_bfiles[bfile_path] = bfile _cached_bfiles[bfile_path] = bfile
return bfile return bfile
@@ -60,12 +63,28 @@ def open_cached(path: pathlib.Path, mode='rb') -> 'BlendFile':
@atexit.register @atexit.register
def close_all_cached(): def close_all_cached():
log.info('Closing all blend files') if not _cached_bfiles:
# Don't even log anything when there is nothing to close
return
log.info('Closing %d cached blend files', len(_cached_bfiles))
for bfile in _cached_bfiles.values(): for bfile in _cached_bfiles.values():
bfile.close() bfile.close()
_cached_bfiles.clear() _cached_bfiles.clear()
def _cache(path: pathlib.Path, bfile: 'BlendFile'):
"""Add a BlendFile to the cache."""
bfile_path = path.absolute().resolve()
_cached_bfiles[bfile_path] = bfile
def _uncache(path: pathlib.Path):
"""Remove a BlendFile object from the cache."""
bfile_path = path.absolute().resolve()
_cached_bfiles.pop(bfile_path, None)
class BlendFile: class BlendFile:
"""Representation of a blend file. """Representation of a blend file.
@@ -86,16 +105,45 @@ class BlendFile:
:param mode: see mode description of pathlib.Path.open() :param mode: see mode description of pathlib.Path.open()
""" """
self.filepath = path self.filepath = path
self.raw_filepath = path
self._is_modified = False self._is_modified = False
self._open_file(path, mode)
self.blocks = [] # BlendFileBlocks, in disk order.
self.code_index = collections.defaultdict(list)
self.structs = []
self.sdna_index_from_id = {}
self.block_from_addr = {}
try:
self.header = header.BlendFileHeader(self.fileobj, self.raw_filepath)
self.block_header_struct = self.header.create_block_header_struct()
self._load_blocks()
except Exception:
self.fileobj.close()
raise
def _open_file(self, path: pathlib.Path, mode: str):
"""Open a blend file, decompressing if necessary.
This does not parse the blend file yet, just makes sure that
self.fileobj is opened and that self.filepath and self.raw_filepath
are set.
:raises exceptions.BlendFileError: when the blend file doesn't have the
correct magic bytes.
"""
fileobj = path.open(mode, buffering=FILE_BUFFER_SIZE) fileobj = path.open(mode, buffering=FILE_BUFFER_SIZE)
fileobj.seek(0, os.SEEK_SET) fileobj.seek(0, os.SEEK_SET)
magic = fileobj.read(len(BLENDFILE_MAGIC))
magic = fileobj.read(len(BLENDFILE_MAGIC))
if magic == BLENDFILE_MAGIC: if magic == BLENDFILE_MAGIC:
self.is_compressed = False self.is_compressed = False
self.raw_filepath = path self.raw_filepath = path
self.fileobj = fileobj self.fileobj = fileobj
elif magic[:2] == GZIP_MAGIC: elif magic[:2] == GZIP_MAGIC:
self.is_compressed = True self.is_compressed = True
@@ -117,24 +165,11 @@ class BlendFile:
self.raw_filepath = pathlib.Path(tmpfile.name) self.raw_filepath = pathlib.Path(tmpfile.name)
fileobj.close() fileobj.close()
self.fileobj = tmpfile self.fileobj = tmpfile
elif magic != BLENDFILE_MAGIC: elif magic != BLENDFILE_MAGIC:
fileobj.close() fileobj.close()
raise exceptions.BlendFileError("File is not a blend file", path) raise exceptions.BlendFileError("File is not a blend file", path)
self.blocks = [] # BlendFileBlocks, in disk order.
self.code_index = collections.defaultdict(list)
self.structs = []
self.sdna_index_from_id = {}
self.block_from_addr = {}
try:
self.header = header.BlendFileHeader(self.fileobj, self.raw_filepath)
self.block_header_struct = self.header.create_block_header_struct()
self._load_blocks()
except Exception:
fileobj.close()
raise
def _load_blocks(self): def _load_blocks(self):
"""Read the blend file to load its DNA structure to memory.""" """Read the blend file to load its DNA structure to memory."""
while True: while True:
@@ -167,6 +202,20 @@ class BlendFile:
def __exit__(self, exctype, excvalue, traceback): def __exit__(self, exctype, excvalue, traceback):
self.close() self.close()
def rebind(self, path: pathlib.Path, mode='rb'):
"""Change which file is bound to this BlendFile.
This allows cloning a previously opened file, and rebinding it to reuse
the already-loaded DNA structs and data blocks.
"""
log.debug('Rebinding %r to %s', self, path)
self.close()
_uncache(self.filepath)
self._open_file(path, mode=mode)
_cache(path, self)
@property @property
def is_modified(self) -> bool: def is_modified(self) -> bool:
return self._is_modified return self._is_modified

View File

@@ -30,7 +30,7 @@ class Packer:
blendfile: pathlib.Path, blendfile: pathlib.Path,
project: pathlib.Path, project: pathlib.Path,
target: pathlib.Path, target: pathlib.Path,
noop: bool): noop=False):
self.blendfile = blendfile self.blendfile = blendfile
self.project = project self.project = project
self.target = target self.target = target
@@ -47,6 +47,8 @@ class Packer:
self._rewrites = collections.defaultdict(list) self._rewrites = collections.defaultdict(list)
self._packed_paths = {} # from path in project to path in BAT Pack dir. self._packed_paths = {} # from path in project to path in BAT Pack dir.
self._copy_cache_miss = self._copy_cache_hit = 0
def strategise(self): def strategise(self):
"""Determine what to do with the assets. """Determine what to do with the assets.
@@ -58,9 +60,11 @@ class Packer:
# The blendfile that we pack is generally not its own dependency, so # The blendfile that we pack is generally not its own dependency, so
# we have to explicitly add it to the _packed_paths. # we have to explicitly add it to the _packed_paths.
bfile_path = self.blendfile.absolute() bfile_path = self.blendfile.absolute()
self._packed_paths[bfile_path] = self.target / bfile_path.relative_to(self.project) bfile_pp = self.target / bfile_path.relative_to(self.project)
act = self._actions[bfile_path] act = self._actions[bfile_path]
act.path_action = PathAction.KEEP_PATH act.path_action = PathAction.KEEP_PATH
act.new_path = self._packed_paths[bfile_path] = bfile_pp
new_location_paths = set() new_location_paths = set()
for usage in tracer.deps(self.blendfile): for usage in tracer.deps(self.blendfile):
@@ -84,7 +88,8 @@ class Packer:
new_location_paths.add(asset_path) new_location_paths.add(asset_path)
else: else:
log.info('%s can keep using %s', bfile_path, usage.asset_path) log.info('%s can keep using %s', bfile_path, usage.asset_path)
self._packed_paths[asset_path] = self.target / asset_path.relative_to(self.project) asset_pp = self.target / asset_path.relative_to(self.project)
act.new_path = self._packed_paths[asset_path] = asset_pp
self._find_new_paths(new_location_paths) self._find_new_paths(new_location_paths)
self._group_rewrites() self._group_rewrites()
@@ -132,6 +137,7 @@ class Packer:
self._copy_files_to_target() self._copy_files_to_target()
if not self.noop: if not self.noop:
self._rewrite_paths() self._rewrite_paths()
log.info('Copy cache: %d hits / %d misses', self._copy_cache_miss, self._copy_cache_hit)
def _copy_files_to_target(self): def _copy_files_to_target(self):
"""Copy all assets to the target directoy. """Copy all assets to the target directoy.
@@ -157,7 +163,11 @@ class Packer:
log.info('Rewriting %s', bfile_pp) log.info('Rewriting %s', bfile_pp)
with blendfile.BlendFile(bfile_pp, 'rb+') as bfile: # The original blend file will have been cached, so we can use it
# to avoid re-parsing all data blocks in the to-be-rewritten file.
bfile = blendfile.open_cached(bfile_path, assert_cached=True)
bfile.rebind(bfile_pp, mode='rb+')
for usage in rewrites: for usage in rewrites:
assert isinstance(usage, result.BlockUsage) assert isinstance(usage, result.BlockUsage)
asset_pp = self._packed_paths[usage.abspath] asset_pp = self._packed_paths[usage.abspath]
@@ -190,7 +200,6 @@ class Packer:
usage.path_full_field.name.name_only, block) usage.path_full_field.name.name_only, block)
written = block.set(usage.path_full_field.name.name_only, relpath) written = block.set(usage.path_full_field.name.name_only, relpath)
log.info(' - written %d bytes', written) log.info(' - written %d bytes', written)
bfile.fileobj.flush()
def _copy_asset_and_deps(self, asset_path: pathlib.Path, action: AssetAction): def _copy_asset_and_deps(self, asset_path: pathlib.Path, action: AssetAction):
log.info('Copying %s and dependencies', asset_path) log.info('Copying %s and dependencies', asset_path)
@@ -199,10 +208,9 @@ class Packer:
packed_path = self._packed_paths[asset_path] packed_path = self._packed_paths[asset_path]
self._copy_to_target(asset_path, packed_path) self._copy_to_target(asset_path, packed_path)
# Copy its dependencies. # Copy its sequence dependencies.
for usage in action.usages: for usage in action.usages:
if not usage.is_sequence: if not usage.is_sequence:
self._copy_to_target(usage.abspath, packed_path)
continue continue
first_pp = self._packed_paths[usage.abspath] first_pp = self._packed_paths[usage.abspath]
@@ -216,10 +224,10 @@ class Packer:
packed_path = packed_base_dir / file_path.name packed_path = packed_base_dir / file_path.name
self._copy_to_target(file_path, packed_path) self._copy_to_target(file_path, packed_path)
def _copy_to_target(self, asset_path: pathlib.Path, target: pathlib.Path): # Assumption: all data blocks using this asset use it the same way.
if self._is_already_copied(asset_path): break
return
def _copy_to_target(self, asset_path: pathlib.Path, target: pathlib.Path):
print('%s%s' % (asset_path, target)) print('%s%s' % (asset_path, target))
if self.noop: if self.noop:
return return
@@ -227,18 +235,3 @@ class Packer:
target.parent.mkdir(parents=True, exist_ok=True) target.parent.mkdir(parents=True, exist_ok=True)
# TODO(Sybren): when we target Py 3.6+, remove the str() calls. # TODO(Sybren): when we target Py 3.6+, remove the str() calls.
shutil.copyfile(str(asset_path), str(target)) shutil.copyfile(str(asset_path), str(target))
def _is_already_copied(self, asset_path: pathlib.Path) -> bool:
try:
asset_path = asset_path.resolve()
except FileNotFoundError:
log.error('Dependency %s does not exist', asset_path)
return True
if asset_path in self._already_copied:
log.debug('Already copied %s', asset_path)
return True
# Assume the copy will happen soon.
self._already_copied.add(asset_path)
return False

View File

@@ -2,6 +2,8 @@ import logging
import pathlib import pathlib
import unittest import unittest
from blender_asset_tracer import blendfile
class AbstractBlendFileTest(unittest.TestCase): class AbstractBlendFileTest(unittest.TestCase):
@classmethod @classmethod
@@ -15,5 +17,5 @@ class AbstractBlendFileTest(unittest.TestCase):
self.bf = None self.bf = None
def tearDown(self): def tearDown(self):
if self.bf: self.bf = None
self.bf.close() blendfile.close_all_cached()