From 824ca4bcb75114f021e286c9a821f747564f3293 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sybren=20A=2E=20St=C3=BCvel?= Date: Fri, 9 Mar 2018 12:33:15 +0100 Subject: [PATCH] Rewrite blend files in temporary directory, before copying This allows us to later support uploading to a non-local filesystem, such as Amazon S3 cloud storage, after path rewriting is complete. --- blender_asset_tracer/blendfile/__init__.py | 41 +++++++--- blender_asset_tracer/pack/__init__.py | 72 +++++++++++++---- tests/test_blendfile_loading.py | 91 ++++++++++++++++++++++ tests/test_pack.py | 53 ++++++++----- 4 files changed, 212 insertions(+), 45 deletions(-) diff --git a/blender_asset_tracer/blendfile/__init__.py b/blender_asset_tracer/blendfile/__init__.py index 5407897..3c18b58 100644 --- a/blender_asset_tracer/blendfile/__init__.py +++ b/blender_asset_tracer/blendfile/__init__.py @@ -28,6 +28,7 @@ import logging import os import struct import pathlib +import shutil import tempfile import typing @@ -44,19 +45,28 @@ BFBList = typing.List['BlendFileBlock'] _cached_bfiles = {} # type: typing.Dict[pathlib.Path, BlendFile] -def open_cached(path: pathlib.Path, mode='rb', assert_cached=False) -> 'BlendFile': +def open_cached(path: pathlib.Path, mode='rb', + assert_cached: typing.Optional[bool] = None) -> 'BlendFile': """Open a blend file, ensuring it is only opened once.""" + my_log = log.getChild('open_cached') bfile_path = path.absolute().resolve() + + if assert_cached is not None: + is_cached = bfile_path in _cached_bfiles + if assert_cached and not is_cached: + raise AssertionError('File %s was not cached' % bfile_path) + elif not assert_cached and is_cached: + raise AssertionError('File %s was cached' % bfile_path) + try: - return _cached_bfiles[bfile_path] + bfile = _cached_bfiles[bfile_path] except KeyError: - pass + my_log.debug('Opening non-cached %s', path) + bfile = BlendFile(path, mode=mode) + _cached_bfiles[bfile_path] = bfile + else: + my_log.debug('Returning cached %s', path) - if assert_cached: - raise AssertionError('File %s was not cached' % bfile_path) - - bfile = BlendFile(path, mode=mode) - _cached_bfiles[bfile_path] = bfile return bfile @@ -67,7 +77,7 @@ def close_all_cached(): return log.debug('Closing %d cached blend files', len(_cached_bfiles)) - for bfile in _cached_bfiles.values(): + for bfile in list(_cached_bfiles.values()): bfile.close() _cached_bfiles.clear() @@ -136,6 +146,8 @@ class BlendFile: if 'b' not in mode: raise ValueError('Only binary modes are supported, not %r' % mode) + self.filepath = path + fileobj = path.open(mode, buffering=FILE_BUFFER_SIZE) # typing.IO[bytes] fileobj.seek(0, os.SEEK_SET) @@ -206,7 +218,7 @@ class BlendFile: def __exit__(self, exctype, excvalue, traceback): self.close() - def rebind(self, path: pathlib.Path, mode='rb'): + def copy_and_rebind(self, path: pathlib.Path, mode='rb'): """Change which file is bound to this BlendFile. This allows cloning a previously opened file, and rebinding it to reuse @@ -217,6 +229,10 @@ class BlendFile: self.close() _uncache(self.filepath) + self.log.debug('Copying %s to %s', self.filepath, path) + # TODO(Sybren): remove str() calls when targeting Python 3.6+ + shutil.copy(str(self.filepath), str(path)) + self._open_file(path, mode=mode) _cache(path, self) @@ -261,6 +277,11 @@ class BlendFile: self.fileobj.close() self._is_modified = False + try: + del _cached_bfiles[self.filepath] + except KeyError: + pass + def ensure_subtype_smaller(self, sdna_index_curr, sdna_index_next): # never refine to a smaller type curr_struct = self.structs[sdna_index_curr] diff --git a/blender_asset_tracer/pack/__init__.py b/blender_asset_tracer/pack/__init__.py index efc93c9..565adc0 100644 --- a/blender_asset_tracer/pack/__init__.py +++ b/blender_asset_tracer/pack/__init__.py @@ -3,6 +3,8 @@ import enum import functools import logging import pathlib +import tempfile + import typing from blender_asset_tracer import trace, bpathlib, blendfile @@ -33,7 +35,12 @@ class AssetAction: self.new_path = None # type: pathlib.Path """Absolute path to the asset in the BAT Pack.""" - self.read_from = None # type: pathlib.Path + self.read_from = None # type: typing.Optional[pathlib.Path] + """Optional path from which to read the asset. + + This is used when blend files have been rewritten. It is assumed that + when this property is set, the file can be moved instead of copied. + """ self.rewrites = [] # type: typing.List[result.BlockUsage] """BlockUsage objects in this asset that may require rewriting. @@ -65,6 +72,19 @@ class Packer: # Number of files we would copy, if not for --noop self._file_count = 0 + self._tmpdir = tempfile.TemporaryDirectory(suffix='-batpack') + self._rewrite_in = pathlib.Path(self._tmpdir.name) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + + def close(self): + """Clean up any temporary files.""" + self._tmpdir.cleanup() + def strategise(self): """Determine what to do with the assets. @@ -148,9 +168,9 @@ class Packer: """Execute the strategy.""" assert self._actions, 'Run strategise() first' - self._copy_files_to_target() if not self.noop: self._rewrite_paths() + self._copy_files_to_target() def _copy_files_to_target(self): """Copy all assets to the target directoy. @@ -172,21 +192,36 @@ class Packer: fc.done_and_join() def _rewrite_paths(self): - """Rewrite paths to the new location of the assets.""" + """Rewrite paths to the new location of the assets. + + Writes the rewritten blend files to a temporary location. + """ for bfile_path, action in self._actions.items(): if not action.rewrites: continue assert isinstance(bfile_path, pathlib.Path) + # bfile_pp is the final path of this blend file in the BAT pack. + # It is used to determine relative paths to other blend files. + # It is *not* used for any disk I/O, since the file may not even + # exist on the local filesystem. bfile_pp = self._actions[bfile_path].new_path - log.info('Rewriting %s', bfile_pp) + # Use tempfile to create a unique name in our temporary directoy. + # The file should be deleted when self.close() is called, and not + # when the bfile_tp object is GC'd. + bfile_tmp = tempfile.NamedTemporaryFile(dir=str(self._rewrite_in), + suffix='-' + bfile_path.name, + delete=False) + bfile_tp = pathlib.Path(bfile_tmp.name) + action.read_from = bfile_tp + log.info('Rewriting %s to %s', bfile_path, bfile_tp) # The original blend file will have been cached, so we can use it # to avoid re-parsing all data blocks in the to-be-rewritten file. bfile = blendfile.open_cached(bfile_path, assert_cached=True) - bfile.rebind(bfile_pp, mode='rb+') + bfile.copy_and_rebind(bfile_tp, mode='rb+') for usage in action.rewrites: assert isinstance(usage, result.BlockUsage) @@ -204,11 +239,11 @@ class Packer: # Find the same block in the newly copied file. block = bfile.dereference_pointer(usage.block.addr_old) if usage.path_full_field is None: - log.info(' - updating field %s of block %s', - usage.path_dir_field.name.name_only, block) + log.debug(' - updating field %s of block %s', + usage.path_dir_field.name.name_only, block) reldir = bpathlib.BlendPath.mkrelative(asset_pp.parent, bfile_pp) written = block.set(usage.path_dir_field.name.name_only, reldir) - log.info(' - written %d bytes', written) + log.debug(' - written %d bytes', written) # BIG FAT ASSUMPTION that the filename (e.g. basename # without path) does not change. This makes things much @@ -216,18 +251,24 @@ class Packer: # filename fields are in different blocks. See the # blocks2assets.scene() function for the implementation. else: - log.info(' - updating field %s of block %s', - usage.path_full_field.name.name_only, block) + log.debug(' - updating field %s of block %s', + usage.path_full_field.name.name_only, block) written = block.set(usage.path_full_field.name.name_only, relpath) - log.info(' - written %d bytes', written) + log.debug(' - written %d bytes', written) + + # Make sure we close the file, otherwise changes may not be + # flushed before it gets copied. + bfile.close() def _copy_asset_and_deps(self, asset_path: pathlib.Path, action: AssetAction, fc: queued_copy.FileCopier): - log.debug('Queueing copy of %s and dependencies', asset_path) - # Copy the asset itself. - packed_path = self._actions[asset_path].new_path - self._copy_to_target(asset_path, packed_path, fc) + packed_path = action.new_path + read_path = action.read_from or asset_path + + # TODO(Sybren): if the asset is a rewritten blend file (and thus a copy), + # do a move instead of a copy. + self._copy_to_target(read_path, packed_path, fc) # Copy its sequence dependencies. for usage in action.usages: @@ -253,4 +294,5 @@ class Packer: print('%s → %s' % (asset_path, target)) self._file_count += 1 return + log.debug('Queueing copy of %s', asset_path) fc.queue(asset_path, target) diff --git a/tests/test_blendfile_loading.py b/tests/test_blendfile_loading.py index be55646..c892021 100644 --- a/tests/test_blendfile_loading.py +++ b/tests/test_blendfile_loading.py @@ -1,5 +1,6 @@ import os import pathlib +import tempfile from blender_asset_tracer import blendfile from blender_asset_tracer.blendfile import iterators, exceptions @@ -284,3 +285,93 @@ class LoadNonBlendfileTest(AbstractBlendFileTest): def test_no_datablocks(self): with self.assertRaises(exceptions.NoDNA1Block): blendfile.BlendFile(self.blendfiles / 'corrupt_only_magic.blend') + + +class BlendFileCacheTest(AbstractBlendFileTest): + def setUp(self): + super().setUp() + self.tdir = tempfile.TemporaryDirectory() + self.tpath = pathlib.Path(self.tdir.name) + + def tearDown(self): + self.tdir.cleanup() + + def test_open_cached(self): + infile = self.blendfiles / 'basic_file.blend' + bf1 = blendfile.open_cached(infile) + bf2 = blendfile.open_cached(infile) + + # The file should only be opened & parsed once. + self.assertIs(bf1, bf2) + self.assertIs(bf1, blendfile._cached_bfiles[infile]) + + def test_compressed(self): + infile = self.blendfiles / 'linked_cube_compressed.blend' + bf1 = blendfile.open_cached(infile) + bf2 = blendfile.open_cached(infile) + + # The file should only be opened & parsed once. + self.assertIs(bf1, bf2) + self.assertIs(bf1, blendfile._cached_bfiles[infile]) + + def test_closed(self): + infile = self.blendfiles / 'linked_cube_compressed.blend' + bf = blendfile.open_cached(infile) + self.assertIs(bf, blendfile._cached_bfiles[infile]) + + blendfile.close_all_cached() + self.assertTrue(bf.fileobj.closed) + self.assertEqual({}, blendfile._cached_bfiles) + + def test_close_one_file(self): + path1 = self.blendfiles / 'linked_cube_compressed.blend' + path2 = self.blendfiles / 'basic_file.blend' + bf1 = blendfile.open_cached(path1) + bf2 = blendfile.open_cached(path2) + self.assertIs(bf1, blendfile._cached_bfiles[path1]) + + # Closing a file should remove it from the cache. + bf1.close() + self.assertTrue(bf1.fileobj.closed) + self.assertEqual({path2: bf2}, blendfile._cached_bfiles) + + def test_open_and_rebind(self): + infile = self.blendfiles / 'linked_cube.blend' + other = self.tpath / 'copy.blend' + self._open_and_rebind_test(infile, other) + + def test_open_and_rebind_compressed(self): + infile = self.blendfiles / 'linked_cube_compressed.blend' + other = self.tpath / 'copy.blend' + self._open_and_rebind_test(infile, other) + + def _open_and_rebind_test(self, infile: pathlib.Path, other: pathlib.Path): + self.assertFalse(other.exists()) + + bf = blendfile.open_cached(infile) + + self.assertEqual(str(bf.raw_filepath), bf.fileobj.name) + + before_filepath = bf.filepath + before_raw_fp = bf.raw_filepath + before_blocks = bf.blocks + before_compressed = bf.is_compressed + + bf.copy_and_rebind(other, mode='rb+') + + self.assertTrue(other.exists()) + self.assertEqual(before_compressed, bf.is_compressed) + + if bf.is_compressed: + self.assertNotEqual(bf.filepath, bf.raw_filepath) + else: + self.assertEqual(bf.filepath, bf.raw_filepath) + + self.assertNotEqual(before_filepath, bf.filepath) + self.assertNotEqual(before_raw_fp, bf.raw_filepath) + self.assertEqual(other, bf.filepath) + self.assertIs(before_blocks, bf.blocks) + self.assertNotIn(infile, blendfile._cached_bfiles) + self.assertIs(bf, blendfile._cached_bfiles[other]) + + self.assertEqual(str(bf.raw_filepath), bf.fileobj.name) diff --git a/tests/test_pack.py b/tests/test_pack.py index 75e3c9e..219be97 100644 --- a/tests/test_pack.py +++ b/tests/test_pack.py @@ -15,11 +15,16 @@ class AbstractPackTest(AbstractBlendFileTest): def setUpClass(cls): super().setUpClass() logging.getLogger('blender_asset_tracer.pack').setLevel(logging.DEBUG) + logging.getLogger('blender_asset_tracer.blendfile.open_cached').setLevel(logging.DEBUG) + logging.getLogger('blender_asset_tracer.blendfile.open_cached').setLevel(logging.DEBUG) + logging.getLogger('blender_asset_tracer.blendfile.BlendFile').setLevel(logging.DEBUG) def setUp(self): super().setUp() - self.tdir = tempfile.TemporaryDirectory() + self.tdir = tempfile.TemporaryDirectory(suffix='-packtest') self.tpath = pathlib.Path(self.tdir.name) + # self.tpath = pathlib.Path('/tmp/tempdir-packtest') + # self.tpath.mkdir(parents=True, exist_ok=True) def tearDown(self): self.tdir.cleanup() @@ -112,7 +117,33 @@ class AbstractPackTest(AbstractBlendFileTest): self.assertEqual(b'LILib.002', rw_dbllink[1].block_name) self.assertEqual(b'//../material_textures.blend', rw_dbllink[1].asset_path) + def test_execute_rewrite_no_touch_origs(self): + infile = self._pack_with_rewrite() + + # The original file shouldn't be touched. + bfile = blendfile.open_cached(infile, assert_cached=False) + libs = sorted(bfile.code_index[b'LI']) + + self.assertEqual(b'LILib', libs[0].id_name) + self.assertEqual(b'//../linked_cube.blend', libs[0][b'name']) + self.assertEqual(b'LILib.002', libs[1].id_name) + self.assertEqual(b'//../material_textures.blend', libs[1][b'name']) + def test_execute_rewrite(self): + infile = self._pack_with_rewrite() + + extpath = pathlib.Path('//_outside_project', *self.blendfiles.parts[1:]) + extbpath = bpathlib.BlendPath(extpath) + + # Those libraries should be properly rewritten. + bfile = blendfile.open_cached(self.tpath / infile.name, assert_cached=False) + libs = sorted(bfile.code_index[b'LI']) + self.assertEqual(b'LILib', libs[0].id_name) + self.assertEqual(extbpath / b'linked_cube.blend', libs[0][b'name']) + self.assertEqual(b'LILib.002', libs[1].id_name) + self.assertEqual(extbpath / b'material_textures.blend', libs[1][b'name']) + + def _pack_with_rewrite(self): ppath = self.blendfiles / 'subdir' infile = ppath / 'doubly_linked_up.blend' @@ -120,25 +151,7 @@ class AbstractPackTest(AbstractBlendFileTest): packer.strategise() packer.execute() - extpath = pathlib.Path('//_outside_project', *self.blendfiles.parts[1:]) - extbpath = bpathlib.BlendPath(extpath) - - # Those libraries should be properly rewritten. - bfile = blendfile.open_cached(self.tpath / infile.name) - libs = sorted(bfile.code_index[b'LI']) - self.assertEqual(b'LILib', libs[0].id_name) - self.assertEqual(extbpath / b'linked_cube.blend', libs[0][b'name']) - self.assertEqual(b'LILib.002', libs[1].id_name) - self.assertEqual(extbpath / b'material_textures.blend', libs[1][b'name']) - - # The original file shouldn't be touched, though. - bfile = blendfile.open_cached(infile) - libs = sorted(bfile.code_index[b'LI']) - - self.assertEqual(b'LILib', libs[0].id_name) - self.assertEqual(b'//../linked_cube.blend', libs[0][b'name']) - self.assertEqual(b'LILib.002', libs[1].id_name) - self.assertEqual(b'//../material_textures.blend', libs[1][b'name']) + return infile def test_noop(self): ppath = self.blendfiles / 'subdir'