Rewrite blend files in temporary directory, before copying

This allows us to later support uploading to a non-local filesystem, such
as Amazon S3 cloud storage, after path rewriting is complete.
This commit is contained in:
Sybren A. Stüvel 2018-03-09 12:33:15 +01:00
parent 384f1d5107
commit 824ca4bcb7
4 changed files with 212 additions and 45 deletions

View File

@ -28,6 +28,7 @@ import logging
import os import os
import struct import struct
import pathlib import pathlib
import shutil
import tempfile import tempfile
import typing import typing
@ -44,19 +45,28 @@ BFBList = typing.List['BlendFileBlock']
_cached_bfiles = {} # type: typing.Dict[pathlib.Path, BlendFile] _cached_bfiles = {} # type: typing.Dict[pathlib.Path, BlendFile]
def open_cached(path: pathlib.Path, mode='rb', assert_cached=False) -> 'BlendFile': def open_cached(path: pathlib.Path, mode='rb',
assert_cached: typing.Optional[bool] = None) -> 'BlendFile':
"""Open a blend file, ensuring it is only opened once.""" """Open a blend file, ensuring it is only opened once."""
my_log = log.getChild('open_cached')
bfile_path = path.absolute().resolve() bfile_path = path.absolute().resolve()
try:
return _cached_bfiles[bfile_path]
except KeyError:
pass
if assert_cached: if assert_cached is not None:
is_cached = bfile_path in _cached_bfiles
if assert_cached and not is_cached:
raise AssertionError('File %s was not cached' % bfile_path) raise AssertionError('File %s was not cached' % bfile_path)
elif not assert_cached and is_cached:
raise AssertionError('File %s was cached' % bfile_path)
try:
bfile = _cached_bfiles[bfile_path]
except KeyError:
my_log.debug('Opening non-cached %s', path)
bfile = BlendFile(path, mode=mode) bfile = BlendFile(path, mode=mode)
_cached_bfiles[bfile_path] = bfile _cached_bfiles[bfile_path] = bfile
else:
my_log.debug('Returning cached %s', path)
return bfile return bfile
@ -67,7 +77,7 @@ def close_all_cached():
return return
log.debug('Closing %d cached blend files', len(_cached_bfiles)) log.debug('Closing %d cached blend files', len(_cached_bfiles))
for bfile in _cached_bfiles.values(): for bfile in list(_cached_bfiles.values()):
bfile.close() bfile.close()
_cached_bfiles.clear() _cached_bfiles.clear()
@ -136,6 +146,8 @@ class BlendFile:
if 'b' not in mode: if 'b' not in mode:
raise ValueError('Only binary modes are supported, not %r' % mode) raise ValueError('Only binary modes are supported, not %r' % mode)
self.filepath = path
fileobj = path.open(mode, buffering=FILE_BUFFER_SIZE) # typing.IO[bytes] fileobj = path.open(mode, buffering=FILE_BUFFER_SIZE) # typing.IO[bytes]
fileobj.seek(0, os.SEEK_SET) fileobj.seek(0, os.SEEK_SET)
@ -206,7 +218,7 @@ class BlendFile:
def __exit__(self, exctype, excvalue, traceback): def __exit__(self, exctype, excvalue, traceback):
self.close() self.close()
def rebind(self, path: pathlib.Path, mode='rb'): def copy_and_rebind(self, path: pathlib.Path, mode='rb'):
"""Change which file is bound to this BlendFile. """Change which file is bound to this BlendFile.
This allows cloning a previously opened file, and rebinding it to reuse This allows cloning a previously opened file, and rebinding it to reuse
@ -217,6 +229,10 @@ class BlendFile:
self.close() self.close()
_uncache(self.filepath) _uncache(self.filepath)
self.log.debug('Copying %s to %s', self.filepath, path)
# TODO(Sybren): remove str() calls when targeting Python 3.6+
shutil.copy(str(self.filepath), str(path))
self._open_file(path, mode=mode) self._open_file(path, mode=mode)
_cache(path, self) _cache(path, self)
@ -261,6 +277,11 @@ class BlendFile:
self.fileobj.close() self.fileobj.close()
self._is_modified = False self._is_modified = False
try:
del _cached_bfiles[self.filepath]
except KeyError:
pass
def ensure_subtype_smaller(self, sdna_index_curr, sdna_index_next): def ensure_subtype_smaller(self, sdna_index_curr, sdna_index_next):
# never refine to a smaller type # never refine to a smaller type
curr_struct = self.structs[sdna_index_curr] curr_struct = self.structs[sdna_index_curr]

View File

@ -3,6 +3,8 @@ import enum
import functools import functools
import logging import logging
import pathlib import pathlib
import tempfile
import typing import typing
from blender_asset_tracer import trace, bpathlib, blendfile from blender_asset_tracer import trace, bpathlib, blendfile
@ -33,7 +35,12 @@ class AssetAction:
self.new_path = None # type: pathlib.Path self.new_path = None # type: pathlib.Path
"""Absolute path to the asset in the BAT Pack.""" """Absolute path to the asset in the BAT Pack."""
self.read_from = None # type: pathlib.Path self.read_from = None # type: typing.Optional[pathlib.Path]
"""Optional path from which to read the asset.
This is used when blend files have been rewritten. It is assumed that
when this property is set, the file can be moved instead of copied.
"""
self.rewrites = [] # type: typing.List[result.BlockUsage] self.rewrites = [] # type: typing.List[result.BlockUsage]
"""BlockUsage objects in this asset that may require rewriting. """BlockUsage objects in this asset that may require rewriting.
@ -65,6 +72,19 @@ class Packer:
# Number of files we would copy, if not for --noop # Number of files we would copy, if not for --noop
self._file_count = 0 self._file_count = 0
self._tmpdir = tempfile.TemporaryDirectory(suffix='-batpack')
self._rewrite_in = pathlib.Path(self._tmpdir.name)
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.close()
def close(self):
"""Clean up any temporary files."""
self._tmpdir.cleanup()
def strategise(self): def strategise(self):
"""Determine what to do with the assets. """Determine what to do with the assets.
@ -148,9 +168,9 @@ class Packer:
"""Execute the strategy.""" """Execute the strategy."""
assert self._actions, 'Run strategise() first' assert self._actions, 'Run strategise() first'
self._copy_files_to_target()
if not self.noop: if not self.noop:
self._rewrite_paths() self._rewrite_paths()
self._copy_files_to_target()
def _copy_files_to_target(self): def _copy_files_to_target(self):
"""Copy all assets to the target directoy. """Copy all assets to the target directoy.
@ -172,21 +192,36 @@ class Packer:
fc.done_and_join() fc.done_and_join()
def _rewrite_paths(self): def _rewrite_paths(self):
"""Rewrite paths to the new location of the assets.""" """Rewrite paths to the new location of the assets.
Writes the rewritten blend files to a temporary location.
"""
for bfile_path, action in self._actions.items(): for bfile_path, action in self._actions.items():
if not action.rewrites: if not action.rewrites:
continue continue
assert isinstance(bfile_path, pathlib.Path) assert isinstance(bfile_path, pathlib.Path)
# bfile_pp is the final path of this blend file in the BAT pack.
# It is used to determine relative paths to other blend files.
# It is *not* used for any disk I/O, since the file may not even
# exist on the local filesystem.
bfile_pp = self._actions[bfile_path].new_path bfile_pp = self._actions[bfile_path].new_path
log.info('Rewriting %s', bfile_pp) # Use tempfile to create a unique name in our temporary directory.
# The file should be deleted when self.close() is called, and not
# when the bfile_tp object is GC'd.
bfile_tmp = tempfile.NamedTemporaryFile(dir=str(self._rewrite_in),
suffix='-' + bfile_path.name,
delete=False)
bfile_tp = pathlib.Path(bfile_tmp.name)
action.read_from = bfile_tp
log.info('Rewriting %s to %s', bfile_path, bfile_tp)
# The original blend file will have been cached, so we can use it # The original blend file will have been cached, so we can use it
# to avoid re-parsing all data blocks in the to-be-rewritten file. # to avoid re-parsing all data blocks in the to-be-rewritten file.
bfile = blendfile.open_cached(bfile_path, assert_cached=True) bfile = blendfile.open_cached(bfile_path, assert_cached=True)
bfile.rebind(bfile_pp, mode='rb+') bfile.copy_and_rebind(bfile_tp, mode='rb+')
for usage in action.rewrites: for usage in action.rewrites:
assert isinstance(usage, result.BlockUsage) assert isinstance(usage, result.BlockUsage)
@ -204,11 +239,11 @@ class Packer:
# Find the same block in the newly copied file. # Find the same block in the newly copied file.
block = bfile.dereference_pointer(usage.block.addr_old) block = bfile.dereference_pointer(usage.block.addr_old)
if usage.path_full_field is None: if usage.path_full_field is None:
log.info(' - updating field %s of block %s', log.debug(' - updating field %s of block %s',
usage.path_dir_field.name.name_only, block) usage.path_dir_field.name.name_only, block)
reldir = bpathlib.BlendPath.mkrelative(asset_pp.parent, bfile_pp) reldir = bpathlib.BlendPath.mkrelative(asset_pp.parent, bfile_pp)
written = block.set(usage.path_dir_field.name.name_only, reldir) written = block.set(usage.path_dir_field.name.name_only, reldir)
log.info(' - written %d bytes', written) log.debug(' - written %d bytes', written)
# BIG FAT ASSUMPTION that the filename (e.g. basename # BIG FAT ASSUMPTION that the filename (e.g. basename
# without path) does not change. This makes things much # without path) does not change. This makes things much
@ -216,18 +251,24 @@ class Packer:
# filename fields are in different blocks. See the # filename fields are in different blocks. See the
# blocks2assets.scene() function for the implementation. # blocks2assets.scene() function for the implementation.
else: else:
log.info(' - updating field %s of block %s', log.debug(' - updating field %s of block %s',
usage.path_full_field.name.name_only, block) usage.path_full_field.name.name_only, block)
written = block.set(usage.path_full_field.name.name_only, relpath) written = block.set(usage.path_full_field.name.name_only, relpath)
log.info(' - written %d bytes', written) log.debug(' - written %d bytes', written)
# Make sure we close the file, otherwise changes may not be
# flushed before it gets copied.
bfile.close()
def _copy_asset_and_deps(self, asset_path: pathlib.Path, action: AssetAction, def _copy_asset_and_deps(self, asset_path: pathlib.Path, action: AssetAction,
fc: queued_copy.FileCopier): fc: queued_copy.FileCopier):
log.debug('Queueing copy of %s and dependencies', asset_path)
# Copy the asset itself. # Copy the asset itself.
packed_path = self._actions[asset_path].new_path packed_path = action.new_path
self._copy_to_target(asset_path, packed_path, fc) read_path = action.read_from or asset_path
# TODO(Sybren): if the asset is a rewritten blend file (and thus a copy),
# do a move instead of a copy.
self._copy_to_target(read_path, packed_path, fc)
# Copy its sequence dependencies. # Copy its sequence dependencies.
for usage in action.usages: for usage in action.usages:
@ -253,4 +294,5 @@ class Packer:
print('%s%s' % (asset_path, target)) print('%s%s' % (asset_path, target))
self._file_count += 1 self._file_count += 1
return return
log.debug('Queueing copy of %s', asset_path)
fc.queue(asset_path, target) fc.queue(asset_path, target)

View File

@ -1,5 +1,6 @@
import os import os
import pathlib import pathlib
import tempfile
from blender_asset_tracer import blendfile from blender_asset_tracer import blendfile
from blender_asset_tracer.blendfile import iterators, exceptions from blender_asset_tracer.blendfile import iterators, exceptions
@ -284,3 +285,93 @@ class LoadNonBlendfileTest(AbstractBlendFileTest):
def test_no_datablocks(self): def test_no_datablocks(self):
with self.assertRaises(exceptions.NoDNA1Block): with self.assertRaises(exceptions.NoDNA1Block):
blendfile.BlendFile(self.blendfiles / 'corrupt_only_magic.blend') blendfile.BlendFile(self.blendfiles / 'corrupt_only_magic.blend')
class BlendFileCacheTest(AbstractBlendFileTest):
def setUp(self):
super().setUp()
self.tdir = tempfile.TemporaryDirectory()
self.tpath = pathlib.Path(self.tdir.name)
def tearDown(self):
self.tdir.cleanup()
def test_open_cached(self):
infile = self.blendfiles / 'basic_file.blend'
bf1 = blendfile.open_cached(infile)
bf2 = blendfile.open_cached(infile)
# The file should only be opened & parsed once.
self.assertIs(bf1, bf2)
self.assertIs(bf1, blendfile._cached_bfiles[infile])
def test_compressed(self):
infile = self.blendfiles / 'linked_cube_compressed.blend'
bf1 = blendfile.open_cached(infile)
bf2 = blendfile.open_cached(infile)
# The file should only be opened & parsed once.
self.assertIs(bf1, bf2)
self.assertIs(bf1, blendfile._cached_bfiles[infile])
def test_closed(self):
infile = self.blendfiles / 'linked_cube_compressed.blend'
bf = blendfile.open_cached(infile)
self.assertIs(bf, blendfile._cached_bfiles[infile])
blendfile.close_all_cached()
self.assertTrue(bf.fileobj.closed)
self.assertEqual({}, blendfile._cached_bfiles)
def test_close_one_file(self):
path1 = self.blendfiles / 'linked_cube_compressed.blend'
path2 = self.blendfiles / 'basic_file.blend'
bf1 = blendfile.open_cached(path1)
bf2 = blendfile.open_cached(path2)
self.assertIs(bf1, blendfile._cached_bfiles[path1])
# Closing a file should remove it from the cache.
bf1.close()
self.assertTrue(bf1.fileobj.closed)
self.assertEqual({path2: bf2}, blendfile._cached_bfiles)
def test_open_and_rebind(self):
infile = self.blendfiles / 'linked_cube.blend'
other = self.tpath / 'copy.blend'
self._open_and_rebind_test(infile, other)
def test_open_and_rebind_compressed(self):
infile = self.blendfiles / 'linked_cube_compressed.blend'
other = self.tpath / 'copy.blend'
self._open_and_rebind_test(infile, other)
def _open_and_rebind_test(self, infile: pathlib.Path, other: pathlib.Path):
self.assertFalse(other.exists())
bf = blendfile.open_cached(infile)
self.assertEqual(str(bf.raw_filepath), bf.fileobj.name)
before_filepath = bf.filepath
before_raw_fp = bf.raw_filepath
before_blocks = bf.blocks
before_compressed = bf.is_compressed
bf.copy_and_rebind(other, mode='rb+')
self.assertTrue(other.exists())
self.assertEqual(before_compressed, bf.is_compressed)
if bf.is_compressed:
self.assertNotEqual(bf.filepath, bf.raw_filepath)
else:
self.assertEqual(bf.filepath, bf.raw_filepath)
self.assertNotEqual(before_filepath, bf.filepath)
self.assertNotEqual(before_raw_fp, bf.raw_filepath)
self.assertEqual(other, bf.filepath)
self.assertIs(before_blocks, bf.blocks)
self.assertNotIn(infile, blendfile._cached_bfiles)
self.assertIs(bf, blendfile._cached_bfiles[other])
self.assertEqual(str(bf.raw_filepath), bf.fileobj.name)

View File

@ -15,11 +15,16 @@ class AbstractPackTest(AbstractBlendFileTest):
def setUpClass(cls): def setUpClass(cls):
super().setUpClass() super().setUpClass()
logging.getLogger('blender_asset_tracer.pack').setLevel(logging.DEBUG) logging.getLogger('blender_asset_tracer.pack').setLevel(logging.DEBUG)
logging.getLogger('blender_asset_tracer.blendfile.open_cached').setLevel(logging.DEBUG)
logging.getLogger('blender_asset_tracer.blendfile.open_cached').setLevel(logging.DEBUG)
logging.getLogger('blender_asset_tracer.blendfile.BlendFile').setLevel(logging.DEBUG)
def setUp(self): def setUp(self):
super().setUp() super().setUp()
self.tdir = tempfile.TemporaryDirectory() self.tdir = tempfile.TemporaryDirectory(suffix='-packtest')
self.tpath = pathlib.Path(self.tdir.name) self.tpath = pathlib.Path(self.tdir.name)
# self.tpath = pathlib.Path('/tmp/tempdir-packtest')
# self.tpath.mkdir(parents=True, exist_ok=True)
def tearDown(self): def tearDown(self):
self.tdir.cleanup() self.tdir.cleanup()
@ -112,7 +117,33 @@ class AbstractPackTest(AbstractBlendFileTest):
self.assertEqual(b'LILib.002', rw_dbllink[1].block_name) self.assertEqual(b'LILib.002', rw_dbllink[1].block_name)
self.assertEqual(b'//../material_textures.blend', rw_dbllink[1].asset_path) self.assertEqual(b'//../material_textures.blend', rw_dbllink[1].asset_path)
def test_execute_rewrite_no_touch_origs(self):
infile = self._pack_with_rewrite()
# The original file shouldn't be touched.
bfile = blendfile.open_cached(infile, assert_cached=False)
libs = sorted(bfile.code_index[b'LI'])
self.assertEqual(b'LILib', libs[0].id_name)
self.assertEqual(b'//../linked_cube.blend', libs[0][b'name'])
self.assertEqual(b'LILib.002', libs[1].id_name)
self.assertEqual(b'//../material_textures.blend', libs[1][b'name'])
def test_execute_rewrite(self): def test_execute_rewrite(self):
infile = self._pack_with_rewrite()
extpath = pathlib.Path('//_outside_project', *self.blendfiles.parts[1:])
extbpath = bpathlib.BlendPath(extpath)
# Those libraries should be properly rewritten.
bfile = blendfile.open_cached(self.tpath / infile.name, assert_cached=False)
libs = sorted(bfile.code_index[b'LI'])
self.assertEqual(b'LILib', libs[0].id_name)
self.assertEqual(extbpath / b'linked_cube.blend', libs[0][b'name'])
self.assertEqual(b'LILib.002', libs[1].id_name)
self.assertEqual(extbpath / b'material_textures.blend', libs[1][b'name'])
def _pack_with_rewrite(self):
ppath = self.blendfiles / 'subdir' ppath = self.blendfiles / 'subdir'
infile = ppath / 'doubly_linked_up.blend' infile = ppath / 'doubly_linked_up.blend'
@ -120,25 +151,7 @@ class AbstractPackTest(AbstractBlendFileTest):
packer.strategise() packer.strategise()
packer.execute() packer.execute()
extpath = pathlib.Path('//_outside_project', *self.blendfiles.parts[1:]) return infile
extbpath = bpathlib.BlendPath(extpath)
# Those libraries should be properly rewritten.
bfile = blendfile.open_cached(self.tpath / infile.name)
libs = sorted(bfile.code_index[b'LI'])
self.assertEqual(b'LILib', libs[0].id_name)
self.assertEqual(extbpath / b'linked_cube.blend', libs[0][b'name'])
self.assertEqual(b'LILib.002', libs[1].id_name)
self.assertEqual(extbpath / b'material_textures.blend', libs[1][b'name'])
# The original file shouldn't be touched, though.
bfile = blendfile.open_cached(infile)
libs = sorted(bfile.code_index[b'LI'])
self.assertEqual(b'LILib', libs[0].id_name)
self.assertEqual(b'//../linked_cube.blend', libs[0][b'name'])
self.assertEqual(b'LILib.002', libs[1].id_name)
self.assertEqual(b'//../material_textures.blend', libs[1][b'name'])
def test_noop(self): def test_noop(self):
ppath = self.blendfiles / 'subdir' ppath = self.blendfiles / 'subdir'