From 3c5cca81a5a4e083eb2c91530d1a663bf3013ee8 Mon Sep 17 00:00:00 2001 From: Pullusb Date: Mon, 18 Oct 2021 15:54:04 +0200 Subject: [PATCH] initial commit --- .gitignore | 2 + README.md | 4 + __init__.py | 279 +++++++++++++ blendfile/__init__.py | 773 ++++++++++++++++++++++++++++++++++++ blendfile/dna.py | 334 ++++++++++++++++ blendfile/dna_io.py | 163 ++++++++ blendfile/exceptions.py | 76 ++++ blendfile/header.py | 78 ++++ blendfile/iterators.py | 70 ++++ bpathlib.py | 212 ++++++++++ cdefs.py | 74 ++++ cli/__init__.py | 99 +++++ cli/blocks.py | 129 ++++++ cli/common.py | 99 +++++ cli/list_deps.py | 152 +++++++ cli/pack.py | 200 ++++++++++ compressor.py | 77 ++++ pack/__init__.py | 587 +++++++++++++++++++++++++++ pack/filesystem.py | 273 +++++++++++++ pack/progress.py | 148 +++++++ pack/s3.py | 182 +++++++++ pack/shaman/__init__.py | 130 ++++++ pack/shaman/cache.py | 197 +++++++++ pack/shaman/client.py | 129 ++++++ pack/shaman/time_tracker.py | 32 ++ pack/shaman/transfer.py | 359 +++++++++++++++++ pack/transfer.py | 221 +++++++++++ pack/zipped.py | 89 +++++ trace/__init__.py | 80 ++++ trace/blocks2assets.py | 210 ++++++++++ trace/expanders.py | 294 ++++++++++++++ trace/file2blocks.py | 176 ++++++++ trace/file_sequence.py | 69 ++++ trace/modifier_walkers.py | 251 ++++++++++++ trace/progress.py | 31 ++ trace/result.py | 182 +++++++++ 36 files changed, 6461 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 __init__.py create mode 100644 blendfile/__init__.py create mode 100644 blendfile/dna.py create mode 100644 blendfile/dna_io.py create mode 100644 blendfile/exceptions.py create mode 100644 blendfile/header.py create mode 100644 blendfile/iterators.py create mode 100644 bpathlib.py create mode 100644 cdefs.py create mode 100644 cli/__init__.py create mode 100644 cli/blocks.py create mode 100644 cli/common.py create mode 100644 cli/list_deps.py create mode 100644 cli/pack.py create mode 100644 compressor.py create mode 100644 
pack/__init__.py create mode 100644 pack/filesystem.py create mode 100644 pack/progress.py create mode 100644 pack/s3.py create mode 100644 pack/shaman/__init__.py create mode 100644 pack/shaman/cache.py create mode 100644 pack/shaman/client.py create mode 100644 pack/shaman/time_tracker.py create mode 100644 pack/shaman/transfer.py create mode 100644 pack/transfer.py create mode 100644 pack/zipped.py create mode 100644 trace/__init__.py create mode 100644 trace/blocks2assets.py create mode 100644 trace/expanders.py create mode 100644 trace/file2blocks.py create mode 100644 trace/file_sequence.py create mode 100644 trace/modifier_walkers.py create mode 100644 trace/progress.py create mode 100644 trace/result.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..17ae4fe --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +__pycache__ +*.py[cod] \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..bba2054 --- /dev/null +++ b/README.md @@ -0,0 +1,4 @@ +# BAT (blender asset tracer) + +Modified version +with Zip packer included \ No newline at end of file diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..5e250b7 --- /dev/null +++ b/__init__.py @@ -0,0 +1,279 @@ +# ##### BEGIN GPL LICENSE BLOCK ##### +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# Copyright (C) 2014-2018 Blender Foundation
#
# ##### END GPL LICENSE BLOCK #####

#

__version__ = '1.3'

bl_info = {
    "name": "Blender Asset Tracer",
    "author": "Campbell Barton, Sybren A. Stüvel, Loïc Charrière and Clément Ducarteron",
    "version": (1, 3, 0),
    "blender": (2, 80, 0),
    "location": "File > External Data > BAT",
    "description": "Utility for packing blend files",
    "warning": "",
    "wiki_url": "https://developer.blender.org/project/profile/79/",
    "category": "Import-Export",
}

import bpy
from bpy.types import Operator
from bpy_extras.io_utils import ExportHelper
import zipfile
from blender_asset_tracer.pack import zipped
from pathlib import Path, PurePath

import os
import re
import shutil
import sys
import subprocess
import tempfile
from blender_asset_tracer.trace import deps


# Trailing "<digits>.<extension>" part of an image-sequence file name,
# e.g. the "0001.png" in "shot_0001.png".
_SEQUENCE_SUFFIX = re.compile(r'[0-9]+\.[a-zA-Z]+$')


def _open_folder(folderpath):
    """Reveal ``folderpath`` in the file manager of the current platform.

    When ``folderpath`` is a file, the file is selected in the file manager
    if the platform offers a way to do so (Explorer, Nemo, Nautilus);
    otherwise its parent directory is opened.  Does nothing for an empty
    path.  Failure to launch the file manager is printed, not raised, so the
    caller's work (an archive that is already written) is not reported as
    failed.
    """
    if not folderpath:
        return

    platform = sys.platform
    is_linux = platform.startswith(('linux', 'freebsd'))
    is_windows = platform.startswith('win')

    if is_linux:
        cmd = ['xdg-open']
    elif is_windows:
        cmd = ['explorer']
    else:
        cmd = ['open']

    if os.path.isfile(folderpath):  # When pointing to a file
        select_cmd = None
        if is_windows:
            # Keep same path but "/select" the file (windows cmd option)
            select_cmd = ['explorer', '/select,']
        elif is_linux:
            # The original code called an undefined `which`; shutil.which is
            # the standard-library way to look an executable up on PATH.
            for manager in ('nemo', 'nautilus'):
                if shutil.which(manager):
                    select_cmd = [manager, '--no-desktop']
                    break

        if select_cmd is None:
            # Use directory of the file
            folderpath = os.path.dirname(folderpath)
        else:
            cmd = select_cmd

    fullcmd = cmd + [os.path.normpath(folderpath)]
    try:
        subprocess.Popen(fullcmd)
    except OSError as ex:
        print(f'Could not open {folderpath}: {ex}')


def _zip_with_structure(zip_path, filelist, root=None, compressed=True):
    """Zip ``filelist`` into ``zip_path``, keeping the tree below ``root``.

    :param zip_path: output path of the created zip.
    :param filelist: file paths as strings or Path objects.
    :param root: top level of the created hierarchy (not included in the
        archive names).  Files that are not inside ``root`` are skipped.
        When empty, the common parent directory of ``filelist`` is used.
    :param compressed: store deflated when True, uncompressed otherwise.
    :returns: tuple ``(zip_path, root)``, or None when ``filelist`` is empty.
    """
    filelist = [Path(f) for f in filelist]  # ensure pathlib
    if not filelist:
        return None

    if root:
        root = Path(root)
    else:
        # Autodetect the deepest directory shared by all files.
        root = Path(os.path.commonpath([str(f) for f in filelist]))
        if root.is_file():
            # A single file is its own common path; its directory is the root.
            root = root.parent

    compress_type = zipfile.ZIP_DEFLATED if compressed else zipfile.ZIP_STORED
    with zipfile.ZipFile(zip_path, 'w', compress_type) as zip_obj:
        for f in filelist:
            if not f.exists():
                print(f'Not exists: {f.name}')
                continue
            try:
                # relative_to() compares whole path components, unlike the
                # substring test / str.replace() this replaces.
                arcname = f.relative_to(root).as_posix()
            except ValueError:
                print(f'{f} is out of root {root}')
                continue

            print(f'adding: {arcname}')
            zip_obj.write(f, arcname)

    return zip_path, root


def _sequence_files(first_frame):
    """Return all files in the directory of ``first_frame`` that look like
    frames of the same sequence: same name prefix, ending in digits plus an
    extension.
    """
    prefix = _SEQUENCE_SUFFIX.sub('', first_frame.name)
    return [
        str(entry)
        for entry in first_frame.parent.iterdir()
        if entry.name.startswith(prefix) and _SEQUENCE_SUFFIX.search(entry.name)
    ]


# NOTE: documented with comments rather than a class docstring, because a
# docstring on a Blender operator is shown to the user as its tooltip.
# Packs the current blend file and its dependencies with BAT's ZipPacker.
class ExportBatPack(Operator, ExportHelper):
    bl_idname = "export_bat.pack"
    bl_label = "Export to Archive using BAT"

    # ExportHelper
    filename_ext = ".zip"

    @classmethod
    def poll(cls, context):
        # Packing reads the blend file from disk, so it must be saved.
        return bpy.data.is_saved

    def execute(self, context):
        # Write to, verify and report the same path.
        outfname = bpy.path.ensure_ext(self.filepath, ".zip")

        print(f'outfname {outfname}')
        self.report({'INFO'}, 'Executing ZipPacker ...')

        blend_path = Path(bpy.data.filepath)
        with zipped.ZipPacker(blend_path, blend_path.parent, outfname) as packer:
            packer.strategise()
            packer.execute()

        # testzip() returns the name of the first corrupt member, or None.
        with zipfile.ZipFile(outfname) as inzip:
            corrupt_member = inzip.testzip()
        if corrupt_member is not None:
            self.report({'ERROR'},
                        'Archive %s is corrupt (first bad file: %s)' % (outfname, corrupt_member))
            return {'CANCELLED'}

        self.report({'INFO'}, 'Packing successful !')
        self.report({'INFO'}, 'Written to %s' % outfname)

        return {'FINISHED'}


class ADM_OT_export_zip(Operator, ExportHelper):
    """Export current blendfile as .ZIP"""
    bl_label = "Export File to .ZIP"
    bl_idname = "adm.export_zip"

    filename_ext = '.zip'

    root_dir : bpy.props.StringProperty(
        name="Root",
        description='Top Level Folder of your project.'
        '\nFor now Copy/Paste correct folder by hand if default is incorrect.'
        '\n!!! Everything outside won\'t be zipped !!!',
    )

    @classmethod
    def poll(cls, context):
        # Dependencies are traced from the file on disk, so it must be saved.
        return bpy.data.is_saved

    def execute(self, context):
        root_dir = self.root_dir
        print('root_dir: ', root_dir)

        current_file = Path(bpy.data.filepath)

        # Collect everything as str so that the set really de-duplicates
        # (a str and a Path naming the same file are different set members).
        links = {str(current_file)}
        for usage in deps(current_file):
            asset_path = Path(usage.abspath)
            if not asset_path.exists():
                continue

            links.add(str(asset_path))
            if usage.is_sequence:
                links.update(_sequence_files(asset_path))

        output = self.filepath

        # sorted() only makes the archive order deterministic.
        _, root = _zip_with_structure(output, sorted(links), root_dir)
        root_dir = str(root)

        log_output = Path(tempfile.gettempdir(), 'README.txt')
        with open(log_output, 'w') as log:
            log.write("File is located here:")
            log.write(f"\n - /{str(current_file).replace(root_dir,'')}")

        with zipfile.ZipFile(output, 'a') as zipObj:
            zipObj.write(log_output, log_output.name)

        _open_folder(Path(output).parent)

        return {'FINISHED'}


def menu_func(self, context):
    """Add both export operators to the File > External Data menu."""
    layout = self.layout
    layout.separator()
    layout.operator(ExportBatPack.bl_idname)
    op = layout.operator(ADM_OT_export_zip.bl_idname)
    # Default the project root to $ZIP_ROOT; empty means "autodetect".
    op.root_dir = os.getenv('ZIP_ROOT', '')


classes = (
    ExportBatPack,
    ADM_OT_export_zip,
)


def register():
    for cls in classes:
        bpy.utils.register_class(cls)

    bpy.types.TOPBAR_MT_file_external_data.append(menu_func)


def unregister():
    for cls in classes:
        bpy.utils.unregister_class(cls)

    bpy.types.TOPBAR_MT_file_external_data.remove(menu_func)


if __name__ == "__main__":
    register()


# ---------------------------------------------------------------------------
# Patch text that shared the replaced lines, kept verbatim as comments.  It
# opens the next file of the patch (blendfile/__init__.py); its last line
# continues on the following line of the patch.
#
# diff --git a/blendfile/__init__.py b/blendfile/__init__.py
# new file mode 100644
# index 0000000..29afff4
# --- /dev/null
# +++ b/blendfile/__init__.py
# @@ -0,0 +1,773 @@
# ***** BEGIN GPL LICENSE BLOCK *****
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# ***** END GPL LICENCE BLOCK *****
#
# (c) 2009, At Mind B.V. - Jeroen Bakker
# (c) 2014, Blender Foundation - Campbell Barton
# (c) 2018, Blender Foundation - Sybren A.
Stüvel + +import atexit +import collections +import functools +import gzip +import logging +import os +import struct +import pathlib +import shutil +import tempfile +import typing + +from . import exceptions, dna_io, dna, header +from blender_asset_tracer import bpathlib + +log = logging.getLogger(__name__) + +FILE_BUFFER_SIZE = 1024 * 1024 +BLENDFILE_MAGIC = b'BLENDER' +GZIP_MAGIC = b'\x1f\x8b' +BFBList = typing.List['BlendFileBlock'] + +_cached_bfiles = {} # type: typing.Dict[pathlib.Path, BlendFile] + + +def open_cached(path: pathlib.Path, mode='rb', + assert_cached: typing.Optional[bool] = None) -> 'BlendFile': + """Open a blend file, ensuring it is only opened once.""" + my_log = log.getChild('open_cached') + bfile_path = bpathlib.make_absolute(path) + + if assert_cached is not None: + is_cached = bfile_path in _cached_bfiles + if assert_cached and not is_cached: + raise AssertionError('File %s was not cached' % bfile_path) + elif not assert_cached and is_cached: + raise AssertionError('File %s was cached' % bfile_path) + + try: + bfile = _cached_bfiles[bfile_path] + except KeyError: + my_log.debug('Opening non-cached %s', path) + bfile = BlendFile(path, mode=mode) + _cached_bfiles[bfile_path] = bfile + else: + my_log.debug('Returning cached %s', path) + + return bfile + + +@atexit.register +def close_all_cached() -> None: + if not _cached_bfiles: + # Don't even log anything when there is nothing to close + return + + log.debug('Closing %d cached blend files', len(_cached_bfiles)) + for bfile in list(_cached_bfiles.values()): + bfile.close() + _cached_bfiles.clear() + + +def _cache(path: pathlib.Path, bfile: 'BlendFile'): + """Add a BlendFile to the cache.""" + bfile_path = bpathlib.make_absolute(path) + _cached_bfiles[bfile_path] = bfile + + +def _uncache(path: pathlib.Path): + """Remove a BlendFile object from the cache.""" + bfile_path = bpathlib.make_absolute(path) + _cached_bfiles.pop(bfile_path, None) + + +class BlendFile: + """Representation of a blend 
file. + + :ivar filepath: which file this object represents. + :ivar raw_filepath: which file is accessed; same as filepath for + uncompressed files, but a temporary file for compressed files. + :ivar fileobj: the file object that's being accessed. + """ + log = log.getChild('BlendFile') + + def __init__(self, path: pathlib.Path, mode='rb') -> None: + """Create a BlendFile instance for the blend file at the path. + + Opens the file for reading or writing pending on the access. Compressed + blend files are uncompressed to a temporary location before opening. + + :param path: the file to open + :param mode: see mode description of pathlib.Path.open() + """ + self.filepath = path + self.raw_filepath = path + self._is_modified = False + self.fileobj = self._open_file(path, mode) + + self.blocks = [] # type: BFBList + """BlendFileBlocks of this file, in disk order.""" + + self.code_index = collections.defaultdict(list) # type: typing.Dict[bytes, BFBList] + self.structs = [] # type: typing.List[dna.Struct] + self.sdna_index_from_id = {} # type: typing.Dict[bytes, int] + self.block_from_addr = {} # type: typing.Dict[int, BlendFileBlock] + + self.header = header.BlendFileHeader(self.fileobj, self.raw_filepath) + self.block_header_struct = self.header.create_block_header_struct() + self._load_blocks() + + def _open_file(self, path: pathlib.Path, mode: str) -> typing.IO[bytes]: + """Open a blend file, decompressing if necessary. + + This does not parse the blend file yet, just makes sure that + self.fileobj is opened and that self.filepath and self.raw_filepath + are set. + + :raises exceptions.BlendFileError: when the blend file doesn't have the + correct magic bytes. 
+ """ + + if 'b' not in mode: + raise ValueError('Only binary modes are supported, not %r' % mode) + + self.filepath = path + + fileobj = path.open(mode, buffering=FILE_BUFFER_SIZE) # typing.IO[bytes] + fileobj.seek(0, os.SEEK_SET) + + magic = fileobj.read(len(BLENDFILE_MAGIC)) + if magic == BLENDFILE_MAGIC: + self.is_compressed = False + self.raw_filepath = path + return fileobj + + if magic[:2] == GZIP_MAGIC: + self.is_compressed = True + + log.debug("compressed blendfile detected: %s", path) + # Decompress to a temporary file. + tmpfile = tempfile.NamedTemporaryFile() + fileobj.seek(0, os.SEEK_SET) + with gzip.GzipFile(fileobj=fileobj, mode=mode) as gzfile: + magic = gzfile.read(len(BLENDFILE_MAGIC)) + if magic != BLENDFILE_MAGIC: + raise exceptions.BlendFileError("Compressed file is not a blend file", path) + + data = magic + while data: + tmpfile.write(data) + data = gzfile.read(FILE_BUFFER_SIZE) + + # Further interaction should be done with the uncompressed file. + self.raw_filepath = pathlib.Path(tmpfile.name) + fileobj.close() + return tmpfile + + fileobj.close() + raise exceptions.BlendFileError("File is not a blend file", path) + + def _load_blocks(self) -> None: + """Read the blend file to load its DNA structure to memory.""" + + self.structs.clear() + self.sdna_index_from_id.clear() + while True: + block = BlendFileBlock(self) + if block.code == b'ENDB': + break + + if block.code == b'DNA1': + self.decode_structs(block) + else: + self.fileobj.seek(block.size, os.SEEK_CUR) + + self.blocks.append(block) + self.code_index[block.code].append(block) + self.block_from_addr[block.addr_old] = block + + if not self.structs: + raise exceptions.NoDNA1Block("No DNA1 block in file, not a valid .blend file", + self.filepath) + + def __repr__(self) -> str: + clsname = self.__class__.__qualname__ + if self.filepath == self.raw_filepath: + return '<%s %r>' % (clsname, self.filepath) + return '<%s %r reading from %r>' % (clsname, self.filepath, self.raw_filepath) + + def 
__enter__(self) -> 'BlendFile': + return self + + def __exit__(self, exctype, excvalue, traceback) -> None: + self.close() + + def copy_and_rebind(self, path: pathlib.Path, mode='rb') -> None: + """Change which file is bound to this BlendFile. + + This allows cloning a previously opened file, and rebinding it to reuse + the already-loaded DNA structs and data blocks. + """ + log.debug('Rebinding %r to %s', self, path) + + self.close() + _uncache(self.filepath) + + self.log.debug('Copying %s to %s', self.filepath, path) + # TODO(Sybren): remove str() calls when targeting Python 3.6+ + shutil.copy(str(self.filepath), str(path)) + + self.fileobj = self._open_file(path, mode=mode) + _cache(path, self) + + @property + def is_modified(self) -> bool: + return self._is_modified + + def mark_modified(self) -> None: + """Recompess the file when it is closed.""" + self.log.debug('Marking %s as modified', self.raw_filepath) + self._is_modified = True + + def find_blocks_from_code(self, code: bytes) -> typing.List['BlendFileBlock']: + assert isinstance(code, bytes) + return self.code_index[code] + + def close(self) -> None: + """Close the blend file. + + Recompresses the blend file if it was compressed and changed. + """ + if not self.fileobj: + return + + if self._is_modified: + log.debug('closing blend file %s after it was modified', self.raw_filepath) + + if self._is_modified and self.is_compressed: + log.debug("recompressing modified blend file %s", self.raw_filepath) + self.fileobj.seek(os.SEEK_SET, 0) + + with gzip.open(str(self.filepath), 'wb') as gzfile: + while True: + data = self.fileobj.read(FILE_BUFFER_SIZE) + if not data: + break + gzfile.write(data) + log.debug("compressing to %s finished", self.filepath) + + # Close the file object after recompressing, as it may be a temporary + # file that'll disappear as soon as we close it. 
+ self.fileobj.close() + self._is_modified = False + + try: + del _cached_bfiles[self.filepath] + except KeyError: + pass + + def ensure_subtype_smaller(self, sdna_index_curr, sdna_index_next) -> None: + # never refine to a smaller type + curr_struct = self.structs[sdna_index_curr] + next_struct = self.structs[sdna_index_next] + if curr_struct.size > next_struct.size: + raise RuntimeError("Can't refine to smaller type (%s -> %s)" % + (curr_struct.dna_type_id.decode('utf-8'), + next_struct.dna_type_id.decode('utf-8'))) + + def decode_structs(self, block: 'BlendFileBlock'): + """ + DNACatalog is a catalog of all information in the DNA1 file-block + """ + self.log.debug("building DNA catalog") + + # Get some names in the local scope for faster access. + structs = self.structs + sdna_index_from_id = self.sdna_index_from_id + endian = self.header.endian + shortstruct = endian.USHORT + shortstruct2 = endian.USHORT2 + intstruct = endian.UINT + assert intstruct.size == 4 + + def pad_up_4(off: int) -> int: + return (off + 3) & ~3 + + data = self.fileobj.read(block.size) + types = [] + typenames = [] + + offset = 8 + names_len = intstruct.unpack_from(data, offset)[0] + offset += 4 + + self.log.debug("building #%d names" % names_len) + for _ in range(names_len): + typename = endian.read_data0_offset(data, offset) + offset = offset + len(typename) + 1 + typenames.append(dna.Name(typename)) + + offset = pad_up_4(offset) + offset += 4 + types_len = intstruct.unpack_from(data, offset)[0] + offset += 4 + self.log.debug("building #%d types" % types_len) + for _ in range(types_len): + dna_type_id = endian.read_data0_offset(data, offset) + types.append(dna.Struct(dna_type_id)) + offset += len(dna_type_id) + 1 + + offset = pad_up_4(offset) + offset += 4 + self.log.debug("building #%d type-lengths" % types_len) + for i in range(types_len): + typelen = shortstruct.unpack_from(data, offset)[0] + offset = offset + 2 + types[i].size = typelen + + offset = pad_up_4(offset) + offset += 4 + + 
structs_len = intstruct.unpack_from(data, offset)[0] + offset += 4 + log.debug("building #%d structures" % structs_len) + pointer_size = self.header.pointer_size + for sdna_index in range(structs_len): + struct_type_index, fields_len = shortstruct2.unpack_from(data, offset) + offset += 4 + + dna_struct = types[struct_type_index] + sdna_index_from_id[dna_struct.dna_type_id] = sdna_index + structs.append(dna_struct) + + dna_offset = 0 + + for field_index in range(fields_len): + field_type_index, field_name_index = shortstruct2.unpack_from(data, offset) + offset += 4 + + dna_type = types[field_type_index] + dna_name = typenames[field_name_index] + + if dna_name.is_pointer or dna_name.is_method_pointer: + dna_size = pointer_size * dna_name.array_size + else: + dna_size = dna_type.size * dna_name.array_size + + field = dna.Field(dna_type, dna_name, dna_size, dna_offset) + dna_struct.append_field(field) + dna_offset += dna_size + + def abspath(self, relpath: bpathlib.BlendPath) -> bpathlib.BlendPath: + """Construct an absolute path from a blendfile-relative path.""" + + if relpath.is_absolute(): + return relpath + + bfile_dir = self.filepath.absolute().parent + root = bpathlib.BlendPath(bfile_dir) + abspath = relpath.absolute(root) + + my_log = self.log.getChild('abspath') + my_log.debug('Resolved %s relative to %s to %s', relpath, self.filepath, abspath) + + return abspath + + def dereference_pointer(self, address: int) -> 'BlendFileBlock': + """Return the pointed-to block, or raise SegmentationFault.""" + + try: + return self.block_from_addr[address] + except KeyError: + raise exceptions.SegmentationFault('address does not exist', address) from None + + def struct(self, name: bytes) -> dna.Struct: + index = self.sdna_index_from_id[name] + return self.structs[index] + + +@functools.total_ordering +class BlendFileBlock: + """ + Instance of a struct. 
+ """ + + # Due to the huge number of BlendFileBlock objects created for packing a + # production-size blend file, using slots here actually makes the + # dependency tracer significantly (p<0.001) faster. In my test case the + # speed improvement was 16% for a 'bam list' command. + __slots__ = ( + 'bfile', 'code', 'size', 'addr_old', 'sdna_index', + 'count', 'file_offset', 'endian', '_id_name', + ) + + log = log.getChild('BlendFileBlock') + old_structure = struct.Struct(b'4sI') + """old blend files ENDB block structure""" + + def __init__(self, bfile: BlendFile) -> None: + self.bfile = bfile + + # Defaults; actual values are set by interpreting the block header. + self.code = b'' + self.size = 0 + self.addr_old = 0 + self.sdna_index = 0 + self.count = 0 + self.file_offset = 0 + """Offset in bytes from start of file to beginning of the data block. + + Points to the data after the block header. + """ + self.endian = bfile.header.endian + self._id_name = ... # type: typing.Union[None, ellipsis, bytes] + + header_struct = bfile.block_header_struct + data = bfile.fileobj.read(header_struct.size) + if len(data) != header_struct.size: + self.log.warning("Blend file %s seems to be truncated, " + "expected %d bytes but could read only %d", + bfile.filepath, header_struct.size, len(data)) + self.code = b'ENDB' + return + + # header size can be 8, 20, or 24 bytes long + # 8: old blend files ENDB block (exception) + # 20: normal headers 32 bit platform + # 24: normal headers 64 bit platform + if len(data) <= 15: + self.log.debug('interpreting block as old-style ENB block') + blockheader = self.old_structure.unpack(data) + self.code = self.endian.read_data0(blockheader[0]) + return + + blockheader = header_struct.unpack(data) + self.code = self.endian.read_data0(blockheader[0]) + if self.code != b'ENDB': + self.size = blockheader[1] + self.addr_old = blockheader[2] + self.sdna_index = blockheader[3] + self.count = blockheader[4] + self.file_offset = bfile.fileobj.tell() + + def 
__repr__(self) -> str: + return "<%s.%s (%s), size=%d at %s>" % ( + self.__class__.__name__, + self.dna_type_name, + self.code.decode(), + self.size, + hex(self.addr_old), + ) + + def __hash__(self) -> int: + return hash((self.code, self.addr_old, self.bfile.filepath)) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, BlendFileBlock): + return False + return (self.code == other.code and + self.addr_old == other.addr_old and + self.bfile.filepath == other.bfile.filepath) + + def __lt__(self, other: 'BlendFileBlock') -> bool: + """Order blocks by file path and offset within that file.""" + if not isinstance(other, BlendFileBlock): + raise NotImplemented() + my_key = self.bfile.filepath, self.file_offset + other_key = other.bfile.filepath, other.file_offset + return my_key < other_key + + def __bool__(self) -> bool: + """Data blocks are always True.""" + return True + + @property + def dna_type(self) -> dna.Struct: + return self.bfile.structs[self.sdna_index] + + @property + def dna_type_id(self) -> bytes: + return self.dna_type.dna_type_id + + @property + def dna_type_name(self) -> str: + return self.dna_type_id.decode('ascii') + + @property + def id_name(self) -> typing.Optional[bytes]: + """Same as block[b'id', b'name']; None if there is no such field. + + Evaluated only once, so safe to call multiple times without producing + excessive disk I/O. + """ + if self._id_name is ...: + try: + self._id_name = self[b'id', b'name'] + except KeyError: + self._id_name = None + + # TODO(Sybren): figure out how to let mypy know self._id_name cannot + # be ellipsis at this point. + return self._id_name # type: ignore + + def refine_type_from_index(self, sdna_index: int): + """Change the DNA Struct associated with this block. + + Use to make a block type more specific, for example when you have a + modifier but need to access it as SubSurfModifier. + + :param sdna_index: the SDNA index of the DNA type. 
+ """ + assert type(sdna_index) is int + sdna_index_curr = self.sdna_index + self.bfile.ensure_subtype_smaller(sdna_index_curr, sdna_index) + self.sdna_index = sdna_index + + def refine_type(self, dna_type_id: bytes): + """Change the DNA Struct associated with this block. + + Use to make a block type more specific, for example when you have a + modifier but need to access it as SubSurfModifier. + + :param dna_type_id: the name of the DNA type. + """ + assert isinstance(dna_type_id, bytes) + sdna_index = self.bfile.sdna_index_from_id[dna_type_id] + self.refine_type_from_index(sdna_index) + + def abs_offset(self, path: dna.FieldPath) -> typing.Tuple[int, int]: + """Compute the absolute file offset of the field. + + :returns: tuple (offset in bytes, length of array in items) + """ + field, field_offset = self.dna_type.field_from_path(self.bfile.header.pointer_size, path) + return self.file_offset + field_offset, field.name.array_size + + def get(self, + path: dna.FieldPath, + default=..., + null_terminated=True, + as_str=False, + return_field=False + ) -> typing.Any: + """Read a property and return the value. + + :param path: name of the property (like `b'loc'`), tuple of names + to read a sub-property (like `(b'id', b'name')`), or tuple of + name and index to read one item from an array (like + `(b'loc', 2)`) + :param default: The value to return when the field does not exist. + Use Ellipsis (the default value) to raise a KeyError instead. + :param null_terminated: Only used when reading bytes or strings. When + True, stops reading at the first zero byte; be careful with this + when reading binary data. + :param as_str: When True, automatically decode bytes to string + (assumes UTF-8 encoding). + :param return_field: When True, returns tuple (dna.Field, value). + Otherwise just returns the value. 
+ """ + self.bfile.fileobj.seek(self.file_offset, os.SEEK_SET) + + dna_struct = self.bfile.structs[self.sdna_index] + field, value = dna_struct.field_get( + self.bfile.header, self.bfile.fileobj, path, + default=default, + null_terminated=null_terminated, as_str=as_str, + ) + if return_field: + return value, field + return value + + def get_recursive_iter(self, + path: dna.FieldPath, + path_root: dna.FieldPath = b'', + default=..., + null_terminated=True, + as_str=True, + ) -> typing.Iterator[typing.Tuple[dna.FieldPath, typing.Any]]: + """Generator, yields (path, property value) tuples. + + If a property cannot be decoded, a string representing its DNA type + name is used as its value instead, between pointy brackets. + """ + path_full = path # type: dna.FieldPath + if path_root: + if isinstance(path_root, bytes): + path_root = (path_root,) + if isinstance(path, bytes): + path = (path,) + path_full = tuple(path_root) + tuple(path) + + try: + # Try accessing as simple property + yield (path_full, + self.get(path_full, default, null_terminated, as_str)) + except exceptions.NoReaderImplemented as ex: + # This was not a simple property, so recurse into its DNA Struct. + dna_type = ex.dna_type + struct_index = self.bfile.sdna_index_from_id.get(dna_type.dna_type_id) + if struct_index is None: + yield (path_full, "<%s>" % dna_type.dna_type_id.decode('ascii')) + return + + # Recurse through the fields. + for f in dna_type.fields: + yield from self.get_recursive_iter(f.name.name_only, path_full, default=default, + null_terminated=null_terminated, as_str=as_str) + + def hash(self) -> int: + """Generate a pointer-independent hash for the block. + + Generates a 'hash' that can be used instead of addr_old as block id, + which should be 'stable' across .blend file load & save (i.e. it does + not changes due to pointer addresses variations). + """ + # TODO This implementation is most likely far from optimal... and CRC32 + # is not kown as the best hashing algo either. 
But for now does the job! + import zlib + + dna_type = self.dna_type + pointer_size = self.bfile.header.pointer_size + + hsh = 1 + for path, value in self.items_recursive(): + field, _ = dna_type.field_from_path(pointer_size, path) + if field.name.is_pointer: + continue + hsh = zlib.adler32(str(value).encode(), hsh) + return hsh + + def set(self, path: bytes, value): + dna_struct = self.bfile.structs[self.sdna_index] + self.bfile.mark_modified() + self.bfile.fileobj.seek(self.file_offset, os.SEEK_SET) + return dna_struct.field_set(self.bfile.header, self.bfile.fileobj, path, value) + + def get_pointer( + self, path: dna.FieldPath, + default=..., + ) -> typing.Union[None, 'BlendFileBlock']: + """Same as get() but dereferences a pointer. + + :raises exceptions.SegmentationFault: when there is no datablock with + the pointed-to address. + """ + result = self.get(path, default=default) + + # If it's not an integer, we have no pointer to follow and this may + # actually be a non-pointer property. + if type(result) is not int: + return result + + if result == 0: + return None + + try: + return self.bfile.dereference_pointer(result) + except exceptions.SegmentationFault as ex: + ex.field_path = path + raise + + def iter_array_of_pointers(self, path: dna.FieldPath, array_size: int) \ + -> typing.Iterator['BlendFileBlock']: + """Dereference pointers from an array-of-pointers field. + + Use this function when you have a field like Mesh materials: + `Mat **mat` + + :param path: The array-of-pointers field. + :param array_size: Number of items in the array. If None, the + on-disk size of the DNA field is divided by the pointer size to + obtain the array size. 
def iter_fixed_array_of_pointers(self, path: dna.FieldPath) \
        -> typing.Iterator['BlendFileBlock']:
    """Yield the blocks pointed to by a fixed-size array-of-pointers field.

    Use this function when you have a field like lamp textures:
        `MTex *mtex[18]`

    The number of items is the on-disk size of the field divided by the
    pointer size of the blend file. Null pointers are skipped.

    :param path: The array field.
    :raises KeyError: if the path does not exist.
    """
    file_header = self.bfile.header
    pointer_size = file_header.pointer_size
    endian = file_header.endian
    fileobj = self.bfile.fileobj

    field, offset_in_struct = self.dna_type.field_from_path(pointer_size, path)

    for index in range(field.size // pointer_size):
        item_offset = self.file_offset + offset_in_struct + pointer_size * index
        fileobj.seek(item_offset, os.SEEK_SET)
        address = endian.read_pointer(fileobj, pointer_size)
        if address:
            # Fixed-size arrays contain 0-pointers for unused slots.
            yield self.bfile.dereference_pointer(address)
class Name:
    """A C declarator name as stored in the DNA, e.g. b'*mat[4]'.

    :ivar name_full: the declarator exactly as given.
    :ivar name_only: the bare identifier, without '*', '()' or array suffix.
    :ivar is_pointer: True when the declarator contains a '*'.
    :ivar is_method_pointer: True when the declarator contains '(*'.
    :ivar array_size: product of all array dimensions; 1 for non-arrays.
    """

    def __init__(self, name_full: bytes) -> None:
        self.name_full = name_full
        self.name_only = self.calc_name_only()
        self.is_pointer = self.calc_is_pointer()
        self.is_method_pointer = self.calc_is_method_pointer()
        self.array_size = self.calc_array_size()

    def __repr__(self):
        return '{}({!r})'.format(type(self).__qualname__, self.name_full)

    def as_reference(self, parent) -> bytes:
        """Return `parent.name_only`, or just `name_only` without a parent."""
        if parent:
            return parent + b'.' + self.name_only
        return self.name_only

    def calc_name_only(self) -> bytes:
        """Strip pointer/function decoration and any array suffix."""
        undecorated = self.name_full.strip(b'*()')
        # Everything before the first '[' (or all of it when there is none).
        return undecorated.partition(b'[')[0]

    def calc_is_pointer(self) -> bool:
        return self.name_full.find(b'*') != -1

    def calc_is_method_pointer(self):
        return self.name_full.find(b'(*') != -1

    def calc_array_size(self):
        """Multiply all '[N]' dimensions found in the full name."""
        size = 1
        remainder = self.name_full

        open_at = remainder.find(b'[')
        while open_at >= 0:
            close_at = remainder.find(b']')
            size *= int(remainder[open_at + 1:close_at])
            # Continue after this dimension to pick up '[N][M]' declarators.
            remainder = remainder[close_at + 1:]
            open_at = remainder.find(b'[')

        return size
def field_from_path(self,
                    pointer_size: int,
                    path: FieldPath) \
        -> typing.Tuple[Field, int]:
    """Find a field, and its offset within this struct, from a path.

    Supports lookups as bytes or as a tuple of bytes and an optional index:

        C style 'id.name'   -->  (b'id', b'name')
        C style 'array[4]'  -->  (b'array', 4)

    :param pointer_size: pointer size of the blend file, in bytes; used to
        compute the offset of an indexed item in an array of pointers.
    :param path: the field name, or a tuple as described above.
    :returns: the field itself, and its offset taking into account the
        optional index. The offset is relative to the start of the struct,
        i.e. relative to the BlendFileBlock containing the data.
    :raises KeyError: if the field does not exist.
    :raises TypeError: if the field name is not bytes.
    :raises OverflowError: if the index points past the end of the field.
    """
    # Split the path into: field name, optional array index, remaining path.
    name, index, name_tail = path, 0, ()
    if isinstance(path, tuple):
        name = path[0]
        if len(path) >= 2 and not isinstance(path[1], bytes):
            index = path[1]
            assert isinstance(index, int)
            name_tail = path[2:]
        else:
            name_tail = path[1:]

    if not isinstance(name, bytes):
        raise TypeError('name should be bytes, but is %r' % type(name))

    field = self._fields_by_name.get(name)
    if not field:
        known_names = sorted(self._fields_by_name.keys())
        raise KeyError('%r has no field %r, only %r' % (self, name, known_names))

    offset = field.offset
    if index:
        # Arrays of pointers are strided by pointer size, others by item size.
        item_size = pointer_size if field.name.is_pointer else field.dna_type.size
        index_offset = item_size * index
        if index_offset >= field.size:
            raise OverflowError('path %r is out of bounds of its DNA type %s' %
                                (path, field.dna_type))
        offset += index_offset

    if not name_tail:
        return field, offset

    # The rest of the path is resolved inside the struct type of this field.
    sub_field, sub_offset = field.dna_type.field_from_path(pointer_size, name_tail)
    return sub_field, sub_offset + offset
+ :param null_terminated: Only used when reading bytes or strings. When + True, stops reading at the first zero byte. Be careful with this + default when reading binary data. + :param as_str: When True, automatically decode bytes to string + (assumes UTF-8 encoding). + :returns: The field instance and the value. If a default value was passed + and the field was not found, (None, default) is returned. + """ + try: + field, offset = self.field_from_path(file_header.pointer_size, path) + except KeyError: + if default is ...: + raise + return None, default + + fileobj.seek(offset, os.SEEK_CUR) + + dna_type = field.dna_type + dna_name = field.name + endian = file_header.endian + + # Some special cases (pointers, strings/bytes) + if dna_name.is_pointer: + return field, endian.read_pointer(fileobj, file_header.pointer_size) + if dna_type.dna_type_id == b'char': + return field, self._field_get_char(file_header, fileobj, field, null_terminated, as_str) + + simple_readers = { + b'int': endian.read_int, + b'short': endian.read_short, + b'uint64_t': endian.read_ulong, + b'float': endian.read_float, + } + try: + simple_reader = simple_readers[dna_type.dna_type_id] + except KeyError: + raise exceptions.NoReaderImplemented( + "%r exists but not simple type (%r), can't resolve field %r" % + (path, dna_type.dna_type_id.decode(), dna_name.name_only), + dna_name, dna_type) from None + + if isinstance(path, tuple) and len(path) > 1 and isinstance(path[-1], int): + # The caller wants to get a single item from an array. The offset we seeked to already + # points to this item. In this case we do not want to look at dna_name.array_size, + # because we want a single item from that array. 
def _field_get_char(self,
                    file_header: header.BlendFileHeader,
                    fileobj: typing.IO[bytes],
                    field: 'Field',
                    null_terminated: typing.Optional[bool],
                    as_str: bool) -> typing.Any:
    """Read a `char` field from the current position of `fileobj`.

    :param file_header: provides the endian-specific reader.
    :param fileobj: the file to read from.
    :param field: the field being read; its size and array size are used.
    :param null_terminated: when True, the data stops at the first zero
        byte. None means "same as `as_str`".
    :param as_str: when True, the bytes are decoded as UTF-8.
    :returns: the result of `endian.read_char()` for a one-byte field,
        otherwise a str or bytes depending on `as_str`.
    """
    endian = file_header.endian

    if field.size == 1:
        # Single char, assume it's bitflag or int value, and not a string/bytes data...
        return endian.read_char(fileobj)

    length = field.name.array_size
    stop_at_zero = null_terminated or (null_terminated is None and as_str)
    if stop_at_zero:
        raw = endian.read_bytes0(fileobj, length)
    else:
        raw = fileobj.read(length)

    return raw.decode('utf8') if as_str else raw
+ """ + assert isinstance(path, bytes), 'path should be bytes, but is %s' % type(path) + + field, offset = self.field_from_path(file_header.pointer_size, path) + + dna_type = field.dna_type + dna_name = field.name + endian = file_header.endian + + if dna_type.dna_type_id != b'char': + msg = "Setting type %r is not supported for %s.%s" % ( + dna_type, self.dna_type_id.decode(), dna_name.name_full.decode()) + raise exceptions.NoWriterImplemented(msg, dna_name, dna_type) + + fileobj.seek(offset, os.SEEK_CUR) + + if self.log.isEnabledFor(logging.DEBUG): + filepos = fileobj.tell() + thing = 'string' if isinstance(value, str) else 'bytes' + self.log.debug('writing %s %r at file offset %d / %x', thing, value, filepos, filepos) + + if isinstance(value, str): + return endian.write_string(fileobj, value, dna_name.array_size) + else: + return endian.write_bytes(fileobj, value, dna_name.array_size) diff --git a/blendfile/dna_io.py b/blendfile/dna_io.py new file mode 100644 index 0000000..ddfce71 --- /dev/null +++ b/blendfile/dna_io.py @@ -0,0 +1,163 @@ +# ***** BEGIN GPL LICENSE BLOCK ***** +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# ***** END GPL LICENCE BLOCK ***** +# +# (c) 2009, At Mind B.V. - Jeroen Bakker +# (c) 2014, Blender Foundation - Campbell Barton +# (c) 2018, Blender Foundation - Sybren A. 
Stüvel +"""Read-write utility functions.""" + +import struct +import typing + + +class EndianIO: + # TODO(Sybren): note as UCHAR: struct.Struct = None and move actual structs to LittleEndianTypes + UCHAR = struct.Struct(b' int: + """Write a (truncated) string as UTF-8. + + The string will always be written 0-terminated. + + :param fileobj: the file to write to. + :param astring: the string to write. + :param fieldlen: the field length in bytes. + :returns: the number of bytes written. + """ + assert isinstance(astring, str) + encoded = astring.encode('utf-8') + + # Take into account we also need space for a trailing 0-byte. + maxlen = fieldlen - 1 + + if len(encoded) >= maxlen: + encoded = encoded[:maxlen] + + # Keep stripping off the last byte until the string + # is valid UTF-8 again. + while True: + try: + encoded.decode('utf8') + except UnicodeDecodeError: + encoded = encoded[:-1] + else: + break + + return fileobj.write(encoded + b'\0') + + @classmethod + def write_bytes(cls, fileobj: typing.IO[bytes], data: bytes, fieldlen: int) -> int: + """Write (truncated) bytes. + + When len(data) < fieldlen, a terminating b'\0' will be appended. + + :returns: the number of bytes written. 
@classmethod
def read_data0_offset(cls, data, offset):
    """Return the zero-terminated bytes that start at `offset` in `data`.

    :param data: the bytes to read from.
    :param offset: index in `data` at which the value starts.
    :returns: the bytes from `offset` up to, but not including, the first
        zero byte at or after `offset`. When there is no such zero byte,
        everything from `offset` to the end of `data` is returned.
    """
    end = data.find(b'\0', offset)
    if end < 0:
        # No terminator. Without this check the -1 from find() would be used
        # as slice end and silently drop the last byte of the data.
        return data[offset:]
    return data[offset:end]
class BlendFileError(Exception):
    """Raised when there was an error reading/parsing a blend file.

    :ivar filepath: path of the blend file the error is about; it is
        appended to the message by str().
    """

    def __init__(self, message: str, filepath: pathlib.Path) -> None:
        super().__init__(message)
        self.filepath = filepath

    def __str__(self):
        message = super().__str__()
        return '{}: {}'.format(message, self.filepath)


class NoDNA1Block(BlendFileError):
    """Raised when the blend file contains no DNA1 block."""


class NoReaderImplemented(NotImplementedError):
    """Raised when reading a property of a non-implemented type.

    This indicates that the property should be read using some dna.Struct.

    :type dna_name: blender_asset_tracer.blendfile.dna.Name
    :type dna_type: blender_asset_tracer.blendfile.dna.Struct
    """

    def __init__(self, message: str, dna_name, dna_type) -> None:
        super().__init__(message)
        self.dna_type = dna_type
        self.dna_name = dna_name
class SegmentationFault(Exception):
    """Raised when a pointer to a non-existent datablock was dereferenced.

    :ivar address: the address that could not be resolved.
    :ivar field_path: path of the field that held the pointer, or None when
        it was not given.
    """

    def __init__(self, message: str, address: int, field_path=None) -> None:
        super().__init__(message)
        self.field_path = field_path
        self.address = address
+ + It contains information about the hardware architecture, which is relevant + to the structure of the rest of the file. + """ + structure = struct.Struct(b'7s1s1s3s') + + def __init__(self, fileobj: typing.IO[bytes], path: pathlib.Path) -> None: + log.debug("reading blend-file-header %s", path) + fileobj.seek(0, os.SEEK_SET) + header = fileobj.read(self.structure.size) + values = self.structure.unpack(header) + + self.magic = values[0] + + pointer_size_id = values[1] + if pointer_size_id == b'-': + self.pointer_size = 8 + elif pointer_size_id == b'_': + self.pointer_size = 4 + else: + raise exceptions.BlendFileError('invalid pointer size %r' % pointer_size_id, path) + + endian_id = values[2] + if endian_id == b'v': + self.endian = dna_io.LittleEndianTypes + self.endian_str = b'<' # indication for struct.Struct() + elif endian_id == b'V': + self.endian = dna_io.BigEndianTypes + self.endian_str = b'>' # indication for struct.Struct() + else: + raise exceptions.BlendFileError('invalid endian indicator %r' % endian_id, path) + + version_id = values[3] + self.version = int(version_id) + + def create_block_header_struct(self) -> struct.Struct: + """Create a Struct instance for parsing data block headers.""" + return struct.Struct(b''.join(( + self.endian_str, + b'4sI', + b'I' if self.pointer_size == 4 else b'Q', + b'II', + ))) diff --git a/blendfile/iterators.py b/blendfile/iterators.py new file mode 100644 index 0000000..1a76e4c --- /dev/null +++ b/blendfile/iterators.py @@ -0,0 +1,70 @@ +# ***** BEGIN GPL LICENSE BLOCK ***** +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
def listbase(block: typing.Optional[BlendFileBlock], next_path: FieldPath = b'next') \
        -> typing.Iterator[BlendFileBlock]:
    """Generator, yields all blocks in the ListBase linked list.

    :param block: the first block of the list; nothing is yielded when it
        is falsy (e.g. None).
    :param next_path: the field that holds the address of the next block.
    """
    current = block
    while current:
        yield current

        next_address = current[next_path]
        if next_address == 0:
            # A null 'next' pointer marks the end of the list.
            return
        current = current.bfile.dereference_pointer(next_address)
def modifiers(object_block: BlendFileBlock) -> typing.Iterator[BlendFileBlock]:
    """Generator, yield the object's modifiers.

    Follows `modifiers.first` of the given block and then walks the linked
    list through each item's `modifier.next` field.
    """
    # 'ob->modifiers[...]'
    first_modifier = object_block.get_pointer((b'modifiers', b'first'))
    next_field = (b'modifier', b'next')
    yield from listbase(first_modifier, next_path=next_field)
class BlendPath(bytes):
    """A path within Blender is always stored as bytes.

    Backslashes are converted to forward slashes on construction. Paths that
    start with `//` are relative to the directory of the blend file.
    """

    def __new__(cls, path):
        """Create a BlendPath from bytes or from a pathlib path.

        :raises TypeError: if `path` is neither bytes nor a pathlib.PurePath.
        """
        if isinstance(path, pathlib.PurePath):
            path = str(path).encode('utf-8')
        if not isinstance(path, bytes):
            # Wrapped in a tuple so that a tuple-valued `path` is reported
            # as-is instead of being taken as the %-format arguments.
            raise TypeError('path must be bytes or pathlib.Path, but is %r' % (path,))

        return super().__new__(cls, path.replace(b'\\', b'/'))

    @classmethod
    def mkrelative(cls, asset_path: pathlib.PurePath, bfile_path: pathlib.PurePath) -> 'BlendPath':
        """Construct a BlendPath to the asset relative to the blend file.

        Assumes that bfile_path is absolute.

        Note that this can return an absolute path on Windows when 'asset_path'
        and 'bfile_path' are on different drives.

        :param asset_path: absolute path of the asset.
        :param bfile_path: absolute path of the blend file.
        """
        from collections import deque

        # Only compare absolute paths.
        assert bfile_path.is_absolute(), \
            'BlendPath().mkrelative(bfile_path=%r) should get absolute bfile_path' % bfile_path
        assert asset_path.is_absolute(), \
            'BlendPath().mkrelative(asset_path=%r) should get absolute asset_path' % asset_path

        # There is no way to construct a relative path between drives.
        if bfile_path.drive != asset_path.drive:
            return cls(asset_path)

        bdir_parts = deque(bfile_path.parent.parts)
        asset_path = make_absolute(asset_path)
        asset_parts = deque(asset_path.parts)

        # Remove matching initial parts. What is left in bdir_parts represents
        # the number of '..' we need. What is left in asset_parts represents
        # what we need after the '../../../'.
        # asset_parts can run out first, when the asset is a directory above
        # the blend file; stop there instead of indexing an empty deque.
        while bdir_parts and asset_parts:
            if bdir_parts[0] != asset_parts[0]:
                break
            bdir_parts.popleft()
            asset_parts.popleft()

        rel_asset = pathlib.PurePath(*asset_parts)
        # TODO(Sybren): should we use sys.getfilesystemencoding() instead?
        rel_bytes = str(rel_asset).encode('utf-8')
        as_bytes = b'//' + len(bdir_parts) * b'../' + rel_bytes
        return cls(as_bytes)

    def __str__(self) -> str:
        """Decodes the path as UTF-8, replacing undecodable bytes.

        Undecodable bytes are replaced rather than raising an error, so this
        function can be safely used for reporting.
        """
        return self.decode('utf8', errors='replace')

    def __repr__(self) -> str:
        return 'BlendPath(%s)' % super().__repr__()

    def __truediv__(self, subpath: bytes):
        """Slash notation like pathlib.Path.

        :raises ValueError: if `subpath` is an absolute path.
        """
        sub = BlendPath(subpath)
        if sub.is_absolute():
            raise ValueError("'a / b' only works when 'b' is a relative path")
        return BlendPath(self.rstrip(b'/') + b'/' + sub)

    def __rtruediv__(self, parentpath: bytes):
        """Slash notation like pathlib.Path.

        :raises ValueError: if this path is an absolute path.
        """
        if self.is_absolute():
            raise ValueError("'a / b' only works when 'b' is a relative path")
        return BlendPath(parentpath.rstrip(b'/') + b'/' + self)

    def to_path(self) -> pathlib.PurePath:
        """Convert this path to a pathlib.PurePath.

        This path MUST NOT be a blendfile-relative path (e.g. it may not start
        with `//`). For such paths, first use `.absolute()` to resolve the path.

        Interprets the path as UTF-8, and if that fails falls back to the local
        filesystem encoding.

        The exact type returned is determined by the current platform.

        :raises ValueError: if this is a blendfile-relative path.
        """
        # TODO(Sybren): once we target Python 3.6, implement __fspath__().
        if self.is_blendfile_relative():
            raise ValueError('to_path() cannot be used on blendfile-relative paths')
        try:
            decoded = self.decode('utf8')
        except UnicodeDecodeError:
            decoded = self.decode(sys.getfilesystemencoding())
        return pathlib.PurePath(decoded)

    def is_blendfile_relative(self) -> bool:
        """Return True when the path starts with `//`."""
        return self[:2] == b'//'

    def is_absolute(self) -> bool:
        """Return True for POSIX-style and Windows drive-letter absolute paths.

        Blendfile-relative paths (starting with `//`) are not absolute.
        """
        if self.is_blendfile_relative():
            return False
        if self[0:1] == b'/':
            return True

        # Windows style path starting with drive letter. The check is done on
        # the raw bytes (bytes.isalpha() only matches ASCII letters), so that
        # paths that are not valid UTF-8 do not raise UnicodeDecodeError.
        return (len(self) >= 3 and
                self[0:1].isalpha() and
                self[1:2] == b':' and
                self[2:3] in {b'\\', b'/'})

    def absolute(self, root: bytes = b'') -> 'BlendPath':
        """Determine absolute path.

        :param root: root directory to compute paths relative to.
            For blendfile-relative paths, root should be the directory
            containing the blendfile.
        :returns: this path itself when it is already absolute. Otherwise
            the path, without any leading `//`, joined onto `root` with
            os.path.join(). With the default empty `root` the result is
            therefore still a relative path.

        NOTE(review): an earlier version of this docstring stated that a
        missing root raises ValueError for blendfile-relative paths and that
        other relative paths are resolved against the current working
        directory. The code does neither; confirm which is intended.
        """
        if self.is_absolute():
            return self

        if self.is_blendfile_relative():
            my_relpath = self[2:]  # strip off leading //
        else:
            my_relpath = self
        return BlendPath(os.path.join(root, my_relpath))


def make_absolute(path: pathlib.PurePath) -> pathlib.Path:
    """Make the path absolute without resolving symlinks or drive letters.

    This function is an alternative to `Path.resolve()`. It makes the path
    absolute, and resolves `../../`, but contrary to `Path.resolve()` does NOT
    perform these changes:
        - Symlinks are NOT followed.
        - Windows Network shares that are mapped to a drive letter are NOT resolved
          to their UNC notation.

    The type of the returned path is determined by the current platform.
    """
    str_path = path.as_posix()
    if len(str_path) >= 2 and str_path[0].isalpha() and str_path[1] == ':':
        # This is an absolute Windows path. It must be handled with care on non-Windows platforms.
        if platform.system() != 'Windows':
            # Normalize the POSIX-like part of the path, but leave out the drive letter.
            non_drive_path = str_path[2:]
            normalized = os.path.normpath(non_drive_path)
            # Stick the drive letter back on the normalized path.
            return pathlib.Path(str_path[:2] + normalized)

    return pathlib.Path(os.path.abspath(str_path))


def strip_root(path: pathlib.PurePath) -> pathlib.PurePosixPath:
    """Turn the path into a relative path by stripping the root.

    This also turns any drive letter into a normal path component.

    This changes "C:/Program Files/Blender" to "C/Program Files/Blender",
    and "/absolute/path.txt" to "absolute/path.txt", making it possible to
    treat it as a relative path.
    """

    if path.drive:
        return pathlib.PurePosixPath(path.drive[0], *path.parts[1:])
    if isinstance(path, pathlib.PurePosixPath):
        # This happens when running on POSIX but still handling paths
        # originating from a Windows machine.
        parts = path.parts
        if parts and len(parts[0]) == 2 and parts[0][0].isalpha() and parts[0][1] == ':':
            # The first part is a drive letter.
            return pathlib.PurePosixPath(parts[0][0], *path.parts[1:])

    if path.is_absolute():
        return pathlib.PurePosixPath(*path.parts[1:])
    return pathlib.PurePosixPath(path)
+# +# ***** END GPL LICENCE BLOCK ***** + +"""Constants defined in C.""" + +# DNA_sequence_types.h (Sequence.type) +SEQ_TYPE_IMAGE = 0 +SEQ_TYPE_META = 1 +SEQ_TYPE_SCENE = 2 +SEQ_TYPE_MOVIE = 3 +SEQ_TYPE_SOUND_RAM = 4 +SEQ_TYPE_SOUND_HD = 5 +SEQ_TYPE_MOVIECLIP = 6 +SEQ_TYPE_MASK = 7 +SEQ_TYPE_EFFECT = 8 + +IMA_SRC_FILE = 1 +IMA_SRC_SEQUENCE = 2 +IMA_SRC_MOVIE = 3 +IMA_SRC_TILED = 6 + +# DNA_modifier_types.h +eModifierType_Wave = 7 +eModifierType_Displace = 14 +eModifierType_UVProject = 15 +eModifierType_ParticleSystem = 19 +eModifierType_Cloth = 22 +eModifierType_Fluidsim = 26 +eModifierType_Smokesim = 31 +eModifierType_WeightVGEdit = 36 +eModifierType_WeightVGMix = 37 +eModifierType_WeightVGProximity = 38 +eModifierType_Ocean = 39 +eModifierType_MeshCache = 46 +eModifierType_MeshSequenceCache = 52 +eModifierType_Nodes = 57 + +# DNA_particle_types.h +PART_DRAW_OB = 7 +PART_DRAW_GR = 8 + +# DNA_object_types.h +# Object.transflag +OB_DUPLIGROUP = 1 << 8 + +# DNA_object_force_types.h +PTCACHE_DISK_CACHE = 64 +PTCACHE_EXTERNAL = 512 + +# BKE_pointcache.h +PTCACHE_FILE_PTCACHE = 0 +PTCACHE_FILE_OPENVDB = 1 +PTCACHE_EXT = b'.bphys' +PTCACHE_EXT_VDB = b'.vdb' +PTCACHE_PATH = b'blendcache_' + +# BKE_node.h +SH_NODE_TEX_IMAGE = 143 +CMP_NODE_R_LAYERS = 221 diff --git a/cli/__init__.py b/cli/__init__.py new file mode 100644 index 0000000..3270fae --- /dev/null +++ b/cli/__init__.py @@ -0,0 +1,99 @@ +# ***** BEGIN GPL LICENSE BLOCK ***** +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
def cli_main():
    """Parse the command line, run the chosen subcommand and log its duration.

    Global options (log level, profiling) are handled here; each subcommand
    module registers its own sub-parser and stores its entry point in `func`.

    :returns: whatever the subcommand function returned (for example the
        exit status 3 some subcommands return for a missing blend file, or
        None), so that the caller can pass it on to `sys.exit()`.
    """
    from blender_asset_tracer import __version__

    parser = argparse.ArgumentParser(description='BAT: Blender Asset Tracer v%s' % __version__)
    common.add_flag(parser, 'profile', help='Run the profiler, write to bam.prof')

    # func is set by subparsers to indicate which function to run.
    parser.set_defaults(func=None,
                        loglevel=logging.WARNING)
    loggroup = parser.add_mutually_exclusive_group()
    loggroup.add_argument('-v', '--verbose', dest='loglevel',
                          action='store_const', const=logging.INFO,
                          help='Log INFO level and higher')
    loggroup.add_argument('-d', '--debug', dest='loglevel',
                          action='store_const', const=logging.DEBUG,
                          help='Log everything')
    loggroup.add_argument('-q', '--quiet', dest='loglevel',
                          action='store_const', const=logging.ERROR,
                          help='Log at ERROR level and higher')
    subparsers = parser.add_subparsers(
        help='Choose a subcommand to actually make BAT do something. '
             'Global options go before the subcommand, '
             'whereas subcommand-specific options go after it. '
             'Use --help after the subcommand to get more info.')

    blocks.add_parser(subparsers)
    pack.add_parser(subparsers)
    list_deps.add_parser(subparsers)

    args = parser.parse_args()
    config_logging(args)

    log = logging.getLogger(__name__)

    # Make sure the things we log in our local logger are visible
    if args.profile and args.loglevel > logging.INFO:
        log.setLevel(logging.INFO)
    log.debug('Running BAT version %s', __version__)

    if not args.func:
        parser.error('No subcommand was given')

    start_time = time.time()
    if args.profile:
        import cProfile

        prof_fname = 'bam.prof'
        log.info('Running profiler')
        # runcall() hands back the subcommand's return value, which the
        # string-based cProfile.runctx() cannot do.
        profiler = cProfile.Profile()
        try:
            retval = profiler.runcall(args.func, args)
        finally:
            # Always export what was measured, even when the subcommand failed.
            profiler.dump_stats(prof_fname)
        log.info('Profiler exported data to %s', prof_fname)
        log.info('Run "pyprof2calltree -i %r -k" to convert and open in KCacheGrind', prof_fname)
    else:
        retval = args.func(args)
    duration = datetime.timedelta(seconds=time.time() - start_time)
    log.info('Command took %s to complete', duration)

    # Propagate the subcommand's status instead of silently dropping it.
    return retval


def config_logging(args):
    """Configure the logging system based on CLI arguments.

    The root logger stays at WARNING; only the 'blender_asset_tracer' logger
    gets the level from `args.loglevel`.
    """

    logging.basicConfig(
        level=logging.WARNING,
        format='%(asctime)-15s %(levelname)8s %(name)-40s %(message)s',
    )
    # Only set the log level on our own logger. Otherwise
    # debug logging will be completely swamped.
    logging.getLogger('blender_asset_tracer').setLevel(args.loglevel)
log = logging.getLogger(__name__)


class BlockTypeInfo:
    """Running totals for all blocks that share one block key."""

    def __init__(self):
        self.name = 'unset'
        self.num_blocks = 0
        self.total_bytes = 0
        self.sizes = []
        self.blocks = []


def add_parser(subparsers):
    """Register the 'blocks' subcommand and its options."""

    blocks_parser = subparsers.add_parser('blocks', help=__doc__)
    blocks_parser.set_defaults(func=cli_blocks)
    blocks_parser.add_argument('blendfile', type=pathlib.Path)
    blocks_parser.add_argument('-d', '--dump', default=False, action='store_true',
                               help='Hex-dump the biggest block')
    blocks_parser.add_argument('-l', '--limit', default=10, type=int,
                               help='Limit the number of DNA types shown, default is 10')


def by_total_bytes(info: BlockTypeInfo) -> int:
    """Sort key: the accumulated size of a block type."""
    return info.total_bytes


def block_key(block: 'blendfile.BlendFileBlock') -> str:
    """Return the '<DNA type name>-<block code>' key used to group blocks."""
    return '-'.join((str(block.dna_type_name), block.code.decode()))


def cli_blocks(args):
    """Print per-type block statistics of a blend file.

    Also reports the biggest block of the most space-consuming type, lists
    the properties whose integer value equals that block's old address, and
    optionally hex-dumps the block.

    :returns: 3 when the blend file does not exist, None otherwise.
    """
    blend_path = args.blendfile
    if not blend_path.exists():
        log.fatal('File %s does not exist', args.blendfile)
        return 3

    stats_per_key = collections.defaultdict(BlockTypeInfo)

    print('Opening %s' % blend_path)
    bfile = blendfile.BlendFile(blend_path)

    print('Inspecting %s' % blend_path)
    for block in bfile.blocks:
        if block.code != b'DNA1':
            key = block_key(block)
            stats = stats_per_key[key]
            stats.name = key
            stats.total_bytes += block.size
            stats.num_blocks += 1
            stats.sizes.append(block.size)
            stats.blocks.append(block)

    row_format = '%-35s %10s %10s %10s %10s'
    dashes = 10 * '-'
    print(row_format % ('Block type', 'Total Size', 'Num blocks', 'Avg Size', 'Median'))
    print(row_format % (35 * '-', dashes, dashes, dashes, dashes))

    ranked = sorted(stats_per_key.values(), key=by_total_bytes, reverse=True)
    for stats in ranked[:args.limit]:
        ordered_sizes = sorted(stats.sizes)
        median_size = ordered_sizes[len(ordered_sizes) // 2]
        average_size = stats.total_bytes // stats.num_blocks
        print(row_format % (stats.name,
                            common.humanize_bytes(stats.total_bytes),
                            stats.num_blocks,
                            common.humanize_bytes(average_size),
                            common.humanize_bytes(median_size)))

    print(70 * '-')
    # From the blocks of the most space-using category, the biggest block.
    biggest_block = max(ranked[0].blocks, key=lambda candidate: candidate.size)
    print('Biggest %s block is %s at address %s' % (
        block_key(biggest_block),
        common.humanize_bytes(biggest_block.size),
        biggest_block.addr_old,
    ))

    print('Finding what points there')
    wanted_address = biggest_block.addr_old
    found_pointer = False
    for block in bfile.blocks:
        for prop_path, prop_value in block.items_recursive():
            if isinstance(prop_value, int) and prop_value == wanted_address:
                print(' ', block, prop_path)
                found_pointer = True

    if not found_pointer:
        print('Nothing points there')

    if args.dump:
        print('Hexdump:')
        bfile.fileobj.seek(biggest_block.file_offset)
        data = bfile.fileobj.read(biggest_block.size)
        bytes_per_line = 32
        for offset in range(0, len(data), bytes_per_line):
            chunk = data[offset:offset + bytes_per_line]
            print('%6d -' % offset, ' '.join('%02x' % byte for byte in chunk))
def add_flag(argparser, flag_name: str, **kwargs):
    """Register a boolean switch on `argparser`.

    Both a short form (first letter of `flag_name`) and a long form are
    added. The value is False unless the switch is given on the CLI, in
    which case True is stored. Extra keyword arguments are passed on to
    `add_argument()`.
    """

    short_option = '-%s' % flag_name[0]
    long_option = '--%s' % flag_name
    argparser.add_argument(short_option, long_option,
                           default=False, action='store_true', **kwargs)


def shorten(cwd: pathlib.Path, somepath: pathlib.Path) -> pathlib.Path:
    """Return `somepath` relative to `cwd`, or unchanged if it is not inside it."""
    try:
        relative = somepath.relative_to(cwd)
    except ValueError:
        return somepath
    return relative


def humanize_bytes(size_in_bytes: int, precision: typing.Optional[int]=None):
    """Format a byte count with a binary-scaled unit suffix.

    Source: http://code.activestate.com/recipes/577081-humanized-representation-of-a-number-of-bytes

    :param size_in_bytes: The size to humanize
    :param precision: Number of digits after the decimal point. When None,
        one digit is shown for sizes of 1024 bytes and up, and none for
        smaller sizes.

    >>> humanize_bytes(1)
    '1 B'
    >>> humanize_bytes(1024)
    '1.0 kB'
    >>> humanize_bytes(1024*123, 0)
    '123 kB'
    >>> humanize_bytes(1024*123)
    '123.0 kB'
    >>> humanize_bytes(1024*12342)
    '12.1 MB'
    >>> humanize_bytes(1024*12342,2)
    '12.05 MB'
    >>> humanize_bytes(1024*1234,2)
    '1.21 MB'
    >>> humanize_bytes(1024*1234*1111,2)
    '1.31 GB'
    >>> humanize_bytes(1024*1234*1111,1)
    '1.3 GB'
    """

    if precision is None:
        precision = 1 if size_in_bytes >= 1024 else 0

    # Largest unit first; the first one that fits is used.
    units = ((50, 'PB'), (40, 'TB'), (30, 'GB'), (20, 'MB'), (10, 'kB'), (0, 'B'))
    fitting = ((1 << shift, unit) for shift, unit in units if size_in_bytes >= 1 << shift)
    # Sizes below one byte (i.e. zero) fall back to plain bytes.
    factor, suffix = next(fitting, (1, 'B'))

    return '%.*f %s' % (precision, size_in_bytes / factor, suffix)


if __name__ == '__main__':
    import doctest
    doctest.testmod()
log = logging.getLogger(__name__)


def add_parser(subparsers):
    """Add argparser for this subcommand."""

    parser = subparsers.add_parser('list', help=__doc__)
    parser.set_defaults(func=cli_list)
    parser.add_argument('blendfile', type=pathlib.Path)
    common.add_flag(parser, 'json', help='Output as JSON instead of human-readable text')
    common.add_flag(parser, 'sha256',
                    help='Include SHA256sums in the output. Note that those may differ from the '
                         'SHA256sums in a BAT-pack when paths are rewritten.')
    common.add_flag(parser, 'timing', help='Include timing information in the output')


def cli_list(args):
    """Entry point of the 'list' subcommand.

    :returns: 3 when the blend file does not exist, None otherwise.
    """
    bpath = args.blendfile
    if not bpath.exists():
        log.fatal('File %s does not exist', args.blendfile)
        return 3

    if args.json:
        # NOTE(review): these combinations are only reported; the JSON report
        # is still produced afterwards. Confirm whether aborting is intended.
        if args.sha256:
            log.fatal('--sha256 can currently not be used in combination with --json')
        if args.timing:
            log.fatal('--timing can currently not be used in combination with --json')
        report_json(bpath)
    else:
        report_text(bpath, include_sha256=args.sha256, show_timing=args.timing)


def _sha256_hexdigest(filepath: pathlib.Path) -> str:
    """Return the SHA256 hex digest of a single file, read in 32 KiB blocks."""
    summer = hashlib.sha256()
    with filepath.open('rb') as infile:
        while True:
            block = infile.read(32 * 1024)
            if not block:
                break
            summer.update(block)
    return summer.hexdigest()


def calc_sha_sum(filepath: pathlib.Path) -> typing.Tuple[str, float]:
    """Compute the SHA256 sum of a file, or of every file below a directory.

    :param filepath: file or directory to hash.
    :returns: tuple (digest, duration in seconds). For a directory the
        digest is the placeholder '-multiple-'; the individual digests are
        computed (so the time spent is representative) but discarded.
    """
    start = time.time()

    if filepath.is_dir():
        # rglob() already descends into sub-directories, so only hash regular
        # files here. Recursing on the directories it yields would hash
        # nested files once per ancestor directory.
        for subfile in filepath.rglob('*'):
            if subfile.is_file():
                _sha256_hexdigest(subfile)
        duration = time.time() - start
        return '-multiple-', duration

    digest = _sha256_hexdigest(filepath)
    duration = time.time() - start

    return digest, duration


def report_text(bpath, *, include_sha256: bool, show_timing: bool):
    """Print the dependencies of `bpath` as human-readable text.

    Each blend file is printed once, followed by its assets. An asset is
    listed only the first time it is encountered.

    :param bpath: the blend file to trace.
    :param include_sha256: also print a SHA256 sum for every listed file.
    :param show_timing: print how long the listing (and hashing) took.
    """
    reported_assets = set()  # type: typing.Set[pathlib.Path]
    last_reported_bfile = None
    shorten = functools.partial(common.shorten, pathlib.Path.cwd())

    time_spent_on_shasums = 0.0
    start_time = time.time()

    for usage in trace.deps(bpath):
        filepath = usage.block.bfile.filepath.absolute()
        if filepath != last_reported_bfile:
            if include_sha256:
                shasum, time_spent = calc_sha_sum(filepath)
                time_spent_on_shasums += time_spent
                print(shorten(filepath), shasum)
            else:
                print(shorten(filepath))

            last_reported_bfile = filepath

        for assetpath in usage.files():
            assetpath = bpathlib.make_absolute(assetpath)
            if assetpath in reported_assets:
                log.debug('Already reported %s', assetpath)
                continue

            if include_sha256:
                shasum, time_spent = calc_sha_sum(assetpath)
                time_spent_on_shasums += time_spent
                print(' ', shorten(assetpath), shasum)
            else:
                print(' ', shorten(assetpath))
            reported_assets.add(assetpath)

    if show_timing:
        duration = time.time() - start_time
        print('Spent %.2f seconds on producing this listing' % duration)
        if include_sha256:
            print('Spent %.2f seconds on calculating SHA sums' % time_spent_on_shasums)
            # The clock may not have advanced at all for very quick listings.
            percentage = time_spent_on_shasums / duration * 100 if duration else 0
            print(' (that is %d%% of the total time)' % percentage)


class JSONSerialiser(json.JSONEncoder):
    """JSON encoder that also handles `pathlib.Path` and `set` values."""

    def default(self, o):
        """Encode paths as strings and sets as sorted lists."""
        if isinstance(o, pathlib.Path):
            return str(o)
        if isinstance(o, set):
            return sorted(o)
        return super().default(o)


def report_json(bpath):
    """Write a JSON mapping from blend file to its asset paths to stdout."""
    import collections

    # Mapping from blend file to its dependencies.
    report = collections.defaultdict(set)

    for usage in trace.deps(bpath):
        filepath = usage.block.bfile.filepath.absolute()
        for assetpath in usage.files():
            assetpath = assetpath.resolve()
            report[str(filepath)].add(assetpath)

    json.dump(report, sys.stdout, cls=JSONSerialiser, indent=4)
log = logging.getLogger(__name__)


def add_parser(subparsers):
    """Add argparser for this subcommand."""

    parser = subparsers.add_parser('pack', help=__doc__)
    parser.set_defaults(func=cli_pack)
    parser.add_argument('blendfile', type=pathlib.Path,
                        help='The Blend file to pack.')
    parser.add_argument('target', type=str,
                        help="The target can be a directory, a ZIP file (does not have to exist "
                             "yet, just use 'something.zip' as target), "
                             "or a URL of S3 storage (s3://endpoint/path) "
                             "or Shaman storage (shaman://endpoint/#checkoutID).")

    parser.add_argument('-p', '--project', type=pathlib.Path,
                        help='Root directory of your project. Paths to below this directory are '
                             'kept in the BAT Pack as well, whereas references to assets from '
                             'outside this directory will have to be rewritten. The blend file MUST '
                             'be inside the project directory. If this option is omitted, the '
                             'directory containing the blend file is taken as the project '
                             'directory.')
    parser.add_argument('-n', '--noop', default=False, action='store_true',
                        help="Don't copy files, just show what would be done.")
    parser.add_argument('-e', '--exclude', nargs='*', default='',
                        help="Space-separated list of glob patterns (like '*.abc *.vbo') to "
                             "exclude.")
    parser.add_argument('-c', '--compress', default=False, action='store_true',
                        help='Compress blend files while copying. This option is only valid when '
                             'packing into a directory (contrary to ZIP file or S3 upload). '
                             'Note that files will NOT be compressed when the destination file '
                             'already exists and has the same size as the original file.')
    parser.add_argument('-r', '--relative-only', default=False, action='store_true',
                        help='Only pack assets that are referred to with a relative path (e.g. '
                             'starting with `//`).')


def cli_pack(args):
    """Entry point of the 'pack' subcommand.

    :raises SystemExit: with status 1 when files could not be transferred,
        or via paths_from_cli() when the given paths are invalid.
    """
    bpath, ppath, tpath = paths_from_cli(args)

    with create_packer(args, bpath, ppath, tpath) as packer:
        packer.strategise()
        try:
            packer.execute()
        except blender_asset_tracer.pack.transfer.FileTransferError as ex:
            log.error("%d files couldn't be copied, starting with %s",
                      len(ex.files_remaining), ex.files_remaining[0])
            raise SystemExit(1)


def create_packer(args, bpath: pathlib.Path, ppath: pathlib.Path, target: str) -> 'pack.Packer':
    """Create the Packer that matches the target.

    The target is inspected in this order: S3 URL, Shaman URL, name ending
    in '.zip', and otherwise a plain directory.

    :raises ValueError: when a CLI option is not supported by the chosen packer.
    """
    if target.startswith('s3:/'):
        if args.noop:
            raise ValueError('S3 uploader does not support no-op.')

        if args.compress:
            raise ValueError('S3 uploader does not support on-the-fly compression')

        if args.relative_only:
            raise ValueError('S3 uploader does not support the --relative-only option')

        packer = create_s3packer(bpath, ppath, pathlib.PurePosixPath(target))

    elif target.startswith(('shaman+http:/', 'shaman+https:/', 'shaman:/')):
        if args.noop:
            raise ValueError('Shaman uploader does not support no-op.')

        if args.compress:
            raise ValueError('Shaman uploader does not support on-the-fly compression')

        if args.relative_only:
            raise ValueError('Shaman uploader does not support the --relative-only option')

        packer = create_shamanpacker(bpath, ppath, target)

    elif target.lower().endswith('.zip'):
        from blender_asset_tracer.pack import zipped

        if args.compress:
            raise ValueError('ZIP packer does not support on-the-fly compression')

        packer = zipped.ZipPacker(bpath, ppath, target, noop=args.noop,
                                  relative_only=args.relative_only)
    else:
        packer = pack.Packer(bpath, ppath, target, noop=args.noop,
                             compress=args.compress, relative_only=args.relative_only)

    if args.exclude:
        # args.exclude is a list, due to nargs='*', so we have to split and flatten.
        globs = [glob
                 for globs in args.exclude
                 for glob in globs.split()]
        log.info('Excluding: %s', ', '.join(repr(g) for g in globs))
        packer.exclude(*globs)
    return packer


def create_s3packer(bpath, ppath, tpath) -> 'pack.Packer':
    """Create a packer that uploads to S3-compatible storage.

    :param tpath: the target URL as a PurePosixPath; its second component is
        used as the endpoint host name, the rest as the path on that storage.
    """
    from blender_asset_tracer.pack import s3

    # Split the target path into 's3:/', hostname, and actual target path
    parts = tpath.parts
    endpoint = 'https://%s/' % parts[1]
    tpath = pathlib.Path(*parts[2:])
    log.info('Uploading to S3-compatible storage %s at %s', endpoint, tpath)

    return s3.S3Packer(bpath, ppath, tpath, endpoint=endpoint)


def create_shamanpacker(bpath: pathlib.Path, ppath: pathlib.Path, tpath: str) -> 'pack.Packer':
    """Creates a package for sending files to a Shaman server.

    URLs should have the form:
        shaman://hostname/base/url#jobID
    This uses HTTPS to connect to the server. To connect using HTTP, use:
        shaman+http://hostname/base-url#jobID
    """
    from blender_asset_tracer.pack import shaman

    endpoint, checkout_id = shaman.parse_endpoint(tpath)
    if not checkout_id:
        log.warning('No checkout ID given on the URL. Going to send BAT pack to Shaman, '
                    'but NOT creating a checkout')

    log.info('Uploading to Shaman server %s with job %s', endpoint, checkout_id)
    return shaman.ShamanPacker(bpath, ppath, '/', endpoint=endpoint, checkout_id=checkout_id)


def paths_from_cli(args) -> typing.Tuple[pathlib.Path, pathlib.Path, str]:
    """Return paths to blendfile, project, and pack target.

    Calls sys.exit() if anything is wrong: status 3 for problems with the
    blend file, status 5 for problems with the project directory.
    """
    bpath = args.blendfile
    if not bpath.exists():
        log.critical('File %s does not exist', bpath)
        sys.exit(3)
    if bpath.is_dir():
        log.critical('%s is a directory, should be a blend file', bpath)
        sys.exit(3)
    bpath = bpathlib.make_absolute(bpath)

    tpath = args.target

    if args.project is None:
        ppath = bpathlib.make_absolute(bpath).parent
        log.warning('No project path given, using %s', ppath)
    else:
        ppath = bpathlib.make_absolute(args.project)

    if not ppath.exists():
        log.critical('Project directory %s does not exist', ppath)
        sys.exit(5)

    if not ppath.is_dir():
        log.warning('Project path %s is not a directory; using the parent %s', ppath, ppath.parent)
        ppath = ppath.parent

    try:
        bpath.relative_to(ppath)
    except ValueError:
        # Report the effective project directory; args.project may have been
        # a file, in which case its parent is what was actually checked.
        log.critical('Project directory %s does not contain blend file %s',
                     ppath, bpath)
        sys.exit(5)

    log.info('Blend file to pack: %s', bpath)
    log.info('Project path: %s', ppath)
    log.info('Pack will be created in: %s', tpath)

    return bpath, ppath, tpath
def _move_or_copy(src: pathlib.Path, dest: pathlib.Path,
                  my_log: logging.Logger,
                  *,
                  source_must_remain: bool):
    """Transfer `src` to `dest`, gzip-compressing it unless it already is.

    A source that starts with the gzip magic bytes is copied or moved
    unchanged. Anything else is written to `dest` through gzip, after which
    the source is deleted when it does not have to remain.

    :param src: File to copy/move.
    :param dest: Path to copy/move to.
    :param my_log: Logger to use for logging.
    :param source_must_remain: True to copy, False to move.
    """
    with src.open('rb') as srcfile:
        magic = srcfile.read(2)
        already_gzipped = magic == b'\x1f\x8b'

        if not already_gzipped:
            my_log.debug('Compressing %s on the fly while copying to %s', src, dest)
            with gzip.open(str(dest), mode='wb') as destfile:
                # The two bytes read for the magic check belong to the data too.
                destfile.write(magic)
                shutil.copyfileobj(srcfile, destfile, BLOCK_SIZE)

    # The source file is closed from here on, so it can be moved or deleted.
    if already_gzipped:
        my_log.debug('Source file %s is GZipped already', src)
        transfer = shutil.copy2 if source_must_remain else shutil.move
        transfer(str(src), str(dest))
        return

    if not source_must_remain:
        my_log.debug('Deleting source file %s', src)
        src.unlink()
class PathAction(enum.Enum):
    """What should happen with the path of an asset."""

    KEEP_PATH = 1
    FIND_NEW_LOCATION = 2


class AssetAction:
    """Everything needed to rewrite blend files and copy one asset."""

    def __init__(self) -> None:
        self.path_action = PathAction.KEEP_PATH

        # BlockUsage objects referring to this asset. They could refer to
        # data blocks in this blend file (if the asset is a blend file) or
        # in another blend file.
        self.usages = []  # type: typing.List[result.BlockUsage]

        # Absolute path to the asset in the BAT Pack. This path may not
        # exist on the local file system at all, for example when uploading
        # files to remote S3-compatible storage.
        self.new_path = None  # type: typing.Optional[pathlib.PurePath]

        # Optional path from which to read the asset. Used when blend files
        # have been rewritten; when set, it is assumed that the file can be
        # moved instead of copied.
        self.read_from = None  # type: typing.Optional[pathlib.Path]

        # BlockUsage objects in this asset that may require rewriting.
        # Empty list if this AssetAction is not for a blend file.
        self.rewrites = []  # type: typing.List[result.BlockUsage]
+ + See the Packer.abort() function. + """ + + +class Packer: + """Takes a blend file and bundle it with its dependencies. + + The process is separated into two functions: + + - strategise() finds all the dependencies and determines what to do + with them. + - execute() performs the actual packing operation, by rewriting blend + files to ensure the paths to moved files are correct and + transferring the files. + + The file transfer is performed in a separate thread by a FileTransferer + instance. + """ + + def __init__(self, + bfile: pathlib.Path, + project: pathlib.Path, + target: str, + *, + noop=False, + compress=False, + relative_only=False) -> None: + self.blendfile = bfile + self.project = project + self.target = target + self._target_path = self._make_target_path(target) + self.noop = noop + self.compress = compress + self.relative_only = relative_only + self._aborted = threading.Event() + self._abort_lock = threading.RLock() + self._abort_reason = '' + + # Set this to a custom Callback() subclass instance before calling + # strategise() to receive progress reports. 
+ self._progress_cb = progress.Callback() + self._tscb = progress.ThreadSafeCallback(self._progress_cb) + + self._exclude_globs = set() # type: typing.Set[str] + + from blender_asset_tracer.cli import common + self._shorten = functools.partial(common.shorten, self.project) + + if noop: + log.warning('Running in no-op mode, only showing what will be done.') + + # Filled by strategise() + self._actions = collections.defaultdict(AssetAction) \ + # type: typing.DefaultDict[pathlib.Path, AssetAction] + self.missing_files = set() # type: typing.Set[pathlib.Path] + self._new_location_paths = set() # type: typing.Set[pathlib.Path] + self._output_path = None # type: typing.Optional[pathlib.PurePath] + + # Filled by execute() + self._file_transferer = None # type: typing.Optional[transfer.FileTransferer] + + # Number of files we would copy, if not for --noop + self._file_count = 0 + + self._tmpdir = tempfile.TemporaryDirectory(prefix='bat-', suffix='-batpack') + self._rewrite_in = pathlib.Path(self._tmpdir.name) + + def _make_target_path(self, target: str) -> pathlib.PurePath: + """Return a Path for the given target. + + This can be the target directory itself, but can also be a non-existent + directory if the target doesn't support direct file access. It should + only be used to perform path operations, and never for file operations. 
+ """ + return pathlib.Path(target).absolute() + + def close(self) -> None: + """Clean up any temporary files.""" + self._tscb.flush() + self._tmpdir.cleanup() + + def __enter__(self) -> 'Packer': + return self + + def __exit__(self, exc_type, exc_val, exc_tb) -> None: + self.close() + + @property + def output_path(self) -> pathlib.PurePath: + """The path of the packed blend file in the target directory.""" + assert self._output_path is not None + return self._output_path + + @property + def progress_cb(self) -> progress.Callback: + return self._progress_cb + + @progress_cb.setter + def progress_cb(self, new_progress_cb: progress.Callback): + self._tscb.flush() + self._progress_cb = new_progress_cb + self._tscb = progress.ThreadSafeCallback(self._progress_cb) + + def abort(self, reason='') -> None: + """Aborts the current packing process. + + Can be called from any thread. Aborts as soon as the running strategise + or execute function gets control over the execution flow, by raising + an Aborted exception. + """ + with self._abort_lock: + self._abort_reason = reason + if self._file_transferer: + self._file_transferer.abort() + self._aborted.set() + + def _check_aborted(self) -> None: + """Raises an Aborted exception when abort() was called.""" + + with self._abort_lock: + reason = self._abort_reason + if self._file_transferer is not None and self._file_transferer.has_error: + log.error('A transfer error occurred') + reason = self._file_transferer.error_message() + elif not self._aborted.is_set(): + return + + log.warning('Aborting') + self._tscb.flush() + self._progress_cb.pack_aborted(reason) + raise Aborted(reason) + + def exclude(self, *globs: str): + """Register glob-compatible patterns of files that should be ignored. + + Must be called before calling strategise(). 
def strategise(self) -> None:
    """Decide, per asset, how it ends up in the pack.

    Every asset traced from the blend file lands in one of two groups:
        - it can be copied as-is, nothing smart required, or
        - the blend files referring to it need to be rewritten.

    Globs are *not* expanded here. A glob counts as one single asset and is
    only evaluated when the actual transfer happens in execute().
    """

    # The blend file being packed is generally not its own dependency, so
    # it has to be registered explicitly.
    blend_abs = bpathlib.make_absolute(self.blendfile)

    # Both paths are made absolute in the same way, because this also
    # translates network shares mapped to Windows drive letters back to
    # their UNC notation. Treating only one of the two like that can make
    # the relative_to() call below fail.
    project_abs = bpathlib.make_absolute(self.project)
    packed_blend = self._target_path / blend_abs.relative_to(project_abs)
    self._output_path = packed_blend

    self._progress_cb.pack_start()

    blend_action = self._actions[blend_abs]
    blend_action.path_action = PathAction.KEEP_PATH
    blend_action.new_path = packed_blend

    self._check_aborted()
    self._new_location_paths = set()
    for usage in trace.deps(self.blendfile, self._progress_cb):
        self._check_aborted()
        asset_path = usage.abspath

        excluded = any(asset_path.match(pattern) for pattern in self._exclude_globs)
        if excluded:
            log.info('Excluding file: %s', asset_path)
            continue

        if self.relative_only and not usage.asset_path.startswith(b'//'):
            log.info('Skipping absolute path: %s', usage.asset_path)
            continue

        # Sequences get an extra existence check before being handled
        # like any other asset.
        visit = self._visit_sequence if usage.is_sequence else self._visit_asset
        visit(asset_path, usage)

    self._find_new_paths()
    self._group_rewrites()
def _find_new_paths(self):
    """Assign a location inside the BAT Pack to every asset that needs one.

    Each path in ``self._new_location_paths`` is stripped of its root and
    placed under the ``_outside_project`` directory of the target path.
    """

    for asset_path in self._new_location_paths:
        action = self._actions[asset_path]
        assert isinstance(action, AssetAction)

        rootless = bpathlib.strip_root(asset_path)
        target_root = pathlib.Path(self._target_path)
        action.new_path = target_root.joinpath('_outside_project', rootless)
def _path_in_project(self, path: pathlib.Path) -> bool:
    """Tell whether ``path`` lies inside the project directory.

    Both the path and ``self.project`` are made absolute before comparing.

    :returns: True when the absolute path is relative to the absolute
        project directory, False otherwise.
    """
    candidate = bpathlib.make_absolute(path)
    project_root = bpathlib.make_absolute(self.project)
    try:
        candidate.relative_to(project_root)
    except ValueError:
        # relative_to() refuses paths that are not below project_root.
        return False
    else:
        return True
def _rewrite_paths(self) -> None:
    """Rewrite paths to the new location of the assets.

    For every action that has pending rewrites, the (cached) blend file is
    copied to a uniquely named file in the temporary directory, and the
    path fields of the recorded usages are updated in that copy. The copy
    is stored in ``action.read_from``; the original file is not written to
    by this function.

    :raises Aborted: via _check_aborted(), when abort() was called or a
        transfer error occurred.
    """

    for bfile_path, action in self._actions.items():
        if not action.rewrites:
            continue
        self._check_aborted()

        assert isinstance(bfile_path, pathlib.Path)
        # bfile_pp is the final path of this blend file in the BAT pack.
        # It is used to determine relative paths to other blend files.
        # It is *not* used for any disk I/O, since the file may not even
        # exist on the local filesystem.
        bfile_pp = action.new_path
        assert bfile_pp is not None

        # Use tempfile to reserve a unique name in our temporary directory.
        # Only the name is needed, so the handle is closed right away
        # instead of being left open until it is garbage-collected.
        # delete=False keeps the file on disk after closing; it is removed
        # together with the temporary directory when self.close() is called.
        with tempfile.NamedTemporaryFile(dir=str(self._rewrite_in),
                                         prefix='bat-',
                                         suffix='-' + bfile_path.name,
                                         delete=False) as bfile_tmp:
            bfile_tp = pathlib.Path(bfile_tmp.name)
        action.read_from = bfile_tp
        log.info('Rewriting %s to %s', bfile_path, bfile_tp)

        # The original blend file will have been cached, so we can use it
        # to avoid re-parsing all data blocks in the to-be-rewritten file.
        bfile = blendfile.open_cached(bfile_path, assert_cached=True)
        bfile.copy_and_rebind(bfile_tp, mode='rb+')

        for usage in action.rewrites:
            self._check_aborted()
            assert isinstance(usage, result.BlockUsage)
            asset_pp = self._actions[usage.abspath].new_path
            assert isinstance(asset_pp, pathlib.Path)

            log.debug('   - %s is packed at %s', usage.asset_path, asset_pp)
            relpath = bpathlib.BlendPath.mkrelative(asset_pp, bfile_pp)
            if relpath == usage.asset_path:
                log.info('   - %s remained at %s', usage.asset_path, relpath)
                continue

            log.info('   - %s moved to %s', usage.asset_path, relpath)

            # Find the same block in the newly copied file.
            block = bfile.dereference_pointer(usage.block.addr_old)
            if usage.path_full_field is None:
                dir_field = usage.path_dir_field
                assert dir_field is not None
                log.debug('   - updating field %s of block %s',
                          dir_field.name.name_only,
                          block)
                reldir = bpathlib.BlendPath.mkrelative(asset_pp.parent, bfile_pp)
                written = block.set(dir_field.name.name_only, reldir)
                log.debug('   - written %d bytes', written)

                # BIG FAT ASSUMPTION that the filename (e.g. basename
                # without path) does not change. This makes things much
                # easier, as in the sequence editor the directory and
                # filename fields are in different blocks. See the
                # blocks2assets.scene() function for the implementation.
            else:
                log.debug('   - updating field %s of block %s',
                          usage.path_full_field.name.name_only, block)
                written = block.set(usage.path_full_field.name.name_only, relpath)
                log.debug('   - written %d bytes', written)

        # Make sure we close the file, otherwise changes may not be
        # flushed before it gets copied.
        if bfile.is_modified:
            self._progress_cb.rewrite_blendfile(bfile_path)
        bfile.close()
+ break + + def _send_to_target(self, + asset_path: pathlib.Path, + target: pathlib.PurePath, + may_move=False): + if self.noop: + print('%s -> %s' % (asset_path, target)) + self._file_count += 1 + return + + verb = 'move' if may_move else 'copy' + log.debug('Queueing %s of %s', verb, asset_path) + + self._tscb.flush() + + assert self._file_transferer is not None + if may_move: + self._file_transferer.queue_move(asset_path, target) + else: + self._file_transferer.queue_copy(asset_path, target) + + def _write_info_file(self): + """Write a little text file with info at the top of the pack.""" + + infoname = 'pack-info.txt' + infopath = self._rewrite_in / infoname + log.debug('Writing info to %s', infopath) + with infopath.open('wt', encoding='utf8') as infofile: + print('This is a Blender Asset Tracer pack.', file=infofile) + print('Start by opening the following blend file:', file=infofile) + print(' %s' % self._output_path.relative_to(self._target_path).as_posix(), + file=infofile) + + self._file_transferer.queue_move(infopath, self._target_path / infoname) diff --git a/pack/filesystem.py b/pack/filesystem.py new file mode 100644 index 0000000..deba9db --- /dev/null +++ b/pack/filesystem.py @@ -0,0 +1,273 @@ +# ***** BEGIN GPL LICENSE BLOCK ***** +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
class FileCopier(transfer.FileTransferer):
    """Copies or moves files in source directory order.

    Queued (source, destination, action) items are taken from the queue in
    run() and handed to a thread pool; the per-item work is done by
    _thread().
    """

    # When we don't compress the files, the process is I/O bound,
    # and trashing the storage by using multiple threads will
    # only slow things down.
    transfer_threads = 1  # type: typing.Optional[int]

    def __init__(self):
        super().__init__()
        self.files_transferred = 0
        self.files_skipped = 0
        # (source, destination) pairs that copyfile()/copytree() already handled.
        self.already_copied = set()

        # Dispatch table, keyed by (is_dir, action).
        self.transfer_funcs = {
            (False, transfer.Action.COPY): self.copyfile,
            (True, transfer.Action.COPY): self.copytree,
            (False, transfer.Action.MOVE): self.move,
            (True, transfer.Action.MOVE): self.move,
        }

    def run(self) -> None:
        """Process the queue, dispatching each transfer to the thread pool.

        Items that cannot be handled (abort, error) are put back on the
        queue so that they can be inspected afterwards.
        """

        pool = multiprocessing.pool.ThreadPool(processes=self.transfer_threads)
        dst = pathlib.Path()
        for src, pure_dst, act in self.iter_queue():
            try:
                dst = pathlib.Path(pure_dst)

                if self.has_error or self._abort.is_set():
                    raise AbortTransfer()

                if self._skip_file(src, dst, act):
                    continue

                # We want to do this in this thread, as it's not thread safe itself.
                dst.parent.mkdir(parents=True, exist_ok=True)

                pool.apply_async(self._thread, (src, dst, act))
            except AbortTransfer:
                # either self._error or self._abort is already set. We just have to
                # let the system know we didn't handle those files yet.
                self.queue.put((src, dst, act), timeout=1.0)
            except Exception as ex:
                # We have to catch exceptions in a broad way, as this is running in
                # a separate thread, and exceptions won't otherwise be seen.
                if self._abort.is_set():
                    log.debug('Error transferring %s to %s: %s', src, dst, ex)
                else:
                    msg = 'Error transferring %s to %s' % (src, dst)
                    log.exception(msg)
                    self.error_set(msg)
                # Put the files to copy back into the queue, and abort. This allows
                # the main thread to inspect the queue and see which files were not
                # copied. The one we just failed (due to this exception) should also
                # be reported there.
                self.queue.put((src, dst, act), timeout=1.0)
                break

        log.debug('All transfer threads queued')
        pool.close()
        log.debug('Waiting for transfer threads to finish')
        pool.join()
        log.debug('All transfer threads finished')

        if self.files_transferred:
            log.info('Transferred %d files', self.files_transferred)
        if self.files_skipped:
            log.info('Skipped %d files', self.files_skipped)

    def _thread(self, src: pathlib.Path, dst: pathlib.Path, act: transfer.Action):
        """Perform one transfer; runs inside the thread pool.

        The transfer function is looked up by whether ``src`` is a directory
        and by the requested action. Failures are reported via error_set()
        and the item is put back on the queue.
        """
        try:
            tfunc = self.transfer_funcs[src.is_dir(), act]

            if self.has_error or self._abort.is_set():
                raise AbortTransfer()

            log.info('%s %s -> %s', act.name, src, dst)
            tfunc(src, dst)
        except AbortTransfer:
            # either self._error or self._abort is already set. We just have to
            # let the system know we didn't handle those files yet.
            self.queue.put((src, dst, act), timeout=1.0)
        except Exception as ex:
            # We have to catch exceptions in a broad way, as this is running in
            # a separate thread, and exceptions won't otherwise be seen.
            if self._abort.is_set():
                log.debug('Error transferring %s to %s: %s', src, dst, ex)
            else:
                msg = 'Error transferring %s to %s' % (src, dst)
                log.exception(msg)
                self.error_set(msg)
            # Put the files to copy back into the queue, and abort. This allows
            # the main thread to inspect the queue and see which files were not
            # copied. The one we just failed (due to this exception) should also
            # be reported there.
            self.queue.put((src, dst, act), timeout=1.0)

    def _skip_file(self, src: pathlib.Path, dst: pathlib.Path, act: transfer.Action) -> bool:
        """Skip this file (return True) or not (return False).

        A file is skipped when the destination exists, has the same size as
        the source, and is not older than the source. When a skipped file
        was queued as a move, the source is deleted.
        """
        st_src = src.stat()  # must exist, or it wouldn't be queued.
        if not dst.exists():
            return False

        st_dst = dst.stat()
        if st_dst.st_size != st_src.st_size or st_dst.st_mtime < st_src.st_mtime:
            return False

        log.info('SKIP %s; already exists', src)
        if act == transfer.Action.MOVE:
            log.debug('Deleting %s', src)
            src.unlink()
        self.files_skipped += 1
        return True

    def _move(self, srcpath: pathlib.Path, dstpath: pathlib.Path):
        """Low-level file move"""
        shutil.move(str(srcpath), str(dstpath))

    def _copy(self, srcpath: pathlib.Path, dstpath: pathlib.Path):
        """Low-level file copy"""
        shutil.copy2(str(srcpath), str(dstpath))

    def move(self, srcpath: pathlib.Path, dstpath: pathlib.Path):
        """Move a file or directory and report the transferred size."""
        # Stat before moving; the source is gone afterwards.
        s_stat = srcpath.stat()
        self._move(srcpath, dstpath)

        self.files_transferred += 1
        self.report_transferred(s_stat.st_size)

    def copyfile(self, srcpath: pathlib.Path, dstpath: pathlib.Path):
        """Copy a file, skipping when it already exists.

        Does nothing when an abort was requested or an error was set.
        """

        if self._abort.is_set() or self.has_error:
            return

        if (srcpath, dstpath) in self.already_copied:
            log.debug('SKIP %s; already copied', srcpath)
            return

        s_stat = srcpath.stat()  # must exist, or it wouldn't be queued.
        if dstpath.exists():
            d_stat = dstpath.stat()
            if d_stat.st_size == s_stat.st_size and d_stat.st_mtime >= s_stat.st_mtime:
                log.info('SKIP %s; already exists', srcpath)
                self.progress_cb.transfer_file_skipped(srcpath, dstpath)
                self.files_skipped += 1
                return

        log.debug('Copying %s -> %s', srcpath, dstpath)
        self._copy(srcpath, dstpath)

        self.already_copied.add((srcpath, dstpath))
        self.files_transferred += 1

        self.report_transferred(s_stat.st_size)

    def copytree(self, src: pathlib.Path, dst: pathlib.Path,
                 symlinks=False, ignore_dangling_symlinks=False):
        """Recursively copy a directory tree.

        Copy of shutil.copytree() with some changes:

        - Using pathlib
        - The destination directory may already exist.
        - Existing files with the same file size are skipped.
        - Removed ability to ignore things.

        :param symlinks: when true, symlinks in the source tree are
            recreated as symlinks in the destination; otherwise the
            linked-to content is copied.
        :param ignore_dangling_symlinks: when true (and ``symlinks`` is
            false), symlinks whose target does not exist are skipped.
        :returns: ``dst``, or None when this (src, dst) pair was already copied.
        :raises AbortTransfer: when an abort was requested or an error was set.
        :raises shutil.Error: with the collected (src, dst, reason) tuples
            when one or more entries could not be copied.
        """

        if (src, dst) in self.already_copied:
            log.debug('SKIP %s; already copied', src)
            return

        if self.has_error or self._abort.is_set():
            raise AbortTransfer()

        dst.mkdir(parents=True, exist_ok=True)
        errors = []  # type: typing.List[typing.Tuple[pathlib.Path, pathlib.Path, str]]
        for srcpath in src.iterdir():
            if self.has_error or self._abort.is_set():
                raise AbortTransfer()

            dstpath = dst / srcpath.name
            try:
                if srcpath.is_symlink():
                    linkto = srcpath.resolve()
                    if symlinks:
                        # Create the link at the destination, pointing to the
                        # resolved target. Path.symlink_to() makes the path it
                        # is called on the symlink, so it has to be called on
                        # dstpath, not on linkto.
                        dstpath.symlink_to(linkto)
                        shutil.copystat(str(srcpath), str(dstpath), follow_symlinks=not symlinks)
                    else:
                        # ignore dangling symlink if the flag is on
                        if not linkto.exists() and ignore_dangling_symlinks:
                            continue
                        # otherwise let the copy happen; copying a dangling
                        # link will raise an error.
                        if srcpath.is_dir():
                            self.copytree(srcpath, dstpath, symlinks,
                                          ignore_dangling_symlinks)
                        else:
                            self.copyfile(srcpath, dstpath)
                elif srcpath.is_dir():
                    self.copytree(srcpath, dstpath, symlinks,
                                  ignore_dangling_symlinks)
                else:
                    # Will raise a SpecialFileError for unsupported file types
                    self.copyfile(srcpath, dstpath)
            # catch the Error from the recursive copytree so that we can
            # continue with other files
            except shutil.Error as err:
                errors.extend(err.args[0])
            except OSError as why:
                errors.append((srcpath, dstpath, str(why)))
        try:
            shutil.copystat(str(src), str(dst))
        except OSError as why:
            # Copying file access times may fail on Windows
            if getattr(why, 'winerror', None) is None:
                errors.append((src, dst, str(why)))
        if errors:
            raise shutil.Error(errors)

        self.already_copied.add((src, dst))

        return dst
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# ***** END GPL LICENCE BLOCK ***** +# +# (c) 2018, Blender Foundation - Sybren A. Stüvel +"""Callback class definition for BAT Pack progress reporting.""" +import threading + +import functools +import logging +import pathlib +import queue +import typing + +import blender_asset_tracer.trace.progress + +log = logging.getLogger(__name__) + + +class Callback(blender_asset_tracer.trace.progress.Callback): + """BAT Pack progress reporting.""" + + def pack_start(self) -> None: + """Called when packing starts.""" + + def pack_done(self, + output_blendfile: pathlib.PurePath, + missing_files: typing.Set[pathlib.Path]) -> None: + """Called when packing is done.""" + + def pack_aborted(self, reason: str): + """Called when packing was aborted.""" + + def trace_blendfile(self, filename: pathlib.Path) -> None: + """Called for every blendfile opened when tracing dependencies.""" + + def trace_asset(self, filename: pathlib.Path) -> None: + """Called for every asset found when tracing dependencies. + + Note that this can also be a blend file. + """ + + def rewrite_blendfile(self, orig_filename: pathlib.Path) -> None: + """Called for every rewritten blendfile.""" + + def transfer_file(self, src: pathlib.Path, dst: pathlib.PurePath) -> None: + """Called when a file transfer starts.""" + + def transfer_file_skipped(self, src: pathlib.Path, dst: pathlib.PurePath) -> None: + """Called when a file is skipped because it already exists.""" + + def transfer_progress(self, total_bytes: int, transferred_bytes: int) -> None: + """Called during file transfer, with per-pack info (not per file). + + :param total_bytes: The total amount of bytes to be transferred for + the current packing operation. 
class ThreadSafeCallback(Callback):
    """Thread-safe wrapper for Callback instances.

    Progress calls made from the thread that created this wrapper are
    passed to the wrapped callback immediately. Calls made from any other
    thread are queued until flush() is called; the queued calls are then
    called in the same thread as the one calling flush().
    """

    def __init__(self, wrapped: Callback) -> None:
        self.log = log.getChild('ThreadSafeCallback')
        self.wrapped = wrapped

        # Thread-safe queue for passing progress reports on the main thread.
        self._reporting_queue = queue.Queue()  # type: queue.Queue[typing.Callable]
        # The thread constructing this object is regarded as the main thread.
        self._main_thread_id = threading.get_ident()

    def _queue(self, func: typing.Callable, *args, **kwargs):
        """Call ``func`` now when on the main thread, otherwise queue the call."""
        partial = functools.partial(func, *args, **kwargs)

        if self._main_thread_id == threading.get_ident():
            partial()
        else:
            self._reporting_queue.put(partial)

    def pack_start(self) -> None:
        self._queue(self.wrapped.pack_start)

    def pack_done(self,
                  output_blendfile: pathlib.PurePath,
                  missing_files: typing.Set[pathlib.Path]) -> None:
        self._queue(self.wrapped.pack_done, output_blendfile, missing_files)

    def pack_aborted(self, reason: str):
        self._queue(self.wrapped.pack_aborted, reason)

    def trace_blendfile(self, filename: pathlib.Path) -> None:
        self._queue(self.wrapped.trace_blendfile, filename)

    def trace_asset(self, filename: pathlib.Path) -> None:
        self._queue(self.wrapped.trace_asset, filename)

    def rewrite_blendfile(self, orig_filename: pathlib.Path) -> None:
        # Forwarded like every other hook; without this override the call
        # would end in the inherited no-op and never reach the wrapped
        # callback.
        self._queue(self.wrapped.rewrite_blendfile, orig_filename)

    def transfer_file(self, src: pathlib.Path, dst: pathlib.PurePath) -> None:
        self._queue(self.wrapped.transfer_file, src, dst)

    def transfer_file_skipped(self, src: pathlib.Path, dst: pathlib.PurePath) -> None:
        self._queue(self.wrapped.transfer_file_skipped, src, dst)

    def transfer_progress(self, total_bytes: int, transferred_bytes: int) -> None:
        self._queue(self.wrapped.transfer_progress, total_bytes, transferred_bytes)

    def missing_file(self, filename: pathlib.Path) -> None:
        self._queue(self.wrapped.missing_file, filename)

    def flush(self, timeout: typing.Optional[float] = None) -> None:
        """Call the queued calls, call this in the main thread.

        :param timeout: when None, only the calls that are already queued
            are executed and the function returns as soon as the queue is
            empty. Otherwise each fetch waits up to ``timeout`` seconds for
            a new call before giving up.
        """

        while True:
            try:
                call = self._reporting_queue.get(block=timeout is not None,
                                                 timeout=timeout)
            except queue.Empty:
                return

            try:
                call()
            except Exception:
                # Don't let the handling of one callback call
                # block the entire flush process.
                self.log.exception('Error calling %s', call)
def compute_md5(filepath: pathlib.Path) -> str:
    """Return the hexadecimal MD5 digest of the contents of a file.

    The file is read in blocks of 102400 bytes, so it is never loaded into
    memory as a whole.

    :param filepath: the file to hash; it is opened in binary mode.
    :returns: the MD5 sum as a hexadecimal string.
    """
    log.debug('Computing MD5sum of %s', filepath)
    digest = hashlib.md5()
    with filepath.open('rb') as stream:
        # iter() with a sentinel stops at the empty read that signals EOF.
        for chunk in iter(lambda: stream.read(102400), b''):
            digest.update(chunk)
    hexdigest = digest.hexdigest()
    log.debug('MD5sum of %s is %s', filepath, hexdigest)
    return hexdigest
def run(self) -> None:
    """Upload every queued file to S3.

    Items are taken from self.iter_queue() as (source, destination, action)
    tuples. Sources of MOVE items are deleted after their upload was handled.

    When any exception occurs, the failing item is put back into the queue
    and the method returns, so that whoever inspects the queue afterwards can
    see which files were not transferred.
    """
    files_transferred = 0
    files_skipped = 0

    for src, dst, act in self.iter_queue():
        try:
            did_upload = self.upload_file(src, dst)
            files_transferred += did_upload
            files_skipped += not did_upload

            # upload_file() returns False both for "already on the server"
            # and for "upload interrupted by an abort". In the latter case
            # the file never arrived, so the source must not be deleted.
            if act == transfer.Action.MOVE and not self._abort.is_set():
                self.delete_file(src)
        except Exception:
            # We have to catch exceptions in a broad way, as this is running in
            # a separate thread, and exceptions won't otherwise be seen.
            log.exception('Error transferring %s to %s', src, dst)
            # Put the files to copy back into the queue, and abort. This allows
            # the main thread to inspect the queue and see which files were not
            # copied. The one we just failed (due to this exception) should also
            # be reported there.
            self.queue.put((src, dst, act))
            return

    if files_transferred:
        log.info('Transferred %d files', files_transferred)
    if files_skipped:
        log.info('Skipped %d files', files_skipped)
def get_metadata(self, bucket: str, key: str) -> typing.Tuple[str, int]:
    """Get MD5 sum and size on S3.

    :param bucket: name of the bucket to query.
    :param key: key of the object inside that bucket.
    :returns: the MD5 hexadecimal hash and the file size in bytes.
        If the file does not exist or has no known MD5 sum,
        returns ('', -1)
    :raises ValueError: when the server answers with any error other than
        a 404; the message contains the error response.
    """
    import botocore.exceptions

    log.debug('Getting metadata of %s/%s', bucket, key)
    try:
        info = self.client.head_object(Bucket=bucket, Key=key)
    except botocore.exceptions.ClientError as ex:
        # Fall back to an empty dict so a response without an 'Error' entry
        # results in the ValueError below instead of an AttributeError.
        error_code = (ex.response.get('Error') or {}).get('Code', 'Unknown')
        # error_code already is a string, but this makes the code forward
        # compatible with a time where they use integer codes.
        if str(error_code) == '404':
            return '', -1
        raise ValueError('error response: %s' % (ex.response,)) from None

    try:
        return info['Metadata']['md5'], info['ContentLength']
    except KeyError:
        # The object exists but carries no 'md5' metadata (or no length).
        return '', -1
def _get_auth_token(self) -> str:
    """Return the authentication token to use for the Shaman server.

    The SHAMAN_JWT_TOKEN environment variable is used when it is set to a
    non-empty value. Otherwise a token is requested from the 'get-token'
    URL of the Shaman endpoint, using an unauthenticated client.

    :raises requests.HTTPError: via raise_for_status() when the token
        request gets an error response.
    """
    # TODO: get a token from the Flamenco Server.
    token_from_env = os.environ.get('SHAMAN_JWT_TOKEN')
    if token_from_env:
        return token_from_env

    # The variable named in this message must match the one read above.
    log.warning('Using temporary hack to get auth token from Shaman, '
                'set SHAMAN_JWT_TOKEN to prevent')
    unauth_shaman = ShamanClient('', self.shaman_endpoint)
    resp = unauth_shaman.get('get-token', timeout=10)
    resp.raise_for_status()
    return resp.text
def parse_endpoint(shaman_url: str) -> typing.Tuple[str, str]:
    """Convert shaman://hostname/path#checkoutID into endpoint URL + checkout ID.

    The 'shaman' and 'shaman+https' schemes map to HTTPS, 'shaman+http' maps
    to HTTP. An empty path becomes '/'. The URL fragment is URL-unquoted and
    returned as the checkout ID; it is not part of the returned endpoint.

    :param shaman_url: the Shaman URL; it is passed through str() first.
    :returns: tuple (endpoint URL, checkout ID).
    :raises ValueError: when the URL has any other scheme.
    """

    urlparts = urllib.parse.urlparse(str(shaman_url))

    if urlparts.scheme in {'shaman', 'shaman+https'}:
        scheme = 'https'
    elif urlparts.scheme == 'shaman+http':
        scheme = 'http'
    else:
        # Format the message here; exceptions do not interpolate extra
        # arguments the way logging calls do.
        raise ValueError('Invalid scheme %r, choose shaman:// or shaman+http://'
                         % urlparts.scheme)

    checkout_id = urllib.parse.unquote(urlparts.fragment)

    path = urlparts.path or '/'
    # Keep params and query, replace scheme and path, and drop the fragment.
    new_urlparts = (scheme, urlparts.netloc, path, *urlparts[3:-1], '')
    endpoint = urllib.parse.urlunparse(new_urlparts)

    return endpoint, checkout_id
def find_files(root: Path) -> typing.Iterable[Path]:
    """Yield the paths of all files found below (and including) root.

    The tree is walked breadth-first. Anything whose name starts with a
    period is skipped, directories included; these can be things like '.svn'
    or '.git', which shouldn't be sent to Shaman. A symlink is yielded (as
    the link path, not its target) only when it resolves to a regular file.
    """
    pending = deque((root,))
    while pending:
        candidate = pending.popleft()

        if candidate.name.startswith('.'):
            continue

        if candidate.is_dir():
            pending.extend(candidate.iterdir())
            continue

        if candidate.is_symlink():
            # Follow the chain of links to see what it finally points at.
            is_regular_file = candidate.resolve().is_file()
        else:
            is_regular_file = candidate.is_file()

        if is_regular_file:
            yield candidate
def cleanup_cache() -> None:
    """Remove all cache files that are older than MAX_CACHE_FILES_AGE_SECS.

    Files with a modification time in the future are removed as well.
    Afterwards every visited directory is attempted to be removed, deepest
    first; directories that cannot be removed (for example because they
    still contain files) are left in place.

    The time spent is accounted to TimeInfo.checksum_cache_handling.
    """

    if not CACHE_ROOT.exists():
        return

    with time_tracker.track_time(TimeInfo, 'checksum_cache_handling'):
        num_removed_files = 0
        num_removed_dirs = 0
        reference_time = time.time()

        to_visit = deque((CACHE_ROOT,))
        visited_dirs = []
        while to_visit:
            entry = to_visit.popleft()

            if entry.is_dir():
                to_visit.extend(entry.iterdir())
                visited_dirs.append(entry)
                continue

            assert entry.is_file()
            # Raises ValueError when the entry is not below CACHE_ROOT.
            entry.relative_to(CACHE_ROOT)

            age = reference_time - entry.stat().st_mtime
            # Don't trust files from the future either.
            still_valid = 0 <= age <= MAX_CACHE_FILES_AGE_SECS
            if not still_valid:
                entry.unlink()
                num_removed_files += 1

        # Directories were collected parents-first; walk them in reverse so
        # that children are handled before the directory containing them.
        for directory in visited_dirs[::-1]:
            assert directory.is_dir()
            directory.relative_to(CACHE_ROOT)

            try:
                directory.rmdir()
            except OSError:
                continue
            num_removed_dirs += 1

    if num_removed_dirs or num_removed_files:
        log.info('Cache Cleanup: removed %d dirs and %d files', num_removed_dirs, num_removed_files)
def request(self, method: str, url: str, **kwargs) -> requests.Response:
    """Send a request through this client's session.

    :param method: the HTTP method, for example 'GET'.
    :param url: URL of the request; it is combined with the base URL of this
        client by means of urllib.parse.urljoin().
    :param kwargs: passed on to the session's request() call. A 'timeout' of
        300 is added when the caller did not pass one.
    :returns: whatever the session's request() call returns.
    """
    if 'timeout' not in kwargs:
        kwargs['timeout'] = 300
    target_url = urllib.parse.urljoin(self._base_url, url)
    return self._session.request(method, target_url, **kwargs)
def post(self, url, data=None, json=None, **kwargs):
    r"""Sends a POST request. Returns :class:`Response` object.

    This simply forwards to ``self.request`` with ``'POST'`` as method.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) Dictionary, list of tuples, bytes, or file-like
        object to send in the body of the :class:`Request`.
    :param json: (optional) json to send in the body of the :class:`Request`.
    :param kwargs: Optional arguments that ``request`` takes.
    :rtype: requests.Response
    """
    request_kwargs = {'data': data, 'json': json}
    request_kwargs.update(kwargs)
    return self.request('POST', url, **request_kwargs)
@contextlib.contextmanager
def track_time(tracker_object: typing.Any, attribute: str):
    """Context manager, tracks how long the context took to run.

    The duration, measured with time.monotonic(), is added to the given
    attribute of the tracker object. A missing attribute counts as 0.0.
    The time is recorded even when the context is left by an exception.

    :param tracker_object: object on which the total is accumulated.
    :param attribute: name of the attribute holding the total, in seconds.
    """
    start_time = time.monotonic()
    try:
        yield
    finally:
        # Without the 'finally', time spent in a failing context is lost.
        duration = time.monotonic() - start_time
        tracked_so_far = getattr(tracker_object, attribute, 0.0)
        setattr(tracker_object, attribute, tracked_so_far + duration)
class FileInfo:
    """Plain record describing one file that is to be sent to the Shaman."""

    def __init__(self, checksum: str, filesize: int, abspath: pathlib.Path):
        """Store the given values as attributes of the same name.

        :param checksum: checksum of the file contents.
        :param filesize: size of the file in bytes.
        :param abspath: location of the file on the local filesystem.
        """
        self.checksum, self.filesize, self.abspath = checksum, filesize, abspath
# noinspection PyBroadException
def run(self) -> None:
    """Send all queued files to the Shaman and request the checkout.

    The steps are:

    1. build the checkout definition from the queue,
    2. repeatedly (at most 50 times) send that definition to the Shaman and
       upload the files it reports as still needed,
    3. request the checkout,
    4. delete the source files that were queued to be moved.

    Failures are reported through self.error_set(); no exception is raised.
    """
    try:
        self.uploaded_files = 0
        self.uploaded_bytes = 0

        # Construct the Shaman Checkout Definition file.
        # This blocks until we know the entire list of files to transfer.
        definition_file, allowed_relpaths, delete_when_done = self._create_checkout_definition()
        if not definition_file:
            # An error has already been logged.
            return

        self.log.info('Created checkout definition file of %d KiB',
                      len(definition_file) // 1024)
        self.log.info('Feeding %d files to the Shaman', len(self._file_info))
        if self.log.isEnabledFor(logging.INFO):
            for path in self._file_info:
                self.log.info('   - %s', path)

        # Try to upload all the files.
        failed_paths = set()  # type: typing.Set[str]
        max_tries = 50
        for try_index in range(max_tries):
            # Send the file to the Shaman and see what we still need to send there.
            to_upload = self._send_checkout_def_to_shaman(definition_file, allowed_relpaths)
            if to_upload is None:
                # An error has already been logged.
                return

            if not to_upload:
                # The Shaman has every file. Paths that failed in an earlier
                # attempt have apparently arrived in the meantime (e.g. sent
                # by another client), so they are no longer failures.
                failed_paths = set()
                break

            # Send the files that still need to be sent.
            self.log.info('Upload attempt %d', try_index + 1)
            failed_paths = self._upload_files(to_upload)
            if not failed_paths:
                break

            # Having failed paths at this point is expected when multiple
            # clients are sending the same files. Instead of retrying on a
            # file-by-file basis, we just re-send the checkout definition
            # file to the Shaman and obtain a new list of files to upload.

        if failed_paths:
            self.log.error('Aborting upload due to too many failures')
            self.error_set('Giving up after %d attempts to upload the files' % max_tries)
            return

        self.log.info('All files uploaded succesfully')
        self._request_checkout(definition_file)

        # Delete the files that were supposed to be moved.
        for src in delete_when_done:
            self.delete_file(src)

    except Exception as ex:
        # We have to catch exceptions in a broad way, as this is running in
        # a separate thread, and exceptions won't otherwise be seen.
        self.log.exception('Error transferring files to Shaman')
        self.error_set('Unexpected exception transferring files to Shaman: %s' % ex)
def _send_checkout_def_to_shaman(self, definition_file: bytes,
                                 allowed_relpaths: typing.Set[str]) \
        -> typing.Optional[collections.deque]:
    """Send the checkout definition file to the Shaman.

    Every non-empty line of the reply is expected to be '<response> <path>'.
    Paths the Shaman does not know yet are placed at the front of the result,
    paths that someone else is already uploading at the back.

    :param definition_file: the checkout definition, sent as request body.
    :param allowed_relpaths: the paths the Shaman is allowed to ask for;
        a reply mentioning any other path is treated as an error.
    :return: An iterable of paths (relative to the project root) that still
        need to be uploaded, or None if there was an error.
    """
    resp = self.client.post('checkout/requirements', data=definition_file, stream=True,
                            headers={'Content-Type': 'text/plain'},
                            timeout=15)
    if resp.status_code >= 300:
        msg = 'Error from Shaman, code %d: %s' % (resp.status_code, resp.text)
        self.log.error(msg)
        self.error_set(msg)
        return None

    to_upload = collections.deque()  # type: collections.deque
    for line in resp.iter_lines():
        if not line:
            # Skip empty (keep-alive) lines; they carry no response and
            # cannot be split into a response and a path.
            continue

        response, path = line.decode().split(' ', 1)
        self.log.debug('   %s: %s', response, path)

        if path not in allowed_relpaths:
            msg = 'Shaman requested path we did not intend to upload: %r' % path
            self.log.error(msg)
            self.error_set(msg)
            return None

        if response == response_file_unknown:
            to_upload.appendleft(path)
        elif response == response_already_uploading:
            to_upload.append(path)
        elif response == 'ERROR':
            msg = 'Error from Shaman: %s' % path
            self.log.error(msg)
            self.error_set(msg)
            return None
        else:
            msg = 'Unknown response from Shaman for path %r: %r' % (path, response)
            self.log.error(msg)
            self.error_set(msg)
            return None

    return to_upload
+ """ + failed_paths = set() # type: typing.Set[str] + deferred_paths = set() + + def defer(some_path: str): + nonlocal to_upload + + self.log.info(' %s deferred (already being uploaded by someone else)', some_path) + deferred_paths.add(some_path) + + # Instead of deferring this one file, randomize the files to upload. + # This prevents multiple deferrals when someone else is uploading + # files from the same project (because it probably happens alphabetically). + all_files = list(to_upload) + random.shuffle(all_files) + to_upload = collections.deque(all_files) + + if not to_upload: + self.log.info('All %d files are at the Shaman already', len(self._file_info)) + self.report_transferred(0) + return failed_paths + + self.log.info('Going to upload %d of %d files', len(to_upload), len(self._file_info)) + while to_upload: + # After too many failures, just retry to get a fresh set of files to upload. + if len(failed_paths) > MAX_FAILED_PATHS: + self.log.info('Too many failures, going to abort this iteration') + failed_paths.update(to_upload) + return failed_paths + + path = to_upload.popleft() + fileinfo = self._file_info[path] + self.log.info(' %s', path) + + headers = { + 'X-Shaman-Original-Filename': path, + } + # Let the Shaman know whether we can defer uploading this file or not. + can_defer = (len(deferred_paths) < MAX_DEFERRED_PATHS + and path not in deferred_paths + and len(to_upload)) + if can_defer: + headers['X-Shaman-Can-Defer-Upload'] = 'true' + + url = 'files/%s/%d' % (fileinfo.checksum, fileinfo.filesize) + try: + with fileinfo.abspath.open('rb') as infile: + resp = self.client.post(url, data=infile, headers=headers) + + except requests.ConnectionError as ex: + if can_defer: + # Closing the connection with an 'X-Shaman-Can-Defer-Upload: true' header + # indicates that we should defer the upload. Requests doesn't give us the + # reply, even though it was written by the Shaman before it closed the + # connection. 
def _request_checkout(self, definition_file: bytes):
    """Ask the Shaman to create a checkout of this BAT pack.

    Nothing is requested when this transferrer has no checkout ID. On
    success the stripped response text is stored as the checkout location;
    a response status of 300 or higher is reported via self.error_set().

    :param definition_file: the checkout definition, sent as request body.
    """
    checkout_id = self.checkout_id
    if not checkout_id:
        self.log.warning('NOT requesting checkout at Shaman')
        return

    self.log.info('Requesting checkout at Shaman for checkout_id=%r', checkout_id)
    reply = self.client.post('checkout/create/%s' % checkout_id,
                             data=definition_file,
                             headers={'Content-Type': 'text/plain'})
    status = reply.status_code

    if status < 300:
        self._checkout_location = reply.text.strip()
        self.log.info('Response from Shaman, code %d: %s', status, reply.text)
        return

    failure = 'Error from Shaman, code %d: %s' % (status, reply.text)
    self.log.error(failure)
    self.error_set(failure)
+ + Implement a run() function in a subclass that performs the actual file + transfer. + """ + + def __init__(self) -> None: + super().__init__() + self.log = log.getChild('FileTransferer') + + # For copying in a different process. By using a priority queue the files + # are automatically sorted alphabetically, which means we go through all files + # in a single directory at a time. This should be faster to copy than random + # access. The order isn't guaranteed, though, as we're not waiting around for + # all file paths to be known before copying starts. + + # maxsize=100 is just a guess as to a reasonable upper limit. When this limit + # is reached, the main thread will simply block while waiting for this thread + # to finish copying a file. + self.queue = queue.PriorityQueue(maxsize=100) # type: queue.PriorityQueue[QueueItem] + self.done = threading.Event() + self._abort = threading.Event() # Indicates user-requested abort + + self.__error_mutex = threading.Lock() + self.__error = threading.Event() # Indicates abort due to some error + self.__error_message = '' + + # Instantiate a dummy progress callback so that we can call it + # without checking for None all the time. 
+ self.progress_cb = progress.ThreadSafeCallback(progress.Callback()) + self.total_queued_bytes = 0 + self.total_transferred_bytes = 0 + + @abc.abstractmethod + def run(self): + """Perform actual file transfer in a thread.""" + + def queue_copy(self, src: pathlib.Path, dst: pathlib.PurePath): + """Queue a copy action from 'src' to 'dst'.""" + assert not self.done.is_set(), 'Queueing not allowed after done_and_join() was called' + assert not self._abort.is_set(), 'Queueing not allowed after abort_and_join() was called' + if self.__error.is_set(): + return + self.queue.put((src, dst, Action.COPY)) + self.total_queued_bytes += src.stat().st_size + + def queue_move(self, src: pathlib.Path, dst: pathlib.PurePath): + """Queue a move action from 'src' to 'dst'.""" + assert not self.done.is_set(), 'Queueing not allowed after done_and_join() was called' + assert not self._abort.is_set(), 'Queueing not allowed after abort_and_join() was called' + if self.__error.is_set(): + return + self.queue.put((src, dst, Action.MOVE)) + self.total_queued_bytes += src.stat().st_size + + def report_transferred(self, bytes_transferred: int): + """Report transfer of `block_size` bytes.""" + + self.total_transferred_bytes += bytes_transferred + self.progress_cb.transfer_progress(self.total_queued_bytes, self.total_transferred_bytes) + + def done_and_join(self) -> None: + """Indicate all files have been queued, and wait until done. + + After this function has been called, the queue_xxx() methods should not + be called any more. + + :raises FileTransferError: if there was an error transferring one or + more files. + """ + + self.done.set() + self.join() + + if not self.queue.empty(): + # Flush the queue so that we can report which files weren't copied yet. 
+ files_remaining = self._files_remaining() + assert files_remaining + raise FileTransferError( + "%d files couldn't be transferred" % len(files_remaining), + files_remaining) + + def _files_remaining(self) -> typing.List[pathlib.Path]: + """Source files that were queued but not transferred.""" + files_remaining = [] + while not self.queue.empty(): + src, dst, act = self.queue.get_nowait() + files_remaining.append(src) + return files_remaining + + def abort(self) -> None: + """Abort the file transfer, immediately returns.""" + log.info('Aborting') + self._abort.set() + + def abort_and_join(self) -> None: + """Abort the file transfer, and wait until done.""" + + self.abort() + self.join() + + files_remaining = self._files_remaining() + if not files_remaining: + return + log.warning("%d files couldn't be transferred, starting with %s", + len(files_remaining), files_remaining[0]) + + def iter_queue(self) -> typing.Iterable[QueueItem]: + """Generator, yield queued items until the work is done.""" + + while True: + if self._abort.is_set() or self.__error.is_set(): + return + + try: + src, dst, action = self.queue.get(timeout=0.5) + self.progress_cb.transfer_file(src, dst) + yield src, dst, action + except queue.Empty: + if self.done.is_set(): + return + + def join(self, timeout: float = None) -> None: + """Wait for the transfer to finish/stop.""" + + if timeout: + run_until = time.time() + timeout + else: + run_until = float('inf') + + # We can't simply block the thread, we have to keep watching the + # progress queue. + while self.is_alive(): + if time.time() > run_until: + self.log.warning('Timeout while waiting for transfer to finish') + return + + self.progress_cb.flush(timeout=0.5) + + # Since Thread.join() neither returns anything nor raises any exception + # when timing out, we don't even have to call it any more. 
+ + def delete_file(self, path: pathlib.Path): + """Deletes a file, only logging a warning if deletion fails.""" + log.debug('Deleting %s, file has been transferred', path) + try: + path.unlink() + except IOError as ex: + log.warning('Unable to delete %s: %s', path, ex) + + @property + def has_error(self) -> bool: + return self.__error.is_set() + + def error_set(self, message: str): + """Indicate an error occurred, and provide a message.""" + + with self.__error_mutex: + # Avoid overwriting previous error messages. + if self.__error.is_set(): + return + + self.__error.set() + self.__error_message = message + + def error_message(self) -> str: + """Retrieve the error messsage, or an empty string if no error occurred.""" + with self.__error_mutex: + if not self.__error.is_set(): + return '' + return self.__error_message diff --git a/pack/zipped.py b/pack/zipped.py new file mode 100644 index 0000000..3042891 --- /dev/null +++ b/pack/zipped.py @@ -0,0 +1,89 @@ +# ***** BEGIN GPL LICENSE BLOCK ***** +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# ***** END GPL LICENCE BLOCK ***** +# +# (c) 2018, Blender Foundation - Sybren A. Stüvel +"""ZIP file packer. + +Note: There is no official file name encoding for ZIP files. Expect trouble +when you want to use the ZIP cross-platform and you have non-ASCII names. 
+""" +import logging +import pathlib + +from . import Packer, transfer + +log = logging.getLogger(__name__) + +# Suffixes to store uncompressed in the zip. +STORE_ONLY = {'.jpg', '.jpeg', '.exr'} + + +class ZipPacker(Packer): + """Creates a zipped BAT Pack instead of a directory.""" + + def _create_file_transferer(self) -> transfer.FileTransferer: + target_path = pathlib.Path(self._target_path) + return ZipTransferrer(target_path.absolute()) + + +class ZipTransferrer(transfer.FileTransferer): + """Creates a ZIP file instead of writing to a directory. + + Note: There is no official file name encoding for ZIP files. If you have + unicode file names, they will be encoded as UTF-8. WinZip interprets all + file names as encoded in CP437, also known as DOS Latin. + """ + + def __init__(self, zippath: pathlib.Path) -> None: + super().__init__() + self.zippath = zippath + + def run(self) -> None: + import zipfile + + zippath = self.zippath.absolute() + + with zipfile.ZipFile(str(zippath), 'w') as outzip: + for src, dst, act in self.iter_queue(): + assert src.is_absolute(), 'expecting only absolute paths, not %r' % src + + dst = pathlib.Path(dst).absolute() + try: + relpath = dst.relative_to(zippath) + + # Don't bother trying to compress already-compressed files. + if src.suffix.lower() in STORE_ONLY: + compression = zipfile.ZIP_STORED + log.debug('ZIP %s -> %s (uncompressed)', src, relpath) + else: + compression = zipfile.ZIP_DEFLATED + log.debug('ZIP %s -> %s', src, relpath) + outzip.write(str(src), arcname=str(relpath), compress_type=compression) + + if act == transfer.Action.MOVE: + self.delete_file(src) + except Exception: + # We have to catch exceptions in a broad way, as this is running in + # a separate thread, and exceptions won't otherwise be seen. + log.exception('Error transferring %s to %s', src, dst) + # Put the files to copy back into the queue, and abort. This allows + # the main thread to inspect the queue and see which files were not + # copied. 
The one we just failed (due to this exception) should also + # be reported there. + self.queue.put((src, dst, act)) + return diff --git a/trace/__init__.py b/trace/__init__.py new file mode 100644 index 0000000..c574f09 --- /dev/null +++ b/trace/__init__.py @@ -0,0 +1,80 @@ +# ***** BEGIN GPL LICENSE BLOCK ***** +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# ***** END GPL LICENCE BLOCK ***** +# +# (c) 2018, Blender Foundation - Sybren A. Stüvel +import logging +import pathlib +import typing + +from blender_asset_tracer import blendfile +from . import result, blocks2assets, file2blocks, progress + +log = logging.getLogger(__name__) + +codes_to_skip = { + # These blocks never have external assets: + b'ID', b'WM', b'SN', + + # These blocks are skipped for now, until we have proof they point to + # assets otherwise missed: + b'GR', b'WO', b'BR', b'LS', +} + + +def deps(bfilepath: pathlib.Path, progress_cb: typing.Optional[progress.Callback] = None) \ + -> typing.Iterator[result.BlockUsage]: + """Open the blend file and report its dependencies. + + :param bfilepath: File to open. + :param progress_cb: Progress callback object. 
+ """ + + log.info('opening: %s', bfilepath) + bfile = blendfile.open_cached(bfilepath) + + bi = file2blocks.BlockIterator() + if progress_cb: + bi.progress_cb = progress_cb + + # Remember which block usages we've reported already, without keeping the + # blocks themselves in memory. + seen_hashes = set() # type: typing.Set[int] + + for block in asset_holding_blocks(bi.iter_blocks(bfile)): + for block_usage in blocks2assets.iter_assets(block): + usage_hash = hash(block_usage) + if usage_hash in seen_hashes: + continue + seen_hashes.add(usage_hash) + yield block_usage + + +def asset_holding_blocks(blocks: typing.Iterable[blendfile.BlendFileBlock]) \ + -> typing.Iterator[blendfile.BlendFileBlock]: + """Generator, yield data blocks that could reference external assets.""" + for block in blocks: + assert isinstance(block, blendfile.BlendFileBlock) + code = block.code + + # The longer codes are either arbitrary data or data blocks that + # don't refer to external assets. The former data blocks will be + # visited when we hit the two-letter datablocks that use them. + if len(code) > 2 or code in codes_to_skip: + continue + + yield block diff --git a/trace/blocks2assets.py b/trace/blocks2assets.py new file mode 100644 index 0000000..b58a8c8 --- /dev/null +++ b/trace/blocks2assets.py @@ -0,0 +1,210 @@ +# ***** BEGIN GPL LICENSE BLOCK ***** +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# ***** END GPL LICENCE BLOCK *****
#
# (c) 2014, Blender Foundation - Campbell Barton
# (c) 2018, Blender Foundation - Sybren A. Stüvel
"""Block walkers.

From a Blend file data block, iter_assets() yields all the referred-to assets.
"""

import functools
import logging
import typing

from blender_asset_tracer import blendfile, bpathlib, cdefs
from blender_asset_tracer.blendfile import iterators
from . import result, modifier_walkers

log = logging.getLogger(__name__)

# Block codes for which the 'No reader implemented' message was already logged.
_warned_about_types = set()  # type: typing.Set[bytes]
# Maps a block code (e.g. b'IM') to the walker registered through @dna_code.
_funcs_for_code = {}  # type: typing.Dict[bytes, typing.Callable]


def iter_assets(block: blendfile.BlendFileBlock) -> typing.Iterator[result.BlockUsage]:
    """Generator, yield the assets used by this data block.

    The block is dispatched on its code to the walker registered with
    @dna_code. Codes without a registered walker yield nothing; that fact is
    logged at debug level, once per code.

    :param block: the data block to inspect; must not be a b'DATA' block.
    """
    assert block.code != b'DATA'

    try:
        block_reader = _funcs_for_code[block.code]
    except KeyError:
        if block.code not in _warned_about_types:
            log.debug('No reader implemented for block type %r', block.code.decode())
            _warned_about_types.add(block.code)
        return

    log.debug('Tracing block %r', block)
    yield from block_reader(block)


def dna_code(block_code: str):
    """Decorator, marks decorated func as handler for that DNA code.

    :param block_code: the block code as str (e.g. 'IM'); it is stored in the
        dispatch table in its encoded (bytes) form.
    :returns: a decorator that registers the function and returns it unchanged.
    """

    assert isinstance(block_code, str)

    def decorator(wrapped):
        _funcs_for_code[block_code.encode()] = wrapped
        return wrapped

    return decorator


def skip_packed(wrapped):
    """Decorator, skip blocks where 'packedfile' is set to true.

    The wrapped walker is only invoked when the block's 'packedfile' field is
    missing or falsy; otherwise nothing is yielded.
    """

    @functools.wraps(wrapped)
    def wrapper(block: blendfile.BlendFileBlock, *args, **kwargs):
        if block.get(b'packedfile', default=False):
            log.debug('Datablock %r is packed; skipping', block.id_name)
            return

        yield from wrapped(block, *args, **kwargs)

    return wrapper


@dna_code('CF')
def cache_file(block: blendfile.BlendFileBlock) -> typing.Iterator[result.BlockUsage]:
    """Cache file data blocks; reports the block's 'filepath' field."""
    path, field = block.get(b'filepath', return_field=True)
    yield result.BlockUsage(block, path, path_full_field=field)


@dna_code('IM')
@skip_packed
def image(block: blendfile.BlendFileBlock) -> typing.Iterator[result.BlockUsage]:
    """Image data blocks.

    Only images whose source is a file, a sequence, a movie or tiled are
    reported; any other source yields nothing. Sequence and tiled images are
    reported with is_sequence=True.
    """
    # Old files lack the 'source' field; treat those images as plain files.
    image_source = block.get(b'source', default=cdefs.IMA_SRC_FILE)
    if image_source not in {cdefs.IMA_SRC_FILE, cdefs.IMA_SRC_SEQUENCE,
                            cdefs.IMA_SRC_MOVIE, cdefs.IMA_SRC_TILED}:
        return

    pathname, field = block.get(b'name', return_field=True)
    is_sequence = image_source in {cdefs.IMA_SRC_SEQUENCE, cdefs.IMA_SRC_TILED}
    yield result.BlockUsage(block, pathname, is_sequence, path_full_field=field)


@dna_code('LI')
def library(block: blendfile.BlendFileBlock) -> typing.Iterator[result.BlockUsage]:
    """Library data blocks; reports the block's 'name' field."""
    path, field = block.get(b'name', return_field=True)
    yield result.BlockUsage(block, path, path_full_field=field)

    # The 'filepath' also points to the blend file. However, this is set to the
    # absolute path of the file by Blender (see BKE_library_filepath_set). This
    # is thus not a property we have to report or rewrite.


@dna_code('ME')
def mesh(block: blendfile.BlendFileBlock) -> typing.Iterator[result.BlockUsage]:
    """Mesh data blocks.

    Reports the 'filename' of the external data block pointed to by
    'ldata.external' or, when that pointer is not set, by 'fdata.external'.
    Yields nothing when neither pointer is set.
    """
    block_external = block.get_pointer((b'ldata', b'external'), None)
    if block_external is None:
        block_external = block.get_pointer((b'fdata', b'external'), None)
    if block_external is None:
        return

    path, field = block_external.get(b'filename', return_field=True)
    yield result.BlockUsage(block, path, path_full_field=field)


@dna_code('MC')
def movie_clip(block: blendfile.BlendFileBlock) -> typing.Iterator[result.BlockUsage]:
    """MovieClip data blocks; reports the block's 'name' field."""
    path, field = block.get(b'name', return_field=True)
    # TODO: The assumption that this is not a sequence may not be true for all movie clips.
    yield result.BlockUsage(block, path, is_sequence=False, path_full_field=field)


@dna_code('OB')
def object_block(block: blendfile.BlendFileBlock) -> typing.Iterator[result.BlockUsage]:
    """Object data blocks.

    Walks the object's modifiers and delegates each one to the handler
    registered for its type in modifier_walkers.modifier_handlers. Modifier
    types without a registered handler are silently skipped.
    """
    ctx = modifier_walkers.ModifierContext(owner=block)

    # 'ob->modifiers[...].filepath'
    for mod_idx, block_mod in enumerate(iterators.modifiers(block)):
        # Name used by the handlers and the log, e.g. b'<id_name>.modifiers[0]'.
        block_name = b'%s.modifiers[%d]' % (block.id_name, mod_idx)
        mod_type = block_mod[b'modifier', b'type']
        log.debug('Tracing modifier %s, type=%d', block_name.decode(), mod_type)

        try:
            mod_handler = modifier_walkers.modifier_handlers[mod_type]
        except KeyError:
            continue
        yield from mod_handler(ctx, block_mod, block_name)


@dna_code('SC')
def scene(block: blendfile.BlendFileBlock) -> typing.Iterator[result.BlockUsage]:
    """Scene data blocks.

    Reports the files used by the movie, sound and image strips of the
    scene's sequence editor. Each path is built from the strip's 'dir' field
    and the strip data's 'name' field. Yields nothing when the scene has no
    sequence editor.
    """
    # Sequence editor is the only interesting bit.
    block_ed = block.get_pointer(b'ed')
    if block_ed is None:
        return

    # Strip types reported with is_sequence=False; image strips are the only
    # asset-carrying type reported with is_sequence=True.
    single_asset_types = {cdefs.SEQ_TYPE_MOVIE, cdefs.SEQ_TYPE_SOUND_RAM, cdefs.SEQ_TYPE_SOUND_HD}
    asset_types = single_asset_types.union({cdefs.SEQ_TYPE_IMAGE})

    for seq, seq_type in iterators.sequencer_strips(block_ed):
        if seq_type not in asset_types:
            continue

        seq_strip = seq.get_pointer(b'strip')
        if seq_strip is None:
            continue
        seq_stripdata = seq_strip.get_pointer(b'stripdata')
        if seq_stripdata is None:
            continue

        dirname, dn_field = seq_strip.get(b'dir', return_field=True)
        basename, bn_field = seq_stripdata.get(b'name', return_field=True)
        asset_path = bpathlib.BlendPath(dirname) / basename

        is_sequence = seq_type not in single_asset_types
        yield result.BlockUsage(seq_strip, asset_path,
                                is_sequence=is_sequence,
                                path_dir_field=dn_field,
                                path_base_field=bn_field)


@dna_code('SO')
@skip_packed
def sound(block: blendfile.BlendFileBlock) -> typing.Iterator[result.BlockUsage]:
    """Sound data blocks; reports the block's 'name' field unless it is packed."""
    path, field = block.get(b'name', return_field=True)
    yield result.BlockUsage(block, path, path_full_field=field)


@dna_code('VF')
@skip_packed
def vector_font(block: blendfile.BlendFileBlock) -> typing.Iterator[result.BlockUsage]:
    """Vector Font data blocks.

    Reports the block's 'name' field unless it is packed. An empty name
    denotes the builtin font, which has no file and is not reported.
    """
    path, field = block.get(b'name', return_field=True)
    if path == b'':  # builtin font
        return
    yield result.BlockUsage(block, path, path_full_field=field)
diff --git a/trace/expanders.py b/trace/expanders.py
new file mode 100644
index 0000000..1f66387
--- /dev/null
+++ b/trace/expanders.py
@@ -0,0 +1,294 @@
# ***** BEGIN GPL LICENSE BLOCK *****
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# ***** END GPL LICENCE BLOCK *****
#
# (c) 2014, Blender Foundation - Campbell Barton
# (c) 2018, Blender Foundation - Sybren A. Stüvel
"""Low-level functions called by file2blocks.

Those can expand data blocks and yield their dependencies (e.g. other data
blocks necessary to render/display/work with the given data block).
"""
import logging
import typing

from blender_asset_tracer import blendfile, cdefs
from blender_asset_tracer.blendfile import iterators

# Don't warn about these types at all.
_warned_about_types = {b'LI', b'DATA'}
# Maps a block code (e.g. b'OB') to the expander registered through @dna_code.
_funcs_for_code = {}  # type: typing.Dict[bytes, typing.Callable]
log = logging.getLogger(__name__)


def expand_block(block: blendfile.BlendFileBlock) -> typing.Iterator[blendfile.BlendFileBlock]:
    """Generator, yield the data blocks used by this data block.

    The block is dispatched on its code to the expander registered with
    @dna_code. Codes without a registered expander yield nothing; that fact
    is logged at debug level, once per code.
    """

    try:
        expander = _funcs_for_code[block.code]
    except KeyError:
        if block.code not in _warned_about_types:
            log.debug('No expander implemented for block type %r', block.code.decode())
            _warned_about_types.add(block.code)
        return

    log.debug('Expanding block %r', block)
    # Filter out falsy blocks, i.e. None values.
    # Allowing expanders to yield None makes them more concise.
    yield from filter(None, expander(block))


def dna_code(block_code: str):
    """Decorator, marks decorated func as expander for that DNA code.

    :param block_code: the block code as str (e.g. 'OB'); it is stored in the
        dispatch table in its encoded (bytes) form.
    """

    assert isinstance(block_code, str)

    def decorator(wrapped):
        _funcs_for_code[block_code.encode()] = wrapped
        return wrapped

    return decorator


def _expand_generic_material(block: blendfile.BlendFileBlock):
    """Yield the blocks in the 'mat' pointer array, whose length is 'totcol'."""
    array_len = block.get(b'totcol')
    yield from block.iter_array_of_pointers(b'mat', array_len)


def _expand_generic_mtex(block: blendfile.BlendFileBlock):
    """Yield the 'tex' and 'object' pointers of each entry in the 'mtex' array.

    Yields nothing when the block's DNA type has no 'mtex' field.
    """
    if not block.dna_type.has_field(b'mtex'):
        # mtex was removed in Blender 2.8
        return

    for mtex in block.iter_fixed_array_of_pointers(b'mtex'):
        yield mtex.get_pointer(b'tex')
        yield mtex.get_pointer(b'object')


def _expand_generic_nodetree(block: blendfile.BlendFileBlock):
    """Yield every node of a bNodeTree block, followed by the node's 'id' pointer.

    Nodes of type CMP_NODE_R_LAYERS are skipped entirely.
    """
    assert block.dna_type.dna_type_id == b'bNodeTree'

    nodes = block.get_pointer((b'nodes', b'first'))
    for node in iterators.listbase(nodes):
        if node[b'type'] == cdefs.CMP_NODE_R_LAYERS:
            continue
        yield node

        # The 'id' property points to whatever is used by the node
        # (like the image in an image texture node).
        yield node.get_pointer(b'id')


def _expand_generic_nodetree_id(block: blendfile.BlendFileBlock):
    """Expand the node tree referenced by the block's 'nodetree' pointer, if set."""
    block_ntree = block.get_pointer(b'nodetree', None)
    if block_ntree is not None:
        yield from _expand_generic_nodetree(block_ntree)


def _expand_generic_animdata(block: blendfile.BlendFileBlock):
    """Yield the 'action' pointer of the block's animation data ('adt'), if any."""
    block_adt = block.get_pointer(b'adt')
    if block_adt:
        yield block_adt.get_pointer(b'action')
    # TODO, NLA


@dna_code('AR')
def _expand_armature(block: blendfile.BlendFileBlock):
    """Armature: animation data only."""
    yield from _expand_generic_animdata(block)


@dna_code('CU')
def _expand_curve(block: blendfile.BlendFileBlock):
    """Curve: animation data, materials, and the font/bevel/taper/text-on-curve pointers."""
    yield from _expand_generic_animdata(block)
    yield from _expand_generic_material(block)

    for fieldname in (b'vfont', b'vfontb', b'vfonti', b'vfontbi',
                      b'bevobj', b'taperobj', b'textoncurve'):
        yield block.get_pointer(fieldname)


@dna_code('GR')
def _expand_group(block: blendfile.BlendFileBlock):
    """Group/collection: the objects it contains, recursing into child collections."""
    log.debug('Collection/group Block: %s (name=%s)', block, block.id_name)

    objects = block.get_pointer((b'gobject', b'first'))
    for item in iterators.listbase(objects):
        yield item.get_pointer(b'ob')

    # Recurse through child collections.
    try:
        children = block.get_pointer((b'children', b'first'))
    except KeyError:
        # 'children' was introduced in Blender 2.8 collections
        pass
    else:
        for child in iterators.listbase(children):
            subcoll = child.get_pointer(b'collection')
            if subcoll is None:
                continue

            if subcoll.dna_type_id == b'ID':
                # This issue happened while recursing a linked-in 'Hidden'
                # collection in the Chimes set of the Spring project. Such
                # collections named 'Hidden' were apparently created while
                # converting files from Blender 2.79 to 2.80. This error
                # isn't reproducible with just Blender 2.80.
                # Such a block is yielded as-is instead of being recursed into.
                yield subcoll
                continue

            log.debug('recursing into child collection %s (name=%r, type=%r)',
                      subcoll, subcoll.id_name, subcoll.dna_type_name)
            yield from _expand_group(subcoll)


@dna_code('LA')
def _expand_lamp(block: blendfile.BlendFileBlock):
    """Lamp: animation data, node tree and mtex slots."""
    yield from _expand_generic_animdata(block)
    yield from _expand_generic_nodetree_id(block)
    yield from _expand_generic_mtex(block)


@dna_code('MA')
def _expand_material(block: blendfile.BlendFileBlock):
    """Material: animation data, node tree, mtex slots and, when present, 'group'."""
    yield from _expand_generic_animdata(block)
    yield from _expand_generic_nodetree_id(block)
    yield from _expand_generic_mtex(block)

    try:
        yield block.get_pointer(b'group')
    except KeyError:
        # Groups were removed from Blender 2.8
        pass


@dna_code('MB')
def _expand_metaball(block: blendfile.BlendFileBlock):
    """Metaball: animation data and materials."""
    yield from _expand_generic_animdata(block)
    yield from _expand_generic_material(block)


@dna_code('ME')
def _expand_mesh(block: blendfile.BlendFileBlock):
    """Mesh: animation data, materials and the 'texcomesh' pointer."""
    yield from _expand_generic_animdata(block)
    yield from _expand_generic_material(block)
    yield block.get_pointer(b'texcomesh')
    # TODO, TexFace? - it will be slow, we could simply ignore :S


@dna_code('NT')
def _expand_node_tree(block: blendfile.BlendFileBlock):
    """Node tree: animation data and the tree's nodes."""
    yield from _expand_generic_animdata(block)
    yield from _expand_generic_nodetree(block)


@dna_code('OB')
def _expand_object(block: blendfile.BlendFileBlock):
    """Object: animation data, materials, object data, dupli-group, proxies,
    pose-channel custom pointers, particle settings and modifier node groups.
    """
    yield from _expand_generic_animdata(block)
    yield from _expand_generic_material(block)

    yield block.get_pointer(b'data')

    # 'dup_group' is only followed when the OB_DUPLIGROUP flag is set.
    if block[b'transflag'] & cdefs.OB_DUPLIGROUP:
        yield block.get_pointer(b'dup_group')

    yield block.get_pointer(b'proxy')
    yield block.get_pointer(b'proxy_group')

    # 'ob->pose->chanbase[...].custom'
    block_pose = block.get_pointer(b'pose')
    if block_pose:
        assert block_pose.dna_type.dna_type_id == b'bPose'
        channels = block_pose.get_pointer((b'chanbase', b'first'))
        for pose_chan in iterators.listbase(channels):
            yield pose_chan.get_pointer(b'custom')

    # Expand the objects 'ParticleSettings' via 'ob->particlesystem[...].part'
    psystems = block.get_pointer((b'particlesystem', b'first'))
    for psystem in iterators.listbase(psystems):
        yield psystem.get_pointer(b'part')

    # Modifiers can also refer to other datablocks, which should also get expanded.
    for block_mod in iterators.modifiers(block):
        mod_type = block_mod[b'modifier', b'type']
        # Currently only node groups are supported. If the support should expand
        # to more types, something more intelligent than this should be made.
        if mod_type == cdefs.eModifierType_Nodes:
            yield block_mod.get_pointer(b'node_group')


@dna_code('PA')
def _expand_particle_settings(block: blendfile.BlendFileBlock):
    """Particle settings: animation data, mtex slots and the rendered group/object.

    'dup_group' is followed when 'ren_as' is PART_DRAW_GR, and 'dup_ob' when it
    is PART_DRAW_OB; other values follow neither.
    """
    yield from _expand_generic_animdata(block)
    yield from _expand_generic_mtex(block)

    block_ren_as = block[b'ren_as']
    if block_ren_as == cdefs.PART_DRAW_GR:
        yield block.get_pointer(b'dup_group')
    elif block_ren_as == cdefs.PART_DRAW_OB:
        yield block.get_pointer(b'dup_ob')


@dna_code('SC')
def _expand_scene(block: blendfile.BlendFileBlock):
    """Scene: animation data, node tree, camera, world, set, clip, the objects
    in its 'base' list, and the datablocks used by sequencer strips.
    """
    yield from _expand_generic_animdata(block)
    yield from _expand_generic_nodetree_id(block)
    yield block.get_pointer(b'camera')
    yield block.get_pointer(b'world')
    yield block.get_pointer(b'set', default=None)
    yield block.get_pointer(b'clip', default=None)

    bases = block.get_pointer((b'base', b'first'))
    for base in iterators.listbase(bases):
        yield base.get_pointer(b'object')

    # Sequence Editor
    block_ed = block.get_pointer(b'ed')
    if not block_ed:
        return

    # Strip type -> name of the strip field that points to the used datablock.
    # Strips of any other type are not followed.
    strip_type_to_field = {
        cdefs.SEQ_TYPE_SCENE: b'scene',
        cdefs.SEQ_TYPE_MOVIECLIP: b'clip',
        cdefs.SEQ_TYPE_MASK: b'mask',
        cdefs.SEQ_TYPE_SOUND_RAM: b'sound',
    }
    for strip, strip_type in iterators.sequencer_strips(block_ed):
        try:
            field_name = strip_type_to_field[strip_type]
        except KeyError:
            continue
        yield strip.get_pointer(field_name)


@dna_code('TE')
def _expand_texture(block: blendfile.BlendFileBlock):
    """Texture: animation data, node tree and the 'ima' pointer."""
    yield from _expand_generic_animdata(block)
    yield from _expand_generic_nodetree_id(block)
    yield block.get_pointer(b'ima')


@dna_code('WO')
def _expand_world(block: blendfile.BlendFileBlock):
    """World: animation data, node tree and mtex slots."""
    yield from _expand_generic_animdata(block)
    yield from _expand_generic_nodetree_id(block)
    yield from _expand_generic_mtex(block)
_funcs_for_code = {}  # type: typing.Dict[bytes, typing.Callable]
log = logging.getLogger(__name__)


# noinspection PyProtectedMember
class BlockQueue(queue.PriorityQueue):
    """PriorityQueue that sorts blocks by blend file path, then file offset.

    Callers put and get bare blocks; the sort key is added in _put() and
    stripped again in _get().
    """

    def _put(self, item: blendfile.BlendFileBlock):
        # The block itself is the last tuple element, so it only takes part
        # in the comparison when both path and offset are equal.
        super()._put((item.bfile.filepath, item.file_offset, item))

    def _get(self) -> blendfile.BlendFileBlock:
        _, _, item = super()._get()
        return item


class BlockIterator:
    """Expand blocks with dependencies from other libraries.

    The expansion process follows pointers and library links to construct the
    set of actually-used data blocks: all data blocks of the initial blend
    file, and the *actually linked-to* data blocks of linked blend files.

    This class exists so that we have some context for the recursive expansion
    without having to pass those variables to each recursive call.
    """

    def __init__(self) -> None:
        # Set of (blend file Path, block address) of already-reported blocks.
        self.blocks_yielded = set()  # type: typing.Set[typing.Tuple[pathlib.Path, int]]

        # Queue of blocks to visit
        self.to_visit = BlockQueue()

        self.progress_cb = progress.Callback()

    def iter_blocks(self,
                    bfile: blendfile.BlendFile,
                    limit_to: typing.AbstractSet[blendfile.BlendFileBlock] = frozenset(),
                    ) -> typing.Iterator[blendfile.BlendFileBlock]:
        """Expand blocks with dependencies from other libraries.

        :param bfile: the blend file to inspect.
        :param limit_to: ID blocks naming the blocks to report. When empty
            (the default), all non-DATA blocks of the file are queued.
            The default is an immutable frozenset so that it can never be
            shared-and-mutated between calls.
        :returns: iterator over the blocks of this file, followed by the
            blocks required from the libraries it links to.
        """

        self.progress_cb.trace_blendfile(bfile.filepath)
        log.info('inspecting: %s', bfile.filepath)
        if limit_to:
            self._queue_named_blocks(bfile, limit_to)
        else:
            self._queue_all_blocks(bfile)

        blocks_per_lib = yield from self._visit_blocks(bfile, limit_to)
        yield from self._visit_linked_blocks(blocks_per_lib)

    def _visit_blocks(self, bfile, limit_to):
        """Yield the queued blocks of this file; return the linked ID blocks.

        The generator's return value maps each library path to the set of ID
        blocks that refer into that library.
        """
        bpath = bpathlib.make_absolute(bfile.filepath)
        root_dir = bpathlib.BlendPath(bpath.parent)

        # Mapping from library path to data blocks to expand.
        blocks_per_lib = collections.defaultdict(set)

        while not self.to_visit.empty():
            block = self.to_visit.get()
            assert isinstance(block, blendfile.BlendFileBlock)
            if (bpath, block.addr_old) in self.blocks_yielded:
                continue

            if block.code == b'ID':
                # ID blocks represent linked-in assets. Those are the ones that
                # should be loaded from their own blend file and "expanded" to
                # the entire set of data blocks required to render them. We
                # defer the handling of those so that we can work with one
                # blend file at a time.
                lib = block.get_pointer(b'lib')
                lib_bpath = bpathlib.BlendPath(lib[b'name']).absolute(root_dir)
                blocks_per_lib[lib_bpath].add(block)

                # The library block itself should also be reported, because it
                # represents a blend file that is a dependency as well.
                self.to_visit.put(lib)
                continue

            if limit_to:
                # We're limiting the blocks, so we have to expand them to make
                # sure we don't miss anything. Otherwise we're yielding the
                # entire file anyway, and no expansion is necessary.
                self._queue_dependencies(block)
            self.blocks_yielded.add((bpath, block.addr_old))
            yield block

        return blocks_per_lib

    def _visit_linked_blocks(self, blocks_per_lib):
        """Open each library and iterate over the blocks referred to there."""
        # We've gone through all the blocks in this file, now open the libraries
        # and iterate over the blocks referred there.
        for lib_bpath, idblocks in blocks_per_lib.items():
            lib_path = bpathlib.make_absolute(lib_bpath.to_path())

            # A missing library is reported and skipped rather than treated as
            # a fatal error.
            if not lib_path.exists():
                log.warning('Library %s does not exist', lib_path)
                continue

            log.debug('Expanding %d blocks in %s', len(idblocks), lib_path)
            libfile = blendfile.open_cached(lib_path)
            yield from self.iter_blocks(libfile, idblocks)

    def _queue_all_blocks(self, bfile: blendfile.BlendFile):
        """Queue every block of the file, except DATA blocks."""
        log.debug('Queueing all blocks from file %s', bfile.filepath)
        for block in bfile.blocks:
            # Don't bother visiting DATA blocks, as we won't know what
            # to do with them anyway.
            if block.code == b'DATA':
                continue
            self.to_visit.put(block)

    def _queue_named_blocks(self,
                            bfile: blendfile.BlendFile,
                            limit_to: typing.AbstractSet[blendfile.BlendFileBlock]):
        """Queue only the blocks referred to in limit_to.

        :param bfile: the blend file to search for the named blocks.
        :param limit_to: set of ID blocks that name the blocks to queue.
            The queued blocks are loaded from the actual blend file, and
            selected by name.
        """

        for to_find in limit_to:
            assert to_find.code == b'ID'
            name_to_find = to_find[b'name']
            # The first two bytes of the ID name are used as the block code.
            code = name_to_find[:2]
            log.debug('Finding block %r with code %r', name_to_find, code)
            same_code = bfile.find_blocks_from_code(code)
            for block in same_code:
                if block.id_name == name_to_find:
                    log.debug('Queueing %r from file %s', block, bfile.filepath)
                    self.to_visit.put(block)

    def _queue_dependencies(self, block: blendfile.BlendFileBlock):
        """Queue every block that expanders.expand_block() yields for block."""
        # Use a distinct loop variable; do not rebind the 'block' parameter.
        for dependency in expanders.expand_block(block):
            self.to_visit.put(dependency)


def iter_blocks(bfile: blendfile.BlendFile) -> typing.Iterator[blendfile.BlendFileBlock]:
    """Generator, yield all blocks in this file + required blocks in libs."""
    bi = BlockIterator()
    yield from bi.iter_blocks(bfile)
log = logging.getLogger(__name__)


class DoesNotExist(OSError):
    """Indicates a path does not exist on the filesystem."""

    def __init__(self, path: pathlib.Path) -> None:
        super().__init__(path)
        self.path = path


def expand_sequence(path: pathlib.Path) -> typing.Iterator[pathlib.Path]:
    """Expand a file sequence path into the actual file paths.

    :param path: can be either a glob pattern (must contain a * character)
        or the path of the first file in the sequence.
    :returns: iterator over the matching paths, in sorted order. A directory
        or a file without trailing digits in its stem is yielded as-is.
    :raises DoesNotExist: when the path is not a glob and does not exist.
    """
    import glob
    import string

    if '*' in str(path):  # assume it is a glob
        log.debug('expanding glob %s', path)
        for fname in sorted(glob.glob(str(path), recursive=True)):
            yield pathlib.Path(fname)
        return

    if not path.exists():
        raise DoesNotExist(path)

    if path.is_dir():
        yield path
        return

    log.debug('expanding file sequence %s', path)

    stem_no_digits = path.stem.rstrip(string.digits)
    if stem_no_digits == path.stem:
        # Just a single file, no digits here.
        yield path
        return

    # Return everything that starts with 'stem_no_digits' and ends with the
    # same suffix as the first file. This may result in more files than used
    # by Blender, but at least it shouldn't miss any.
    #
    # The stem and suffix come from a real file name, so glob metacharacters
    # in them ('[', '?', ...) must be matched literally; only the '*' we
    # insert ourselves is a wildcard.
    pattern = '%s*%s' % (glob.escape(stem_no_digits), glob.escape(path.suffix))
    yield from sorted(path.parent.glob(pattern))
log = logging.getLogger(__name__)
modifier_handlers = {}  # type: typing.Dict[int, typing.Callable]


class ModifierContext:
    """Meta-info for modifier expansion.

    Currently this only holds the Object block that owns the modifier.
    """

    def __init__(self, owner: blendfile.BlendFileBlock) -> None:
        assert owner.dna_type_name == 'Object'
        self.owner = owner


def mod_handler(dna_num: int):
    """Decorator, registers the decorated function for that modifier number.

    The function is stored in modifier_handlers[dna_num] and returned
    unchanged, so several @mod_handler decorators can be stacked.
    """

    assert isinstance(dna_num, int)

    def register(handler):
        modifier_handlers[dna_num] = handler
        return handler

    return register


@mod_handler(cdefs.eModifierType_MeshCache)
def modifier_filepath(ctx: ModifierContext, modifier: blendfile.BlendFileBlock, block_name: bytes) \
        -> typing.Iterator[result.BlockUsage]:
    """Yield a usage for the modifier's 'filepath' field, nothing else."""
    filepath, filepath_field = modifier.get(b'filepath', return_field=True)
    yield result.BlockUsage(modifier, filepath,
                            path_full_field=filepath_field,
                            block_name=block_name)


@mod_handler(cdefs.eModifierType_MeshSequenceCache)
def modifier_mesh_sequence_cache(ctx: ModifierContext, modifier: blendfile.BlendFileBlock,
                                 block_name: bytes) -> typing.Iterator[result.BlockUsage]:
    """Yield the Alembic file(s) used by this modifier.

    The usage is reported on the cache file block, under that block's own
    ID name, not on the modifier.
    """
    cache_file = modifier.get_pointer(b'cache_file')
    if cache_file is None:
        return

    is_sequence = bool(cache_file[b'is_sequence'])
    cache_block_name = cache_file.id_name
    assert cache_block_name is not None

    filepath, filepath_field = cache_file.get(b'filepath', return_field=True)
    yield result.BlockUsage(cache_file, filepath,
                            path_full_field=filepath_field,
                            is_sequence=is_sequence,
                            block_name=cache_block_name)


@mod_handler(cdefs.eModifierType_Ocean)
def modifier_ocean(ctx: ModifierContext, modifier: blendfile.BlendFileBlock, block_name: bytes) \
        -> typing.Iterator[result.BlockUsage]:
    """Yield the cache directory of an Ocean modifier, if it is cached."""
    if not modifier[b'cached']:
        return

    # The path indicates the directory containing the cached files.
    cache_dir, cache_dir_field = modifier.get(b'cachepath', return_field=True)
    yield result.BlockUsage(modifier, cache_dir,
                            is_sequence=True,
                            path_full_field=cache_dir_field,
                            block_name=block_name)


def _get_texture(prop_name: bytes, dblock: blendfile.BlendFileBlock, block_name: bytes) \
        -> typing.Iterator[result.BlockUsage]:
    """Yield block usages from a texture property.

    Assumes dblock[prop_name] is a texture data block.
    """
    if dblock is None:
        return

    texture = dblock.get_pointer(prop_name)
    yield from _get_image(b'ima', texture, block_name)


def _get_image(prop_name: bytes,
               dblock: typing.Optional[blendfile.BlendFileBlock],
               block_name: bytes) \
        -> typing.Iterator[result.BlockUsage]:
    """Yield block usages from an image property.

    Assumes dblock[prop_name] is an image data block. Yields nothing when
    dblock is falsy, the property does not exist, or the pointer is unset.
    """
    if not dblock:
        return

    try:
        image = dblock.get_pointer(prop_name)
    except KeyError as ex:
        # No such property, just return.
        log.debug('_get_image() called with non-existing property name: %s', ex)
        return

    if not image:
        return

    image_path, image_path_field = image.get(b'name', return_field=True)
    yield result.BlockUsage(image, image_path,
                            path_full_field=image_path_field,
                            block_name=block_name)


@mod_handler(cdefs.eModifierType_Displace)
@mod_handler(cdefs.eModifierType_Wave)
def modifier_texture(ctx: ModifierContext, modifier: blendfile.BlendFileBlock, block_name: bytes) \
        -> typing.Iterator[result.BlockUsage]:
    """Yield the image used by the modifier's 'texture' property."""
    yield from _get_texture(b'texture', modifier, block_name)


@mod_handler(cdefs.eModifierType_WeightVGEdit)
@mod_handler(cdefs.eModifierType_WeightVGMix)
@mod_handler(cdefs.eModifierType_WeightVGProximity)
def modifier_mask_texture(ctx: ModifierContext, modifier: blendfile.BlendFileBlock,
                          block_name: bytes) \
        -> typing.Iterator[result.BlockUsage]:
    """Yield the image used by the modifier's 'mask_texture' property."""
    yield from _get_texture(b'mask_texture', modifier, block_name)


@mod_handler(cdefs.eModifierType_UVProject)
def modifier_image(ctx: ModifierContext, modifier: blendfile.BlendFileBlock, block_name: bytes) \
        -> typing.Iterator[result.BlockUsage]:
    """Yield the image used by the modifier's 'image' property."""
    yield from _get_image(b'image', modifier, block_name)


def _walk_point_cache(ctx: ModifierContext,
                      block_name: bytes,
                      bfile: blendfile.BlendFile,
                      pointcache: blendfile.BlendFileBlock,
                      extension: bytes):
    """Yield the on-disk location of a point cache, if it has one.

    An external cache is reported by its 'path' field. A disk cache is
    reported as a glob pattern built from the blend file's stem, the cache
    name and the given extension. Other caches yield nothing.
    """
    cache_flags = pointcache[b'flag']

    if cache_flags & cdefs.PTCACHE_EXTERNAL:
        ext_path, ext_field = pointcache.get(b'path', return_field=True)
        log.info(' external cache at %s', ext_path)
        yield result.BlockUsage(pointcache, bpathlib.BlendPath(ext_path),
                                path_full_field=ext_field,
                                is_sequence=True,
                                block_name=block_name)
        return

    if not cache_flags & cdefs.PTCACHE_DISK_CACHE:
        return

    # See ptcache_path() in pointcache.c
    cache_name, name_field = pointcache.get(b'name', return_field=True)
    if not cache_name:
        # See ptcache_filename() in pointcache.c
        owner_id_name = ctx.owner[b'id', b'name']
        cache_name = owner_id_name[2:].hex().upper().encode()

    blend_stem = bfile.filepath.stem.encode()
    disk_path = b'//%b%b/%b_*%b' % (cdefs.PTCACHE_PATH, blend_stem, cache_name, extension)
    log.info(' disk cache at %s', disk_path)
    yield result.BlockUsage(pointcache, bpathlib.BlendPath(disk_path),
                            path_full_field=name_field,
                            is_sequence=True,
                            block_name=block_name)


@mod_handler(cdefs.eModifierType_ParticleSystem)
def modifier_particle_system(ctx: ModifierContext, modifier: blendfile.BlendFileBlock,
                             block_name: bytes) \
        -> typing.Iterator[result.BlockUsage]:
    """Yield the point cache of the modifier's particle system, if any."""
    psys = modifier.get_pointer(b'psys')
    if psys is None:
        return

    pointcache = psys.get_pointer(b'pointcache')
    if pointcache is None:
        return

    yield from _walk_point_cache(ctx, block_name, modifier.bfile, pointcache, cdefs.PTCACHE_EXT)


@mod_handler(cdefs.eModifierType_Fluidsim)
def modifier_fluid_sim(ctx: ModifierContext, modifier: blendfile.BlendFileBlock, block_name: bytes) \
        -> typing.Iterator[result.BlockUsage]:
    """Yield the surface data path of a fluid simulation modifier."""
    my_log = log.getChild('modifier_fluid_sim')

    fss = modifier.get_pointer(b'fss')
    if fss is None:
        my_log.debug('Modifier %r (%r) has no fss',
                     modifier[b'modifier', b'name'], block_name)
        return

    surf_path, surf_field = fss.get(b'surfdataPath', return_field=True)

    # This may match more than is used by Blender, but at least it shouldn't
    # miss any files.
    # The 'fluidsurface' prefix is defined in source/blender/makesdna/DNA_object_fluidsim_types.h
    yield result.BlockUsage(fss, bpathlib.BlendPath(surf_path),
                            path_full_field=surf_field,
                            is_sequence=True,
                            block_name=block_name)


@mod_handler(cdefs.eModifierType_Smokesim)
def modifier_smoke_sim(ctx: ModifierContext, modifier: blendfile.BlendFileBlock, block_name: bytes) \
        -> typing.Iterator[result.BlockUsage]:
    """Yield the point cache of a smoke simulation domain.

    The cache file extension is looked up from the domain's
    'cache_file_format'; a format missing from the lookup table raises
    KeyError.
    """
    my_log = log.getChild('modifier_smoke_sim')

    domain = modifier.get_pointer(b'domain')
    if domain is None:
        my_log.debug('Modifier %r (%r) has no domain',
                     modifier[b'modifier', b'name'], block_name)
        return

    pointcache = domain.get_pointer(b'point_cache')
    if pointcache is None:
        return

    cache_format = domain.get(b'cache_file_format')
    extension_per_format = {
        cdefs.PTCACHE_FILE_PTCACHE: cdefs.PTCACHE_EXT,
        cdefs.PTCACHE_FILE_OPENVDB: cdefs.PTCACHE_EXT_VDB,
    }
    extension = extension_per_format[cache_format]
    yield from _walk_point_cache(ctx, block_name, modifier.bfile, pointcache, extension)


@mod_handler(cdefs.eModifierType_Cloth)
def modifier_cloth(ctx: ModifierContext, modifier: blendfile.BlendFileBlock, block_name: bytes) \
        -> typing.Iterator[result.BlockUsage]:
    """Yield the point cache of a cloth modifier, if any."""
    pointcache = modifier.get_pointer(b'point_cache')
    if pointcache is None:
        return

    yield from _walk_point_cache(ctx, block_name, modifier.bfile, pointcache, cdefs.PTCACHE_EXT)
class Callback:
    """BAT Tracer progress reporting.

    Every hook in this base class is a no-op. It is mostly used to forward
    events to pack.progress.Callback.
    """

    def trace_blendfile(self, filename: pathlib.Path) -> None:
        """Called for every blendfile opened when tracing dependencies."""
        return None
log = logging.getLogger(__name__)


@functools.total_ordering
class BlockUsage:
    """Represents the use of an asset by a data block.

    :ivar block_name: an identifying name for this block. Defaults to the ID
        name of the block.
    :ivar block: the data block that uses the asset.
    :ivar asset_path: The path of the asset, if is_sequence=False. Otherwise
        it can be either a glob pattern (must contain a * byte) or the path of
        the first file in the sequence.
    :ivar is_sequence: Indicates whether this file is alone (False), the
        first of a sequence (True, and the path points to a file), or a
        directory containing a sequence (True, and path points to a directory).
        In certain cases such files should be reported once (f.e. when
        rewriting the source field to another path), and in other cases the
        sequence should be expanded (f.e. when copying all assets to a BAT
        Pack).
    :ivar path_full_field: field containing the full path of this asset.
    :ivar path_dir_field: field containing the parent path (i.e. the
        directory) of this asset.
    :ivar path_base_field: field containing the basename of this asset.
    """

    def __init__(self,
                 block: blendfile.BlendFileBlock,
                 asset_path: bpathlib.BlendPath,
                 is_sequence: bool = False,
                 path_full_field: typing.Optional[dna.Field] = None,
                 path_dir_field: typing.Optional[dna.Field] = None,
                 path_base_field: typing.Optional[dna.Field] = None,
                 block_name: bytes = b'',
                 ) -> None:
        """Either path_full_field, or both path_dir_field and
        path_base_field, must be given. asset_path may also be plain bytes,
        which are wrapped in a BlendPath.
        """
        if block_name:
            self.block_name = block_name
        else:
            self.block_name = self.guess_block_name(block)

        assert isinstance(block, blendfile.BlendFileBlock)
        assert isinstance(asset_path, (bytes, bpathlib.BlendPath)), \
            'asset_path should be BlendPath, not %r' % type(asset_path)

        if path_full_field is None:
            assert isinstance(path_dir_field, dna.Field), \
                'path_dir_field should be dna.Field, not %r' % type(path_dir_field)
            assert isinstance(path_base_field, dna.Field), \
                'path_base_field should be dna.Field, not %r' % type(path_base_field)
        else:
            assert isinstance(path_full_field, dna.Field), \
                'path_full_field should be dna.Field, not %r' % type(path_full_field)

        if isinstance(asset_path, bytes):
            asset_path = bpathlib.BlendPath(asset_path)

        self.block = block
        self.asset_path = asset_path
        self.is_sequence = bool(is_sequence)
        self.path_full_field = path_full_field
        self.path_dir_field = path_dir_field
        self.path_base_field = path_base_field

        # cached by __fspath__()
        self._abspath = None  # type: typing.Optional[pathlib.Path]

    @staticmethod
    def guess_block_name(block: blendfile.BlendFileBlock) -> bytes:
        """Return the block's ID name, else its 'name' field, else b'-unnamed-'."""
        try:
            return block[b'id', b'name']
        except KeyError:
            pass
        try:
            return block[b'name']
        except KeyError:
            pass
        return b'-unnamed-'

    def __repr__(self):
        if self.path_full_field is None:
            field_name = self.path_dir_field.name.name_full.decode() + \
                         '/' + \
                         self.path_base_field.name.name_full.decode()
        else:
            field_name = self.path_full_field.name.name_full.decode()
        # The format string must consume all five arguments; an empty string
        # here makes every repr() call raise TypeError.
        return '<BlockUsage name=%r type=%r field=%r asset=%r%s>' % (
            self.block_name, self.block.dna_type_name,
            field_name, self.asset_path,
            ' sequence' if self.is_sequence else ''
        )

    def files(self) -> typing.Iterator[pathlib.Path]:
        """Determine absolute path(s) of the asset file(s).

        A relative path is interpreted relative to the blend file referring
        to the asset. If this BlockUsage represents a sequence, the filesystem
        is inspected and the actual files in the sequence are yielded.

        A non-existing path is logged as a warning and yields nothing.

        It is assumed that paths are valid UTF-8.
        """

        path = self.__fspath__()
        if not self.is_sequence:
            if not path.exists():
                log.warning('Path %s does not exist for %s', path, self)
                return
            yield path
            return

        try:
            yield from file_sequence.expand_sequence(path)
        except file_sequence.DoesNotExist:
            log.warning('Path %s does not exist for %s', path, self)

    def __fspath__(self) -> pathlib.Path:
        """Determine the absolute path of the asset on the filesystem.

        The result is computed once and cached in self._abspath.
        """
        if self._abspath is None:
            bpath = self.block.bfile.abspath(self.asset_path)
            log.info('Resolved %s rel to %s -> %s',
                     self.asset_path, self.block.bfile.filepath, bpath)

            as_path = pathlib.Path(bpath.to_path())

            # Windows cannot make a path that has a glob pattern in it absolute.
            # Since globs are generally only on the filename part, we take that off,
            # make the parent directory absolute, then put the filename back.
            try:
                abs_parent = bpathlib.make_absolute(as_path.parent)
            except FileNotFoundError:
                self._abspath = as_path
            else:
                self._abspath = abs_parent / as_path.name

            log.info('Resolving %s rel to %s -> %s',
                     self.asset_path, self.block.bfile.filepath, self._abspath)
        else:
            log.info('Reusing abspath %s', self._abspath)
        return self._abspath

    abspath = property(__fspath__)

    def __lt__(self, other: 'BlockUsage'):
        """Allow sorting for repeatable and predictable unit tests.

        Usages are ordered by block name first, then by block. Comparing the
        two keys lexicographically (instead of requiring both to be smaller)
        gives a consistent ordering, so sorting is deterministic.
        """
        if not isinstance(other, BlockUsage):
            # NotImplemented is a value to return, not an exception to raise;
            # Python turns it into a TypeError for unsupported operands.
            return NotImplemented
        if self.block_name != other.block_name:
            return self.block_name < other.block_name
        return self.block < other.block

    def __eq__(self, other: object):
        if not isinstance(other, BlockUsage):
            return False
        return self.block_name == other.block_name and self.block == other.block

    def __hash__(self):
        return hash((self.block_name, hash(self.block)))