Added 'bat blocks' command that shows the DNA types that use the most data

It's a work in progress and may not be useful to anyone but us.
2018-11-30 13:58:19 +01:00 · 2018-11-30 13:58:19 +01:00 · 78981ce9f1
commit 78981ce9f1
parent b52faa6e75
3 changed files with 186 additions and 1 deletions
--- a/blender_asset_tracer/cli/init.py
+++ b/blender_asset_tracer/cli/init.py
@ -24,7 +24,7 @@ import datetime
 import logging
 import time
-from . import common, pack, list_deps
+from . import blocks, common, pack, list_deps
 def cli_main():
@ -51,6 +51,7 @@ def cli_main():
             'whereas subcommand-specific options go after it. '
             'Use --help after the subcommand to get more info.')
    blocks.add_parser(subparsers)
    pack.add_parser(subparsers)
    list_deps.add_parser(subparsers)
--- a/blender_asset_tracer/cli/blocks.py
+++ b/blender_asset_tracer/cli/blocks.py
@ -0,0 +1,127 @@
 # ***** BEGIN GPL LICENSE BLOCK *****
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software Foundation,
 # Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 #
 # ***** END GPL LICENCE BLOCK *****
 #
 # (c) 2018, Blender Foundation - Sybren A. Stüvel
 """List count and total size of datablocks in a blend file."""
 import collections
 import logging
 import pathlib
 from blender_asset_tracer import blendfile
 from . import common
 log = logging.getLogger(__name__)
 class BlockTypeInfo:
    """Accumulated statistics for all blocks that share one block key."""
    def __init__(self):
        # Placeholder label until a caller assigns the real key.
        self.name = 'unset'
        # Running counters: summed block sizes and number of blocks seen.
        self.total_bytes = self.num_blocks = 0
        # Per-block sizes and the blocks themselves; two separate lists.
        self.sizes, self.blocks = [], []
 def add_parser(subparsers):
    """Register the 'blocks' subcommand and its arguments on ``subparsers``."""
    blocks_parser = subparsers.add_parser('blocks', help=__doc__)
    blocks_parser.add_argument('blendfile', type=pathlib.Path)
    blocks_parser.add_argument(
        '-d', '--dump',
        action='store_true',
        default=False,
        help='Hex-dump the biggest block',
    )
    # The parsed namespace carries the handler to call for this subcommand.
    blocks_parser.set_defaults(func=cli_blocks)
 def by_total_bytes(info: BlockTypeInfo) -> int:
    """Sort key: the summed size in bytes recorded in ``info``."""
    total = info.total_bytes
    return total
 def block_key(block: blendfile.BlendFileBlock) -> str:
    """Return the grouping key of ``block``: '<DNA type name>-<block code>'."""
    dna_name = block.dna_type_name
    # The block code is stored as bytes; decode it for display.
    code = block.code.decode()
    return '%s-%s' % (dna_name, code)
 def _print_hexdump(data, line_len_bytes=32):
    """Print ``data`` as space-separated hex bytes, ``line_len_bytes`` per line.

    Each line is prefixed with the decimal offset of its first byte.
    """
    for offset in range(0, len(data), line_len_bytes):
        # bytes.hex() yields two lowercase hex digits per byte.
        digits = data[offset:offset + line_len_bytes].hex()
        print('%6d -' % offset, ' '.join(digits[i:i + 2] for i in range(0, len(digits), 2)))
 def cli_blocks(args):
    """Print the block types that use the most space in a blend file.

    Prints a table of the ten block types with the largest total size, then
    the biggest single block of the top type and every property whose
    integer value equals that block's ``addr_old``. When ``args.dump`` is
    set, ``size`` bytes read from the block's ``file_offset`` are hex-dumped.

    :param args: parsed command-line namespace; ``args.blendfile`` (a path)
        and ``args.dump`` are read.
    :return: 3 when the blend file does not exist, None otherwise.
    """
    bpath = args.blendfile
    if not bpath.exists():
        log.fatal('File %s does not exist', args.blendfile)
        return 3
    per_blocktype = collections.defaultdict(BlockTypeInfo)
    print('Opening %s' % bpath)
    # NOTE(review): bfile is never closed here; presumably BlendFile offers
    # a way to release the file -- confirm and use it.
    bfile = blendfile.BlendFile(bpath)
    print('Inspecting %s' % bpath)
    for block in bfile.blocks:
        # DNA1 blocks are left out of the statistics.
        if block.code == b'DNA1':
            continue
        index_as = block_key(block)
        info = per_blocktype[index_as]
        info.name = index_as
        info.total_bytes += block.size
        info.num_blocks += 1
        info.sizes.append(block.size)
        info.blocks.append(block)
    fmt = '%-35s %10s %10s %10s %10s'
    print(fmt % ('Block type', 'Total Size', 'Num blocks', 'Avg Size', 'Median'))
    print(fmt % (35 * '-', 10 * '-', 10 * '-', 10 * '-', 10 * '-'))
    infos = sorted(per_blocktype.values(), key=by_total_bytes, reverse=True)
    for info in infos[:10]:
        # For an even number of blocks this picks the upper of the two middle sizes.
        median_size = sorted(info.sizes)[len(info.sizes) // 2]
        print(fmt % (info.name,
                     common.humanize_bytes(info.total_bytes),
                     info.num_blocks,
                     common.humanize_bytes(info.total_bytes // info.num_blocks),
                     common.humanize_bytes(median_size)
                     ))
    print(70 * '-')
    if not infos:
        # No block other than DNA1 was seen, so there is no "biggest block"
        # to report; indexing infos[0] would raise IndexError.
        print('No blocks found')
        return None
    # From the blocks of the most space-using category, the biggest block.
    # max() returns the first of equally-sized blocks, like a stable
    # descending sort followed by [0] would.
    biggest_block = max(infos[0].blocks, key=lambda blck: blck.size)
    print('Biggest %s block is %s at address %s' % (
        block_key(biggest_block),
        common.humanize_bytes(biggest_block.size),
        biggest_block.addr_old,
    ))
    print('Finding what points there')
    addr_to_find = biggest_block.addr_old
    found_pointer = False
    for block in bfile.blocks:
        for prop_path, prop_value in block.items_recursive():
            # Only integer properties holding exactly this address count.
            if isinstance(prop_value, int) and prop_value == addr_to_find:
                print('    ', block, prop_path)
                found_pointer = True
    if not found_pointer:
        print('Nothing points there')
    if args.dump:
        print('Hexdump:')
        bfile.fileobj.seek(biggest_block.file_offset)
        _print_hexdump(bfile.fileobj.read(biggest_block.size))
    return None
--- a/blender_asset_tracer/cli/common.py
+++ b/blender_asset_tracer/cli/common.py
@ -18,6 +18,8 @@
 #
 # (c) 2018, Blender Foundation - Sybren A. Stüvel
 """Common functionality for CLI parsers."""
 import typing
 import pathlib
@ -40,3 +42,58 @@ def shorten(cwd: pathlib.Path, somepath: pathlib.Path) -> pathlib.Path:
        return somepath.relative_to(cwd)
    except ValueError:
        return somepath
 def humanize_bytes(size_in_bytes: int, precision: typing.Optional[int]=None):
    """Format a byte count as a short human-readable string.

    Source: http://code.activestate.com/recipes/577081-humanized-representation-of-a-number-of-bytes

    :param size_in_bytes: The size to humanize
    :param precision: Number of digits shown after the decimal point. When
        None, 1 is used for sizes of 1024 bytes and up, and 0 below that
        (where the plain number of bytes is shown).

    >>> humanize_bytes(1)
    '1 B'
    >>> humanize_bytes(1024)
    '1.0 kB'
    >>> humanize_bytes(1024*123, 0)
    '123 kB'
    >>> humanize_bytes(1024*123)
    '123.0 kB'
    >>> humanize_bytes(1024*12342)
    '12.1 MB'
    >>> humanize_bytes(1024*12342,2)
    '12.05 MB'
    >>> humanize_bytes(1024*1234,2)
    '1.21 MB'
    >>> humanize_bytes(1024*1234*1111,2)
    '1.31 GB'
    >>> humanize_bytes(1024*1234*1111,1)
    '1.3 GB'
    """
    if precision is None:
        precision = 1 if size_in_bytes >= 1024 else 0
    # Largest unit first, so the first match is the biggest unit that fits.
    units = (
        (1024 ** 5, 'PB'),
        (1024 ** 4, 'TB'),
        (1024 ** 3, 'GB'),
        (1024 ** 2, 'MB'),
        (1024 ** 1, 'kB'),
        (1024 ** 0, 'B'),
    )
    # Sizes below 1 (zero, negatives) match no unit and are shown in bytes.
    factor, suffix = next(
        ((unit_size, unit_name) for unit_size, unit_name in units
         if size_in_bytes >= unit_size),
        (1, 'B'))
    return '%.*f %s' % (precision, size_in_bytes / factor, suffix)
 if __name__ == '__main__':
    # Running this module directly executes the doctests in its docstrings.
    from doctest import testmod
    testmod()