blender_asset_tracer/pack/shaman/cache.py

198 lines
6.0 KiB
Python

# ***** BEGIN GPL LICENSE BLOCK *****
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# ***** END GPL LICENCE BLOCK *****
#
# (c) 2019, Blender Foundation - Sybren A. Stüvel
import base64
import hashlib
import json
import logging
import sys
import time
import typing
from collections import deque
from pathlib import Path
from . import time_tracker
CACHE_ROOT = Path().home() / '.cache/shaman-client/shasums'
MAX_CACHE_FILES_AGE_SECS = 3600 * 24 * 60 # 60 days
log = logging.getLogger(__name__)
class TimeInfo:
computing_checksums = 0.0
checksum_cache_handling = 0.0
def find_files(root: Path) -> typing.Iterable[Path]:
"""Recursively finds files in the given root path.
Directories are recursed into, and file paths are yielded.
Symlinks are yielded if they refer to a regular file.
"""
queue = deque([root])
while queue:
path = queue.popleft()
# Ignore hidden files/dirs; these can be things like '.svn' or '.git',
# which shouldn't be sent to Shaman.
if path.name.startswith('.'):
continue
if path.is_dir():
for child in path.iterdir():
queue.append(child)
continue
# Only yield symlinks if they link to (a link to) a normal file.
if path.is_symlink():
symlinked = path.resolve()
if symlinked.is_file():
yield path
continue
if path.is_file():
yield path
def compute_checksum(filepath: Path) -> str:
"""Compute the SHA256 checksum for the given file."""
blocksize = 32 * 1024
log.debug('Computing checksum of %s', filepath)
with time_tracker.track_time(TimeInfo, 'computing_checksums'):
hasher = hashlib.sha256()
with filepath.open('rb') as infile:
while True:
block = infile.read(blocksize)
if not block:
break
hasher.update(block)
checksum = hasher.hexdigest()
return checksum
def _cache_path(filepath: Path) -> Path:
"""Compute the cache file for the given file path."""
fs_encoding = sys.getfilesystemencoding()
filepath = filepath.absolute()
# Reverse the directory, because most variation is in the last bytes.
rev_dir = str(filepath.parent)[::-1]
encoded_path = filepath.stem + rev_dir + filepath.suffix
cache_key = base64.urlsafe_b64encode(encoded_path.encode(fs_encoding)).decode().rstrip('=')
cache_path = CACHE_ROOT / cache_key[:10] / cache_key[10:]
return cache_path
def compute_cached_checksum(filepath: Path) -> str:
"""Computes the SHA256 checksum.
The checksum is cached to disk. If the cache is still valid, it is used to
skip the actual SHA256 computation.
"""
with time_tracker.track_time(TimeInfo, 'checksum_cache_handling'):
current_stat = filepath.stat()
cache_path = _cache_path(filepath)
try:
with cache_path.open('r') as cache_file:
payload = json.load(cache_file)
except (OSError, ValueError):
# File may not exist, or have invalid contents.
pass
else:
checksum = payload.get('checksum', '')
cached_mtime = payload.get('file_mtime', 0.0)
cached_size = payload.get('file_size', -1)
if (checksum
and current_stat.st_size == cached_size
and abs(cached_mtime - current_stat.st_mtime) < 0.01):
cache_path.touch()
return checksum
checksum = compute_checksum(filepath)
with time_tracker.track_time(TimeInfo, 'checksum_cache_handling'):
payload = {
'checksum': checksum,
'file_mtime': current_stat.st_mtime,
'file_size': current_stat.st_size,
}
try:
cache_path.parent.mkdir(parents=True, exist_ok=True)
with cache_path.open('w') as cache_file:
json.dump(payload, cache_file)
except IOError as ex:
log.warning('Unable to write checksum cache file %s: %s', cache_path, ex)
return checksum
def cleanup_cache() -> None:
"""Remove all cache files that are older than MAX_CACHE_FILES_AGE_SECS."""
if not CACHE_ROOT.exists():
return
with time_tracker.track_time(TimeInfo, 'checksum_cache_handling'):
queue = deque([CACHE_ROOT])
rmdir_queue = []
now = time.time()
num_removed_files = 0
num_removed_dirs = 0
while queue:
path = queue.popleft()
if path.is_dir():
queue.extend(path.iterdir())
rmdir_queue.append(path)
continue
assert path.is_file()
path.relative_to(CACHE_ROOT)
age = now - path.stat().st_mtime
# Don't trust files from the future either.
if 0 <= age <= MAX_CACHE_FILES_AGE_SECS:
continue
path.unlink()
num_removed_files += 1
for dirpath in reversed(rmdir_queue):
assert dirpath.is_dir()
dirpath.relative_to(CACHE_ROOT)
try:
dirpath.rmdir()
num_removed_dirs += 1
except OSError:
pass
if num_removed_dirs or num_removed_files:
log.info('Cache Cleanup: removed %d dirs and %d files', num_removed_dirs, num_removed_files)