blender_asset_tracer/pack/shaman/cache.py

# ***** BEGIN GPL LICENSE BLOCK *****
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# ***** END GPL LICENSE BLOCK *****
#
# (c) 2019, Blender Foundation - Sybren A. Stüvel
import base64
import hashlib
import json
import logging
import sys
import time
import typing
from collections import deque
from pathlib import Path

from . import time_tracker

CACHE_ROOT = Path().home() / ".cache/shaman-client/shasums"
MAX_CACHE_FILES_AGE_SECS = 3600 * 24 * 60  # 60 days

log = logging.getLogger(__name__)


class TimeInfo:
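    # Running totals of time spent, accumulated by time_tracker.track_time().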
    computing_checksums = 0.0
    checksum_cache_handling = 0.0


def find_files(root: Path) -> typing.Iterable[Path]:
"""Recursively finds files in the given root path.
Directories are recursed into, and file paths are yielded.
Symlinks are yielded if they refer to a regular file.
"""
    queue = deque([root])
    while queue:
        path = queue.popleft()

        # Ignore hidden files/dirs; these can be things like '.svn' or '.git',
        # which shouldn't be sent to Shaman.
        if path.name.startswith("."):
            continue

        if path.is_dir():
            for child in path.iterdir():
                queue.append(child)
            continue

        # Only yield symlinks if they link to (a link to) a normal file.
        if path.is_symlink():
            symlinked = path.resolve()
            if symlinked.is_file():
                yield path
            continue

        if path.is_file():
            yield path


def compute_checksum(filepath: Path) -> str:
"""Compute the SHA256 checksum for the given file."""
    blocksize = 32 * 1024

    log.debug("Computing checksum of %s", filepath)
    with time_tracker.track_time(TimeInfo, "computing_checksums"):
        hasher = hashlib.sha256()
        with filepath.open("rb") as infile:
            while True:
                block = infile.read(blocksize)
                if not block:
                    break
                hasher.update(block)
        checksum = hasher.hexdigest()
    return checksum


def _cache_path(filepath: Path) -> Path:
"""Compute the cache file for the given file path."""
fs_encoding = sys.getfilesystemencoding()
filepath = filepath.absolute()
# Reverse the directory, because most variation is in the last bytes.
rev_dir = str(filepath.parent)[::-1]
encoded_path = filepath.stem + rev_dir + filepath.suffix
2023-01-10 11:41:55 +01:00
cache_key = (
base64.urlsafe_b64encode(encoded_path.encode(fs_encoding)).decode().rstrip("=")
)
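
    # Shard the cache on the first 10 characters of the key, so that no single
    # directory has to hold an entry for every file ever hashed.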
    cache_path = CACHE_ROOT / cache_key[:10] / cache_key[10:]
    return cache_path


def compute_cached_checksum(filepath: Path) -> str:
"""Computes the SHA256 checksum.
The checksum is cached to disk. If the cache is still valid, it is used to
skip the actual SHA256 computation.
"""
    with time_tracker.track_time(TimeInfo, "checksum_cache_handling"):
        current_stat = filepath.stat()
        cache_path = _cache_path(filepath)

        try:
            with cache_path.open("r") as cache_file:
                payload = json.load(cache_file)
        except (OSError, ValueError):
            # File may not exist, or have invalid contents.
            pass
        else:
            checksum = payload.get("checksum", "")
            cached_mtime = payload.get("file_mtime", 0.0)
            cached_size = payload.get("file_size", -1)
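
            # Trust the cached checksum only when the file size matches exactly
            # and the mtime is within 10 ms of what was recorded.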
            if (
                checksum
                and current_stat.st_size == cached_size
                and abs(cached_mtime - current_stat.st_mtime) < 0.01
            ):
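                # Refresh the cache file's mtime so cleanup_cache() sees it as
                # recently used.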
                cache_path.touch()
                return checksum

    checksum = compute_checksum(filepath)

    with time_tracker.track_time(TimeInfo, "checksum_cache_handling"):
        payload = {
            "checksum": checksum,
            "file_mtime": current_stat.st_mtime,
            "file_size": current_stat.st_size,
        }
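
        # Writing the cache is best-effort: on failure, the only cost is
        # recomputing the checksum on the next run.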
        try:
            cache_path.parent.mkdir(parents=True, exist_ok=True)
            with cache_path.open("w") as cache_file:
                json.dump(payload, cache_file)
        except IOError as ex:
            log.warning("Unable to write checksum cache file %s: %s", cache_path, ex)

    return checksum


def cleanup_cache() -> None:
"""Remove all cache files that are older than MAX_CACHE_FILES_AGE_SECS."""
if not CACHE_ROOT.exists():
return

    with time_tracker.track_time(TimeInfo, "checksum_cache_handling"):
        queue = deque([CACHE_ROOT])
        rmdir_queue = []

        now = time.time()
        num_removed_files = 0
        num_removed_dirs = 0

        while queue:
            path = queue.popleft()

            if path.is_dir():
                queue.extend(path.iterdir())
                rmdir_queue.append(path)
                continue

            assert path.is_file()
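            # Sanity check: relative_to() raises ValueError if the walk somehow
            # escaped CACHE_ROOT.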
            path.relative_to(CACHE_ROOT)

            age = now - path.stat().st_mtime
            # Don't trust files from the future either.
            if 0 <= age <= MAX_CACHE_FILES_AGE_SECS:
                continue

            path.unlink()
            num_removed_files += 1
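
        # Directories were queued top-down, so remove them deepest-first;
        # rmdir() only succeeds on empty directories, and failures are ignored.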
        for dirpath in reversed(rmdir_queue):
            assert dirpath.is_dir()
            dirpath.relative_to(CACHE_ROOT)
            try:
                dirpath.rmdir()
                num_removed_dirs += 1
            except OSError:
                pass

    if num_removed_dirs or num_removed_files:
        log.info(
            "Cache Cleanup: removed %d dirs and %d files",
            num_removed_dirs,
            num_removed_files,
        )
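

# Example usage (a minimal sketch; the project path below is hypothetical):
#
#     from pathlib import Path
#     from blender_asset_tracer.pack.shaman import cache
#
#     for filepath in cache.find_files(Path("/path/to/project")):
#         print(cache.compute_cached_checksum(filepath), filepath)
#     cache.cleanup_cache()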