blender_asset_tracer/pack/shaman/transfer.py

405 lines
15 KiB
Python
Raw Normal View History

2021-10-18 15:54:04 +02:00
# ***** BEGIN GPL LICENSE BLOCK *****
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# ***** END GPL LICENCE BLOCK *****
#
# (c) 2019, Blender Foundation - Sybren A. Stüvel
import collections
import logging
import pathlib
import random
import typing
import requests
import blender_asset_tracer.pack.transfer as bat_transfer
from blender_asset_tracer import bpathlib
MAX_DEFERRED_PATHS = 8
MAX_FAILED_PATHS = 8
response_file_unknown = "file-unknown"
response_already_uploading = "already-uploading"
class FileInfo:
def __init__(self, checksum: str, filesize: int, abspath: pathlib.Path):
self.checksum = checksum
self.filesize = filesize
self.abspath = abspath
class ShamanTransferrer(bat_transfer.FileTransferer):
"""Sends files to a Shaman server."""
class AbortUpload(Exception):
"""Raised from the upload callback to abort an upload."""
2023-01-10 11:41:55 +01:00
def __init__(
self,
auth_token: str,
project_root: pathlib.Path,
shaman_endpoint: str,
checkout_id: str,
) -> None:
2021-10-18 15:54:04 +02:00
from . import client
2023-01-10 11:41:55 +01:00
2021-10-18 15:54:04 +02:00
super().__init__()
self.client = client.ShamanClient(auth_token, shaman_endpoint)
self.project_root = project_root
self.checkout_id = checkout_id
self.log = logging.getLogger(__name__)
self._file_info = {} # type: typing.Dict[str, FileInfo]
# When the Shaman creates a checkout, it'll return the location of that
# checkout. This can then be combined with the project-relative path
# of the to-be-rendered blend file (e.g. the one 'bat pack' was pointed
# at).
2023-01-10 11:41:55 +01:00
self._checkout_location = ""
2021-10-18 15:54:04 +02:00
self.uploaded_files = 0
self.uploaded_bytes = 0
# noinspection PyBroadException
def run(self) -> None:
try:
self.uploaded_files = 0
self.uploaded_bytes = 0
# Construct the Shaman Checkout Definition file.
# This blocks until we know the entire list of files to transfer.
2023-01-10 11:41:55 +01:00
(
definition_file,
allowed_relpaths,
delete_when_done,
) = self._create_checkout_definition()
2021-10-18 15:54:04 +02:00
if not definition_file:
# An error has already been logged.
return
2023-01-10 11:41:55 +01:00
self.log.info(
"Created checkout definition file of %d KiB",
len(definition_file) // 1024,
)
self.log.info("Feeding %d files to the Shaman", len(self._file_info))
2021-10-18 15:54:04 +02:00
if self.log.isEnabledFor(logging.INFO):
for path in self._file_info:
2023-01-10 11:41:55 +01:00
self.log.info(" - %s", path)
2021-10-18 15:54:04 +02:00
# Try to upload all the files.
failed_paths = set() # type: typing.Set[str]
max_tries = 50
for try_index in range(max_tries):
# Send the file to the Shaman and see what we still need to send there.
2023-01-10 11:41:55 +01:00
to_upload = self._send_checkout_def_to_shaman(
definition_file, allowed_relpaths
)
2021-10-18 15:54:04 +02:00
if to_upload is None:
# An error has already been logged.
return
if not to_upload:
break
# Send the files that still need to be sent.
2023-01-10 11:41:55 +01:00
self.log.info("Upload attempt %d", try_index + 1)
2021-10-18 15:54:04 +02:00
failed_paths = self._upload_files(to_upload)
if not failed_paths:
break
# Having failed paths at this point is expected when multiple
# clients are sending the same files. Instead of retrying on a
# file-by-file basis, we just re-send the checkout definition
# file to the Shaman and obtain a new list of files to upload.
if failed_paths:
2023-01-10 11:41:55 +01:00
self.log.error("Aborting upload due to too many failures")
self.error_set(
"Giving up after %d attempts to upload the files" % max_tries
)
2021-10-18 15:54:04 +02:00
return
2023-01-10 11:41:55 +01:00
self.log.info("All files uploaded succesfully")
2021-10-18 15:54:04 +02:00
self._request_checkout(definition_file)
# Delete the files that were supposed to be moved.
for src in delete_when_done:
self.delete_file(src)
except Exception as ex:
# We have to catch exceptions in a broad way, as this is running in
# a separate thread, and exceptions won't otherwise be seen.
2023-01-10 11:41:55 +01:00
self.log.exception("Error transferring files to Shaman")
self.error_set("Unexpected exception transferring files to Shaman: %s" % ex)
2021-10-18 15:54:04 +02:00
# noinspection PyBroadException
2023-01-10 11:41:55 +01:00
def _create_checkout_definition(
self,
) -> typing.Tuple[bytes, typing.Set[str], typing.List[pathlib.Path]]:
2021-10-18 15:54:04 +02:00
"""Create the checkout definition file for this BAT pack.
:returns: the checkout definition (as bytes), a set of paths in that file,
and list of paths to delete.
If there was an error and file transfer was aborted, the checkout
definition file will be empty.
"""
from . import cache
definition_lines = [] # type: typing.List[bytes]
delete_when_done = [] # type: typing.List[pathlib.Path]
# We keep track of the relative paths we want to send to the Shaman,
# so that the Shaman cannot ask us to upload files we didn't want to.
relpaths = set() # type: typing.Set[str]
for src, dst, act in self.iter_queue():
try:
checksum = cache.compute_cached_checksum(src)
filesize = src.stat().st_size
# relpath = dst.relative_to(self.project_root)
relpath = bpathlib.strip_root(dst).as_posix()
self._file_info[relpath] = FileInfo(
checksum=checksum,
filesize=filesize,
abspath=src,
)
2023-01-10 11:41:55 +01:00
line = "%s %s %s" % (checksum, filesize, relpath)
definition_lines.append(line.encode("utf8"))
2021-10-18 15:54:04 +02:00
relpaths.add(relpath)
if act == bat_transfer.Action.MOVE:
delete_when_done.append(src)
except Exception:
# We have to catch exceptions in a broad way, as this is running in
# a separate thread, and exceptions won't otherwise be seen.
2023-01-10 11:41:55 +01:00
msg = "Error transferring %s to %s" % (src, dst)
2021-10-18 15:54:04 +02:00
self.log.exception(msg)
# Put the files to copy back into the queue, and abort. This allows
# the main thread to inspect the queue and see which files were not
# copied. The one we just failed (due to this exception) should also
# be reported there.
self.queue.put((src, dst, act))
self.error_set(msg)
2023-01-10 11:41:55 +01:00
return b"", set(), delete_when_done
2021-10-18 15:54:04 +02:00
cache.cleanup_cache()
2023-01-10 11:41:55 +01:00
return b"\n".join(definition_lines), relpaths, delete_when_done
2021-10-18 15:54:04 +02:00
2023-01-10 11:41:55 +01:00
def _send_checkout_def_to_shaman(
self, definition_file: bytes, allowed_relpaths: typing.Set[str]
) -> typing.Optional[collections.deque]:
2021-10-18 15:54:04 +02:00
"""Send the checkout definition file to the Shaman.
:return: An iterable of paths (relative to the project root) that still
need to be uploaded, or None if there was an error.
"""
2023-01-10 11:41:55 +01:00
resp = self.client.post(
"checkout/requirements",
data=definition_file,
stream=True,
headers={"Content-Type": "text/plain"},
timeout=15,
)
2021-10-18 15:54:04 +02:00
if resp.status_code >= 300:
2023-01-10 11:41:55 +01:00
msg = "Error from Shaman, code %d: %s" % (resp.status_code, resp.text)
2021-10-18 15:54:04 +02:00
self.log.error(msg)
self.error_set(msg)
return None
to_upload = collections.deque() # type: collections.deque
for line in resp.iter_lines():
2023-01-10 11:41:55 +01:00
response, path = line.decode().split(" ", 1)
self.log.debug(" %s: %s", response, path)
2021-10-18 15:54:04 +02:00
if path not in allowed_relpaths:
2023-01-10 11:41:55 +01:00
msg = "Shaman requested path we did not intend to upload: %r" % path
2021-10-18 15:54:04 +02:00
self.log.error(msg)
self.error_set(msg)
return None
if response == response_file_unknown:
to_upload.appendleft(path)
elif response == response_already_uploading:
to_upload.append(path)
2023-01-10 11:41:55 +01:00
elif response == "ERROR":
msg = "Error from Shaman: %s" % path
2021-10-18 15:54:04 +02:00
self.log.error(msg)
self.error_set(msg)
return None
else:
2023-01-10 11:41:55 +01:00
msg = "Unknown response from Shaman for path %r: %r" % (path, response)
2021-10-18 15:54:04 +02:00
self.log.error(msg)
self.error_set(msg)
return None
return to_upload
def _upload_files(self, to_upload: collections.deque) -> typing.Set[str]:
"""Actually upload the files to Shaman.
Returns the set of files that we did not upload.
"""
failed_paths = set() # type: typing.Set[str]
deferred_paths = set()
def defer(some_path: str):
nonlocal to_upload
2023-01-10 11:41:55 +01:00
self.log.info(
" %s deferred (already being uploaded by someone else)", some_path
)
2021-10-18 15:54:04 +02:00
deferred_paths.add(some_path)
# Instead of deferring this one file, randomize the files to upload.
# This prevents multiple deferrals when someone else is uploading
# files from the same project (because it probably happens alphabetically).
all_files = list(to_upload)
random.shuffle(all_files)
to_upload = collections.deque(all_files)
if not to_upload:
2023-01-10 11:41:55 +01:00
self.log.info(
"All %d files are at the Shaman already", len(self._file_info)
)
2021-10-18 15:54:04 +02:00
self.report_transferred(0)
return failed_paths
2023-01-10 11:41:55 +01:00
self.log.info(
"Going to upload %d of %d files", len(to_upload), len(self._file_info)
)
2021-10-18 15:54:04 +02:00
while to_upload:
# After too many failures, just retry to get a fresh set of files to upload.
if len(failed_paths) > MAX_FAILED_PATHS:
2023-01-10 11:41:55 +01:00
self.log.info("Too many failures, going to abort this iteration")
2021-10-18 15:54:04 +02:00
failed_paths.update(to_upload)
return failed_paths
path = to_upload.popleft()
fileinfo = self._file_info[path]
2023-01-10 11:41:55 +01:00
self.log.info(" %s", path)
2021-10-18 15:54:04 +02:00
headers = {
2023-01-10 11:41:55 +01:00
"X-Shaman-Original-Filename": path,
2021-10-18 15:54:04 +02:00
}
# Let the Shaman know whether we can defer uploading this file or not.
2023-01-10 11:41:55 +01:00
can_defer = (
len(deferred_paths) < MAX_DEFERRED_PATHS
and path not in deferred_paths
and len(to_upload)
)
2021-10-18 15:54:04 +02:00
if can_defer:
2023-01-10 11:41:55 +01:00
headers["X-Shaman-Can-Defer-Upload"] = "true"
2021-10-18 15:54:04 +02:00
2023-01-10 11:41:55 +01:00
url = "files/%s/%d" % (fileinfo.checksum, fileinfo.filesize)
2021-10-18 15:54:04 +02:00
try:
2023-01-10 11:41:55 +01:00
with fileinfo.abspath.open("rb") as infile:
2021-10-18 15:54:04 +02:00
resp = self.client.post(url, data=infile, headers=headers)
except requests.ConnectionError as ex:
if can_defer:
# Closing the connection with an 'X-Shaman-Can-Defer-Upload: true' header
# indicates that we should defer the upload. Requests doesn't give us the
# reply, even though it was written by the Shaman before it closed the
# connection.
defer(path)
else:
2023-01-10 11:41:55 +01:00
self.log.info(
" %s could not be uploaded, might retry later: %s", path, ex
)
2021-10-18 15:54:04 +02:00
failed_paths.add(path)
continue
if resp.status_code == 208:
# For small files we get the 208 response, because the server closes the
# connection after we sent the entire request. For bigger files the server
# responds sooner, and Requests gives us the above ConnectionError.
if can_defer:
defer(path)
else:
2023-01-10 11:41:55 +01:00
self.log.info(" %s skipped (already existed on the server)", path)
2021-10-18 15:54:04 +02:00
continue
if resp.status_code >= 300:
2023-01-10 11:41:55 +01:00
msg = "Error from Shaman uploading %s, code %d: %s" % (
fileinfo.abspath,
resp.status_code,
resp.text,
)
2021-10-18 15:54:04 +02:00
self.log.error(msg)
self.error_set(msg)
return failed_paths
failed_paths.discard(path)
self.uploaded_files += 1
file_size = fileinfo.abspath.stat().st_size
self.uploaded_bytes += file_size
self.report_transferred(file_size)
if not failed_paths:
2023-01-10 11:41:55 +01:00
self.log.info(
"Done uploading %d bytes in %d files",
self.uploaded_bytes,
self.uploaded_files,
)
2021-10-18 15:54:04 +02:00
else:
2023-01-10 11:41:55 +01:00
self.log.info(
"Uploaded %d bytes in %d files so far",
self.uploaded_bytes,
self.uploaded_files,
)
2021-10-18 15:54:04 +02:00
return failed_paths
def report_transferred(self, bytes_transferred: int):
if self._abort.is_set():
2023-01-10 11:41:55 +01:00
self.log.warning("Interrupting ongoing upload")
raise self.AbortUpload("interrupting ongoing upload")
2021-10-18 15:54:04 +02:00
super().report_transferred(bytes_transferred)
def _request_checkout(self, definition_file: bytes):
"""Ask the Shaman to create a checkout of this BAT pack."""
if not self.checkout_id:
2023-01-10 11:41:55 +01:00
self.log.warning("NOT requesting checkout at Shaman")
2021-10-18 15:54:04 +02:00
return
2023-01-10 11:41:55 +01:00
self.log.info(
"Requesting checkout at Shaman for checkout_id=%r", self.checkout_id
)
resp = self.client.post(
"checkout/create/%s" % self.checkout_id,
data=definition_file,
headers={"Content-Type": "text/plain"},
)
2021-10-18 15:54:04 +02:00
if resp.status_code >= 300:
2023-01-10 11:41:55 +01:00
msg = "Error from Shaman, code %d: %s" % (resp.status_code, resp.text)
2021-10-18 15:54:04 +02:00
self.log.error(msg)
self.error_set(msg)
return
self._checkout_location = resp.text.strip()
2023-01-10 11:41:55 +01:00
self.log.info("Response from Shaman, code %d: %s", resp.status_code, resp.text)
2021-10-18 15:54:04 +02:00
@property
def checkout_location(self) -> str:
"""Returns the checkout location, or '' if no checkout was made."""
if not self._checkout_location:
2023-01-10 11:41:55 +01:00
return ""
2021-10-18 15:54:04 +02:00
return self._checkout_location