The Shaman server is a file storage system that identifies files by their SHA256 checksum and file length. BAT can send packs there by only uploading changed/new files. The BAT pack is reproduced at the Shaman server's checkout directory by creating symlinks to the files in its file storage. Retrying sending files: When we can defer uploading a file (that is, when we have other files to upload as well, and we could send the current file at a later moment) we send an `X-Shaman-Can-Defer-Upload: true` header in the file upload request. In that case, when someone else is already uploading that file, a `208 Already Reported` response is sent and the connection is closed. Python's Requests library unfortunately won't give us that response if we're still streaming the request, and raises a ConnectionError exception instead. This exception can mean two things: - If the `X-Shaman-Can-Defer-Upload: true` header was sent: someone else is currently uploading that file, so defer it. - If that header was not sent: that file is already completely uploaded and does not need to be uploaded again. Instead of retrying each failed file, after a few failures we now just resend the definition file to get a new list of files to upload, then send those. This should considerably reduce the number of HTTP calls when multiple clients are uploading the same set of files.
341 lines
14 KiB
Python
341 lines
14 KiB
Python
# ***** BEGIN GPL LICENSE BLOCK *****
|
|
#
|
|
# This program is free software; you can redistribute it and/or
|
|
# modify it under the terms of the GNU General Public License
|
|
# as published by the Free Software Foundation; either version 2
|
|
# of the License, or (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software Foundation,
|
|
# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
#
|
|
# ***** END GPL LICENCE BLOCK *****
|
|
#
|
|
# (c) 2019, Blender Foundation - Sybren A. Stüvel
|
|
import collections
|
|
import logging
|
|
import pathlib
|
|
import random
|
|
import typing
|
|
|
|
import requests
|
|
|
|
import blender_asset_tracer.pack.transfer as bat_transfer
|
|
|
|
# Upper bound on the number of distinct files that may be deferred within one
# upload pass; once reached, uploads no longer announce that they can be deferred.
MAX_DEFERRED_PATHS = 8
# When more than this many uploads have failed within one upload pass, the pass
# is aborted and all remaining files are reported as not uploaded.
MAX_FAILED_PATHS = 8
|
|
|
|
|
|
class FileInfo:
    """Checksum, size and source location of a single file to send to the Shaman."""

    def __init__(self, checksum: str, filesize: int, abspath: pathlib.Path):
        # The checksum and size are used to build the upload URL; abspath is
        # the file that is opened and streamed when uploading.
        self.checksum, self.filesize, self.abspath = checksum, filesize, abspath
|
|
|
|
|
|
class ShamanTransferrer(bat_transfer.FileTransferer):
|
|
"""Sends files to a Shaman server."""
|
|
|
|
class AbortUpload(Exception):
    """Raised from the upload callback to abort an upload.

    ``report_transferred()`` raises this when the abort flag is set.
    """
|
|
|
|
def __init__(self, auth_token: str, project_root: pathlib.Path,
             shaman_endpoint: str, checkout_id: str) -> None:
    """Set up a transfer to a Shaman server.

    :param auth_token: handed to the ShamanClient together with the endpoint.
    :param project_root: stored on the instance as ``project_root``.
    :param shaman_endpoint: handed to the ShamanClient.
    :param checkout_id: ID under which the checkout is requested after the
        upload; when empty, no checkout is requested.
    """
    from . import client as shaman_client

    super().__init__()
    self.client = shaman_client.ShamanClient(auth_token, shaman_endpoint)
    self.checkout_id = checkout_id
    self.project_root = project_root
    self.log = logging.getLogger(__name__)

    # Filled while the checkout definition is built: maps each relative path
    # in the definition to the FileInfo of the file to send.
    self._file_info = {}  # type: typing.Dict[str, FileInfo]

    # Location of the checkout, as returned by the Shaman when it creates
    # one. It can then be combined with the project-relative path of the
    # to-be-rendered blend file (e.g. the one 'bat pack' was pointed at).
    # Stays empty until a checkout was created.
    self._checkout_location = ''
|
|
|
|
# noinspection PyBroadException
def run(self) -> None:
    """Send the queued files to the Shaman and request a checkout.

    Builds the checkout definition, then repeatedly sends it to the Shaman
    to learn which files are still missing and uploads those, for at most
    50 attempts. Once an attempt ends without failed paths the checkout is
    requested and the sources marked for deletion are deleted.

    Any exception is logged and reported via ``error_set()`` rather than
    propagated.
    """
    max_tries = 50

    try:
        # Constructing the Shaman Checkout Definition file blocks until the
        # entire list of files to transfer is known.
        definition_file, allowed_relpaths, delete_when_done = self._create_checkout_definition()
        if not definition_file:
            # An error has already been logged.
            return

        self.log.info('Created checkout definition file of %d KiB',
                      len(definition_file) // 1024)
        self.log.info('Feeding %d files to the Shaman', len(self._file_info))

        for attempt in range(1, max_tries + 1):
            # Ask the Shaman which files it still needs from us.
            to_upload = self._send_checkout_def_to_shaman(definition_file, allowed_relpaths)
            if to_upload is None:
                # An error has already been logged.
                return

            self.log.info('Upload attempt %d', attempt)
            if not self._upload_files(to_upload):
                # Nothing failed, so everything is at the Shaman now.
                break

            # Failed paths are expected when multiple clients are sending
            # the same files. Rather than retrying file by file, the next
            # iteration re-sends the checkout definition to obtain a fresh
            # list of files to upload.
        else:
            # Every attempt ended with failed paths.
            self.log.error('Aborting upload due to too many failures')
            self.error_set('Giving up after %d attempts to upload the files' % max_tries)
            return

        self.log.info('All files uploaded succesfully')
        self._request_checkout(definition_file)

        # Delete the files that were supposed to be moved.
        for src in delete_when_done:
            self.delete_file(src)

    except Exception as ex:
        # Broad catch on purpose: this runs in a separate thread, and
        # exceptions would otherwise not be seen.
        self.log.exception('Error transferring files to Shaman')
        self.error_set('Unexpected exception transferring files to Shaman: %s' % ex)
|
|
|
|
# noinspection PyBroadException
def _create_checkout_definition(self) \
        -> typing.Tuple[bytes, typing.Set[str], typing.List[pathlib.Path]]:
    """Build the checkout definition file for this BAT pack.

    Every queued file contributes one ``<checksum> <filesize> <relpath>``
    line, and is registered in ``self._file_info`` under its relative path.

    :returns: the checkout definition (as bytes), the set of relative paths
        listed in it, and the list of source paths to delete afterwards.

    If a file could not be processed, the transfer is aborted: the file is
    put back into the queue, the error is reported, and the returned
    checkout definition is empty.
    """
    from . import cache

    encoded_lines = []  # type: typing.List[bytes]
    sources_to_delete = []  # type: typing.List[pathlib.Path]

    # Remember which relative paths we offer to the Shaman, so that it
    # cannot ask us to upload files we never intended to send.
    offered_relpaths = set()  # type: typing.Set[str]

    for src, dst, act in self.iter_queue():
        try:
            checksum = cache.compute_cached_checksum(src)
            filesize = src.stat().st_size
            # The relative path is the destination minus its first character.
            # NOTE(review): presumably that character is the leading path
            # separator of an absolute destination -- confirm with the packer.
            relpath = str(dst)[1:]

            self._file_info[relpath] = FileInfo(checksum=checksum,
                                                filesize=filesize,
                                                abspath=src)
            encoded_lines.append(('%s %s %s' % (checksum, filesize, relpath)).encode('utf8'))
            offered_relpaths.add(relpath)

            if act == bat_transfer.Action.MOVE:
                sources_to_delete.append(src)
        except Exception:
            # Broad catch on purpose: this runs in a separate thread, and
            # exceptions would otherwise not be seen.
            msg = 'Error transferring %s to %s' % (src, dst)
            self.log.exception(msg)
            # Put the failed file back into the queue and abort, so that the
            # main thread can inspect the queue and see which files were not
            # copied, including the one that just failed.
            self.queue.put((src, dst, act))
            self.error_set(msg)
            return b'', set(), sources_to_delete

    cache.cleanup_cache()
    return b'\n'.join(encoded_lines), offered_relpaths, sources_to_delete
|
|
|
|
def _send_checkout_def_to_shaman(self, definition_file: bytes,
                                 allowed_relpaths: typing.Set[str]) \
        -> typing.Optional[typing.Deque[str]]:
    """Send the checkout definition file to the Shaman.

    The Shaman answers with one ``<response> <path>`` line per file that
    still needs attention. Files it does not have are placed at the front
    of the returned queue; files somebody else is already uploading are
    placed at the back.

    :param definition_file: the checkout definition, as bytes.
    :param allowed_relpaths: the relative paths we are willing to upload;
        a request for any other path is treated as an error.
    :return: A deque of paths (relative to the project root) that still
        need to be uploaded, or None if there was an error. Errors are
        logged and reported via ``error_set()``.
    """

    def fail(msg: str) -> None:
        """Log the error and flag the transfer as failed."""
        self.log.error(msg)
        self.error_set(msg)

    resp = self.client.post('checkout/requirements', data=definition_file, stream=True,
                            headers={'Content-Type': 'text/plain'})
    try:
        if resp.status_code >= 300:
            fail('Error from Shaman, code %d: %s' % (resp.status_code, resp.text))
            return None

        to_upload = collections.deque()  # type: typing.Deque[str]
        for raw_line in resp.iter_lines():
            if not raw_line:
                # Blank (keep-alive) lines carry no information.
                continue

            line = raw_line.decode('utf8')
            response, separator, path = line.partition(' ')
            if not separator:
                fail('Malformed response line from Shaman: %r' % line)
                return None
            self.log.debug(' %s: %s', response, path)

            # Handle errors before validating the path: for an ERROR line the
            # second field is the error message, not one of our paths, so
            # checking it against allowed_relpaths first would hide the
            # actual message behind a misleading one.
            if response == 'ERROR':
                fail('Error from Shaman: %s' % path)
                return None

            if path not in allowed_relpaths:
                fail('Shaman requested path we did not intend to upload: %r' % path)
                return None

            if response == 'does-not-exist':
                to_upload.appendleft(path)
            elif response == 'already-uploading':
                to_upload.append(path)
            else:
                fail('Unknown response from Shaman for path %r: %r' % (path, response))
                return None

        return to_upload
    finally:
        # The response is streamed; close it explicitly so that the
        # connection is released even when we return before reading it all.
        resp.close()
|
|
|
|
def _upload_files(self, to_upload: typing.Deque[str]) -> typing.Set[str]:
    """Actually upload the files to Shaman.

    Files are taken from the front of ``to_upload`` one by one. As long as
    there are other files left to send, and the file was not deferred
    before, the upload request announces that it can be deferred. When the
    Shaman then signals that someone else is already uploading the file
    (a 208 response, or a closed connection), the file is moved to the back
    of the queue and tried again later without the option to defer.

    :param to_upload: relative paths of the files to upload; each must be a
        key of ``self._file_info``.
    :return: the set of files that we did not upload. After too many
        failures the pass is cut short and all remaining files are included.
    """
    failed_paths = set()  # type: typing.Set[str]
    deferred_paths = set()  # type: typing.Set[str]

    def defer(some_path: str) -> None:
        nonlocal to_upload

        self.log.info(' %s deferred (already being uploaded by someone else)', some_path)
        deferred_paths.add(some_path)

        # Instead of deferring just this one file, randomize the files to upload.
        # This prevents multiple deferrals when someone else is uploading
        # files from the same project (because it probably happens alphabetically).
        all_files = list(to_upload)
        random.shuffle(all_files)
        to_upload = collections.deque(all_files)

        # Put the deferred file back at the end of the queue. Without this
        # it would never be visited again: not uploaded, yet not reported
        # as failed either.
        to_upload.append(some_path)

    if not to_upload:
        self.log.info('All %d files are at the Shaman already', len(self._file_info))
        self.report_transferred(0)
        return failed_paths

    self.log.info('Going to upload %d of %d files', len(to_upload), len(self._file_info))
    uploaded_files = 0
    uploaded_bytes = 0
    already_there = 0  # Files the Shaman turned out to have already.
    while to_upload:
        # After too many failures, just retry to get a fresh set of files to upload.
        if len(failed_paths) > MAX_FAILED_PATHS:
            self.log.info('Too many failures, going to abort this iteration')
            failed_paths.update(to_upload)
            return failed_paths

        path = to_upload.popleft()
        fileinfo = self._file_info[path]
        self.log.info(' %s', path)

        # Let the Shaman know whether we can defer uploading this file or not.
        # Deferring only makes sense when there is something else to send
        # in the meantime, and each file is deferred at most once.
        headers = {}
        can_defer = (len(deferred_paths) < MAX_DEFERRED_PATHS
                     and path not in deferred_paths
                     and bool(to_upload))
        if can_defer:
            headers['X-Shaman-Can-Defer-Upload'] = 'true'

        url = 'files/%s/%d' % (fileinfo.checksum, fileinfo.filesize)
        try:
            with fileinfo.abspath.open('rb') as infile:
                resp = self.client.post(url, data=infile, headers=headers)

        except requests.ConnectionError as ex:
            if can_defer:
                # Closing the connection with an 'X-Shaman-Can-Defer-Upload: true' header
                # indicates that we should defer the upload. Requests doesn't give us the
                # reply, even though it was written by the Shaman before it closed the
                # connection.
                defer(path)
            else:
                self.log.info(' %s could not be uploaded, might retry later: %s', path, ex)
                failed_paths.add(path)
            continue

        if resp.status_code == 208:
            # For small files we get the 208 response, because the server closes the
            # connection after we sent the entire request. For bigger files the server
            # responds sooner, and Requests gives us the above ConnectionError.
            if can_defer:
                defer(path)
            else:
                self.log.info(' %s skipped (already existed on the server)', path)
                already_there += 1
            continue

        if resp.status_code >= 300:
            msg = 'Error from Shaman uploading %s, code %d: %s' % (
                fileinfo.abspath, resp.status_code, resp.text)
            self.log.error(msg)
            self.error_set(msg)
            # NOTE(review): the files that were not sent yet are not added to
            # the returned set here, so the caller relies on error_set()
            # having been called to notice this failure.
            return failed_paths

        failed_paths.discard(path)
        uploaded_files += 1
        file_size = fileinfo.abspath.stat().st_size
        uploaded_bytes += file_size
        self.report_transferred(file_size)

    # The queue is empty at this point, so the number of files that were
    # already present is counted explicitly instead of derived from it.
    self.log.info('Uploaded %d bytes in %d files (%d files were already there)',
                  uploaded_bytes, uploaded_files, already_there)

    if not failed_paths:
        self.log.info('Done uploading files')

    return failed_paths
|
|
|
|
def report_transferred(self, bytes_transferred: int):
    """Report transferred bytes, or abort when the abort flag is set.

    :param bytes_transferred: the number of bytes to report to the superclass.
    :raises AbortUpload: when the abort flag is set; nothing is reported then.
    """
    if not self._abort.is_set():
        super().report_transferred(bytes_transferred)
        return

    self.log.warning('Interrupting ongoing upload')
    raise self.AbortUpload('interrupting ongoing upload')
|
|
|
|
def _request_checkout(self, definition_file: bytes):
    """Ask the Shaman to create a checkout of this BAT pack.

    Does nothing (apart from logging a warning) when no checkout ID is set.
    On success the stripped response body is stored as the checkout
    location; on an error status the error is logged and reported via
    ``error_set()``.

    :param definition_file: the checkout definition, as bytes.
    """
    checkout_id = self.checkout_id
    if not checkout_id:
        self.log.warning('NOT requesting checkout at Shaman')
        return

    self.log.info('Requesting checkout at Shaman for checkout_id=%r', checkout_id)
    endpoint = 'checkout/create/%s' % checkout_id
    resp = self.client.post(endpoint, data=definition_file,
                            headers={'Content-Type': 'text/plain'})

    if resp.status_code < 300:
        self._checkout_location = resp.text.strip()
        self.log.info('Response from Shaman, code %d: %s', resp.status_code, resp.text)
        return

    msg = 'Error from Shaman, code %d: %s' % (resp.status_code, resp.text)
    self.log.error(msg)
    self.error_set(msg)
|
|
|
|
@property
def checkout_location(self) -> str:
    """The checkout location as returned by the Shaman.

    :raises ValueError: when no checkout location has been stored yet.
    """
    location = self._checkout_location
    if location:
        return location
    raise ValueError('No checkout was created yet.')
|