Added support for uploads to S3 by using the boto3 client.

This commit is contained in:
Sybren A. Stüvel 2018-03-09 16:48:34 +01:00
parent 4a0673918d
commit ea832cd666
4 changed files with 207 additions and 2 deletions

View File

@ -9,6 +9,23 @@ Blender Asset Tracer, a.k.a. BAT🦇, is the replacement of
Development is driven by choices explained in [T54125](https://developer.blender.org/T54125). Development is driven by choices explained in [T54125](https://developer.blender.org/T54125).
## Uploading to S3-compatible storage
BAT Pack supports uploading to S3-compatible storage. This requires a credentials file in
`~/.aws/credentials` containing a section named after the endpoint host name. Replace the all-capital words to suit your situation.
[ENDPOINT]
aws_access_key_id = YOUR_ACCESS_KEY_ID
aws_secret_access_key = YOUR_SECRET
You can then send a BAT Pack to the storage using a target `s3:/endpoint/bucketname/path-in-bucket`, for example:
bat pack my_blendfile.blend s3:/storage.qarnot.com/jobs/awesome_work
This will upload the blend file and its dependencies to `awesome_work/my_blendfile.blend` in
the `jobs` bucket.
## Paths ## Paths
There are two object types used to represent file paths. Those are strictly separated. There are two object types used to represent file paths. Those are strictly separated.

View File

@ -30,9 +30,9 @@ def add_parser(subparsers):
def cli_pack(args): def cli_pack(args):
bpath, ppath, tpath = paths_from_cli(args) bpath, ppath, tpath = paths_from_cli(args)
packer = pack.Packer(bpath, ppath, tpath, args.noop)
packer.strategise()
packer = create_packer(args, bpath, ppath, tpath)
packer.strategise()
try: try:
packer.execute() packer.execute()
except blender_asset_tracer.pack.transfer.FileTransferError as ex: except blender_asset_tracer.pack.transfer.FileTransferError as ex:
@ -41,6 +41,25 @@ def cli_pack(args):
raise SystemExit(1) raise SystemExit(1)
def create_packer(args, bpath: pathlib.Path, ppath: pathlib.Path,
                  tpath: pathlib.Path) -> pack.Packer:
    """Create the packer that matches the pack target.

    A target of the form ``s3:/endpoint/bucketname/path-in-bucket`` produces an
    S3 packer; any other target produces a regular ``pack.Packer``.

    :param args: parsed CLI arguments; only ``args.noop`` is read here.
    :param bpath: path of the blend file to pack.
    :param ppath: path of the project.
    :param tpath: pack target, either a regular path or an ``s3:/`` target.
    :returns: the packer to use for this target.
    :raises ValueError: when an S3 target is combined with no-op mode, or
        when the S3 target lacks an endpoint or a bucket name.
    """
    # as_posix() keeps the prefix check working when the platform's path
    # separator is a backslash; on POSIX it is identical to str(tpath).
    if not tpath.as_posix().startswith('s3:/'):
        return pack.Packer(bpath, ppath, tpath, args.noop)

    if args.noop:
        raise ValueError('S3 uploader does not support no-op.')

    # Split the target path into 's3:', hostname, and actual target path.
    # The actual target path must at least contain the bucket name, as the
    # uploader uses its first component as the bucket.
    parts = tpath.parts
    if len(parts) < 3:
        raise ValueError(
            'S3 target %s should have the form '
            's3:/endpoint/bucketname/path-in-bucket' % tpath)

    # Imported only when an S3 target is actually requested.
    from blender_asset_tracer.pack import s3

    endpoint = parts[1]
    tpath = pathlib.Path(*parts[2:])
    log.info('Uploading to S3-compatible storage %s at %s', endpoint, tpath)

    return s3.S3Packer(bpath, ppath, tpath, endpoint=endpoint)
def paths_from_cli(args) -> typing.Tuple[pathlib.Path, pathlib.Path, pathlib.Path]: def paths_from_cli(args) -> typing.Tuple[pathlib.Path, pathlib.Path, pathlib.Path]:
"""Return paths to blendfile, project, and pack target. """Return paths to blendfile, project, and pack target.

View File

@ -0,0 +1,166 @@
"""Amazon S3-compatible uploader."""
import typing
import hashlib
import logging
import pathlib
import threading
from . import Packer, transfer
log = logging.getLogger(__name__)
def compute_md5(filepath: pathlib.Path) -> str:
    """Return the hexadecimal MD5 digest of the contents of a file.

    The file is read in blocks of 10240 bytes, so it never has to be held
    in memory as a whole.

    :param filepath: the file to hash.
    :returns: the MD5 sum as a hexadecimal string.
    """
    digest = hashlib.md5()
    with filepath.open('rb') as stream:
        # iter() with a sentinel keeps calling read() until it returns the
        # empty bytes object, which signals end-of-file.
        for chunk in iter(lambda: stream.read(10240), b''):
            digest.update(chunk)
    return digest.hexdigest()
class S3Packer(Packer):
    """Packer that uploads the BAT Pack to S3-compatible storage."""

    def __init__(self, *args, endpoint, **kwargs) -> None:
        """Create the packer and an S3 client for the given endpoint.

        :param endpoint: host name of the S3-compatible service. It is used
            both as the profile name for looking up credentials and, with
            'https://' prepended, as the URL of the service.

        All other arguments are passed on to the Packer constructor.
        """
        super().__init__(*args, **kwargs)

        # Imported here rather than at module level, so that boto3 is only
        # needed once an S3Packer is actually constructed.
        import boto3

        # Using the endpoint as profile name makes boto3 read the credentials
        # from the [endpoint] section in ~/.aws/credentials.
        # See https://boto3.readthedocs.io/en/latest/guide/configuration.html#guide-configuration
        self.session = boto3.Session(profile_name=endpoint)

        service_url = 'https://%s' % endpoint
        self.client = self.session.client('s3', endpoint_url=service_url)

    def set_credentials(self,
                        endpoint: str,
                        access_key_id: str,
                        secret_access_key: str):
        """Replace the S3 client by one that uses explicit credentials.

        :param endpoint: passed unmodified as the client's endpoint URL; note
            that, unlike the constructor, nothing is prepended to it.
        :param access_key_id: the access key ID to authenticate with.
        :param secret_access_key: the secret belonging to the access key ID.
        """
        import boto3

        client_options = {
            'endpoint_url': endpoint,
            'aws_access_key_id': access_key_id,
            'aws_secret_access_key': secret_access_key,
        }
        self.client = boto3.client('s3', **client_options)

    def _create_file_transferer(self) -> transfer.FileTransferer:
        """Return a transferrer that uploads through this packer's S3 client."""
        uploader = S3Transferrer(self.client)
        return uploader
class S3Transferrer(threading.Thread, transfer.FileTransferer):
    """Uploads queued files to S3-compatible storage in a separate thread.

    Files that already exist on the server with the same MD5 sum and size
    are skipped. For 'move' actions the source file is deleted after it has
    been uploaded or skipped.
    """

    class AbortUpload(Exception):
        """Raised from the upload callback to abort an upload."""

    def __init__(self, botoclient) -> None:
        """Create the transferrer.

        :param botoclient: the boto3 S3 client used for all server requests.
        """
        # Stupid Thread.__init__ doesn't call super().__init__(),
        # so it doesn't get chained to transfer.FileTransferer.__init__().
        # However, I want to have Thread as first subclass so that its
        # start() and join() methods Just Work™.
        threading.Thread.__init__(self)
        transfer.FileTransferer.__init__(self)

        self.client = botoclient

    def run(self) -> None:
        """Upload every queued file, stopping at the first failure or abort."""
        files_transferred = 0
        files_skipped = 0

        for src, dst, act in self.iter_queue():
            try:
                did_upload = self.upload_file(src, dst)

                if not did_upload and self.abort.is_set():
                    # upload_file() returns False both when the file was
                    # skipped and when the upload was interrupted. With the
                    # abort flag set the file may not be on the server, so
                    # it must be neither counted as skipped nor deleted for
                    # a 'move'. Put it back so the main thread can see that
                    # it was not transferred, and stop.
                    self.queue.put((src, dst, act))
                    return

                files_transferred += did_upload
                files_skipped += not did_upload

                if act == transfer.Action.MOVE:
                    self.delete_file(src)
            except Exception:
                # We have to catch exceptions in a broad way, as this is running in
                # a separate thread, and exceptions won't otherwise be seen.
                log.exception('Error transferring %s to %s', src, dst)
                # Put the files to copy back into the queue, and abort. This allows
                # the main thread to inspect the queue and see which files were not
                # copied. The one we just failed (due to this exception) should also
                # be reported there.
                self.queue.put((src, dst, act))
                return

        if files_transferred:
            log.info('Transferred %d files', files_transferred)
        if files_skipped:
            log.info('Skipped %d files', files_skipped)

    def upload_file(self, src: pathlib.Path, dst: pathlib.Path) -> bool:
        """Upload a file to an S3 bucket.

        The first part of 'dst' is used as the bucket name, the remainder as
        the path inside the bucket.

        :param src: the local file to upload.
        :param dst: bucket name followed by the path inside the bucket.
        :returns: True if the file was uploaded, False if it was skipped
            or the upload was interrupted.
        """
        bucket = dst.parts[0]
        dst_path = pathlib.Path(*dst.parts[1:])
        md5 = compute_md5(src)
        # S3 keys use forward slashes regardless of the local path separator.
        key = dst_path.as_posix()

        existing_md5, existing_size = self.get_metadata(bucket, key)
        if md5 == existing_md5 and src.stat().st_size == existing_size:
            log.debug('skipping %s, it already exists on the server with MD5 %s',
                      src, existing_md5)
            return False

        # TODO(Sybren): when queueing files inspect their size, and have a
        # callback that reports the total progress.
        log.info('Uploading %s', src)
        try:
            # The MD5 sum is stored as object metadata, so that a later run
            # can compare it in get_metadata() and skip unchanged files.
            self.client.upload_file(str(src),
                                    Bucket=bucket,
                                    Key=key,
                                    Callback=self._upload_callback,
                                    ExtraArgs={'Metadata': {'md5': md5}})
        except self.AbortUpload:
            return False
        return True

    def _upload_callback(self, bytes_uploaded: int):
        """Interrupt the ongoing upload when the abort flag has been set.

        :raises AbortUpload: when the abort flag is set.
        """
        if self.abort.is_set():
            log.warning('Interrupting ongoing upload')
            raise self.AbortUpload('interrupting ongoing upload')

    def delete_file(self, path: pathlib.Path):
        """Deletes a file, only logging a warning if deletion fails."""
        log.debug('Deleting %s, file has been uploaded', path)
        try:
            path.unlink()
        except IOError as ex:
            log.warning('Unable to delete %s: %s', path, ex)

    def get_metadata(self, bucket: str, key: str) -> typing.Tuple[str, int]:
        """Get MD5 sum and size on S3.

        :param bucket: name of the bucket to look in.
        :param key: key of the object inside the bucket.
        :returns: the MD5 hexadecimal hash and the file size in bytes.
            If the file does not exist or has no known MD5 sum,
            returns ('', -1)
        :raises ValueError: when the server responds with an error other
            than 404.
        """
        import botocore.exceptions

        try:
            info = self.client.head_object(Bucket=bucket, Key=key)
        except botocore.exceptions.ClientError as ex:
            # Tolerate a response without an 'Error' entry, so that the
            # ValueError below is raised instead of an AttributeError.
            error_info = ex.response.get('Error') or {}
            error_code = error_info.get('Code', 'Unknown')
            # error_code already is a string, but this makes the code forward
            # compatible with a time where they use integer codes.
            if str(error_code) == '404':
                return '', -1
            # Wrap the response in a tuple; it is a dict, which the %
            # operator would otherwise treat as a mapping of named values.
            raise ValueError('error response: %s' % (ex.response,)) from None

        try:
            return info['Metadata']['md5'], info['ContentLength']
        except KeyError:
            return '', -1

View File

@ -39,5 +39,8 @@ setup(
'bat = blender_asset_tracer.cli:cli_main', 'bat = blender_asset_tracer.cli:cli_main',
], ],
}, },
extras_require={
's3': ['boto3'],
},
zip_safe=True, zip_safe=True,
) )