From ea832cd6667f1158cc74afb84492febc34220b11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sybren=20A=2E=20St=C3=BCvel?= Date: Fri, 9 Mar 2018 16:48:34 +0100 Subject: [PATCH] Added support for uploads to S3 by using the boto3 client. --- README.md | 17 ++++ blender_asset_tracer/cli/pack.py | 23 ++++- blender_asset_tracer/pack/s3.py | 166 +++++++++++++++++++++++++++++++ setup.py | 3 + 4 files changed, 207 insertions(+), 2 deletions(-) create mode 100644 blender_asset_tracer/pack/s3.py diff --git a/README.md b/README.md index 41cd4cf..dd41044 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,23 @@ Blender Asset Tracer, a.k.a. BAT🦇, is the replacement of Development is driven by choices explained in [T54125](https://developer.blender.org/T54125). +## Uploading to S3-compatible storage + +BAT Pack supports uploading to S3-compatible storage. This requires a credentials file in +`~/.aws/credentials`. Replace the all-capital words to suit your situation. + + [ENDPOINT] + aws_access_key_id = YOUR_ACCESS_KEY_ID + aws_secret_access_key = YOUR_SECRET + +You can then send a BAT Pack to the storage using a target `s3:/endpoint/bucketname/path-in-bucket`, for example: + + bat pack my_blendfile.blend s3:/storage.qarnot.com/jobs/awesome_work + +This will upload the blend file and its dependencies to `awesome_work/my_blendfile.blend` in +the `jobs` bucket. + + ## Paths There are two object types used to represent file paths. Those are strictly separated. 
diff --git a/blender_asset_tracer/cli/pack.py b/blender_asset_tracer/cli/pack.py index 41e26c2..cfc96e7 100644 --- a/blender_asset_tracer/cli/pack.py +++ b/blender_asset_tracer/cli/pack.py @@ -30,9 +30,9 @@ def add_parser(subparsers): def cli_pack(args): bpath, ppath, tpath = paths_from_cli(args) - packer = pack.Packer(bpath, ppath, tpath, args.noop) - packer.strategise() + packer = create_packer(args, bpath, ppath, tpath) + packer.strategise() try: packer.execute() except blender_asset_tracer.pack.transfer.FileTransferError as ex: @@ -41,6 +41,25 @@ def cli_pack(args): raise SystemExit(1) +def create_packer(args, bpath: pathlib.Path, ppath: pathlib.Path, + tpath: pathlib.Path) -> pack.Packer: + if str(tpath).startswith('s3:/'): + if args.noop: + raise ValueError('S3 uploader does not support no-op.') + + from blender_asset_tracer.pack import s3 + + # Split the target path into 's3:/', hostname, and actual target path + parts = tpath.parts + endpoint = parts[1] + tpath = pathlib.Path(*tpath.parts[2:]) + log.info('Uploading to S3-compatible storage %s at %s', endpoint, tpath) + + return s3.S3Packer(bpath, ppath, tpath, endpoint=endpoint) + + return pack.Packer(bpath, ppath, tpath, args.noop) + + def paths_from_cli(args) -> typing.Tuple[pathlib.Path, pathlib.Path, pathlib.Path]: """Return paths to blendfile, project, and pack target. diff --git a/blender_asset_tracer/pack/s3.py b/blender_asset_tracer/pack/s3.py new file mode 100644 index 0000000..0bae6f2 --- /dev/null +++ b/blender_asset_tracer/pack/s3.py @@ -0,0 +1,166 @@ +"""Amazon S3-compatible uploader.""" +import typing + +import hashlib +import logging +import pathlib +import threading + +from . 
import Packer, transfer + +log = logging.getLogger(__name__) + + +def compute_md5(filepath: pathlib.Path) -> str: + hasher = hashlib.md5() + with filepath.open('rb') as infile: + while True: + block = infile.read(10240) + if not block: + break + hasher.update(block) + md5 = hasher.hexdigest() + return md5 + + +class S3Packer(Packer): + """Creates BAT Packs on S3-compatible storage.""" + + def __init__(self, *args, endpoint, **kwargs) -> None: + super().__init__(*args, **kwargs) + import boto3 + + # Create a session so that credentials can be read from the [endpoint] + # section in ~/.aws/credentials. + # See https://boto3.readthedocs.io/en/latest/guide/configuration.html#guide-configuration + self.session = boto3.Session(profile_name=endpoint) + self.client = self.session.client('s3', endpoint_url='https://%s' % endpoint) + + def set_credentials(self, + endpoint: str, + access_key_id: str, + secret_access_key: str): + """Set S3 credentials.""" + import boto3 + + self.client = boto3.client('s3', + endpoint_url=endpoint, + aws_access_key_id=access_key_id, + aws_secret_access_key=secret_access_key) + + def _create_file_transferer(self) -> transfer.FileTransferer: + return S3Transferrer(self.client) + + +class S3Transferrer(threading.Thread, transfer.FileTransferer): + """Copies or moves files in source directory order.""" + + class AbortUpload(Exception): + """Raised from the upload callback to abort an upload.""" + + def __init__(self, botoclient) -> None: + # Stupid Thread.__init__ doesn't call super().__init__(), + # so it doesn't get chained to transfer.FileTransferer.__init__(). + # However, I want to have Thread as first subclass so that its + # start() and join() methods Just Work™. 
+        threading.Thread.__init__(self) +        transfer.FileTransferer.__init__(self) + +        self.client = botoclient + +    def run(self) -> None: +        files_transferred = 0 +        files_skipped = 0 + +        for src, dst, act in self.iter_queue(): +            try: +                did_upload = self.upload_file(src, dst) +                files_transferred += did_upload +                files_skipped += not did_upload + +                if act == transfer.Action.MOVE: +                    self.delete_file(src) +            except Exception: +                # We have to catch exceptions in a broad way, as this is running in +                # a separate thread, and exceptions won't otherwise be seen. +                log.exception('Error transferring %s to %s', src, dst) +                # Put the files to copy back into the queue, and abort. This allows +                # the main thread to inspect the queue and see which files were not +                # copied. The one we just failed (due to this exception) should also +                # be reported there. +                self.queue.put((src, dst, act)) +                return + +        if files_transferred: +            log.info('Transferred %d files', files_transferred) +        if files_skipped: +            log.info('Skipped %d files', files_skipped) + +    def upload_file(self, src: pathlib.Path, dst: pathlib.Path) -> bool: +        """Upload a file to an S3 bucket. + +        The first part of 'dst' is used as the bucket name, the remainder as the +        path inside the bucket. + +        :returns: True if the file was uploaded, False if it was skipped. +        """ +        bucket = dst.parts[0] +        dst_path = pathlib.Path(*dst.parts[1:]) +        md5 = compute_md5(src) +        key = str(dst_path) + +        existing_md5, existing_size = self.get_metadata(bucket, key) +        if md5 == existing_md5 and src.stat().st_size == existing_size: +            log.debug('skipping %s, it already exists on the server with MD5 %s', +                      src, existing_md5) +            return False + +        # TODO(Sybren): when queueing files inspect their size, and have a +        # callback that reports the total progress.
+        log.info('Uploading %s', src) +        try: +            self.client.upload_file(str(src), +                                    Bucket=bucket, +                                    Key=key, +                                    Callback=self._upload_callback, +                                    ExtraArgs={'Metadata': {'md5': md5}}) +        except self.AbortUpload: +            return False +        return True + +    def _upload_callback(self, bytes_uploaded: int): +        if self.abort.is_set(): +            log.warning('Interrupting ongoing upload') +            raise self.AbortUpload('interrupting ongoing upload') + +    def delete_file(self, path: pathlib.Path): +        """Deletes a file, only logging a warning if deletion fails.""" +        log.debug('Deleting %s, file has been uploaded', path) +        try: +            path.unlink() +        except IOError as ex: +            log.warning('Unable to delete %s: %s', path, ex) + +    def get_metadata(self, bucket: str, key: str) -> typing.Tuple[str, int]: +        """Get MD5 sum and size on S3. + +        :returns: the MD5 hexadecimal hash and the file size in bytes. +            If the file does not exist or has no known MD5 sum, +            returns ('', -1) +        """ +        import botocore.exceptions + +        try: +            info = self.client.head_object(Bucket=bucket, Key=key) +        except botocore.exceptions.ClientError as ex: +            error_code = ex.response.get('Error').get('Code', 'Unknown') +            # error_code already is a string, but this makes the code forward +            # compatible with a time where they use integer codes. +            if str(error_code) == '404': +                return '', -1 +            raise ValueError('error response: %r' % ex.response) from None + +        try: +            return info['Metadata']['md5'], info['ContentLength'] +        except KeyError: +            return '', -1 diff --git a/setup.py b/setup.py index a69ebc7..eb566bf 100644 --- a/setup.py +++ b/setup.py @@ -39,5 +39,8 @@ setup( 'bat = blender_asset_tracer.cli:cli_main', ], }, +    extras_require={ +        's3': ['boto3'], +    }, zip_safe=True, )