Added support for uploads to S3 by using the boto3 client.
This commit is contained in:
parent
4a0673918d
commit
ea832cd666
17
README.md
17
README.md
@ -9,6 +9,23 @@ Blender Asset Tracer, a.k.a. BAT🦇, is the replacement of
|
|||||||
Development is driven by choices explained in [T54125](https://developer.blender.org/T54125).
|
Development is driven by choices explained in [T54125](https://developer.blender.org/T54125).
|
||||||
|
|
||||||
|
|
||||||
|
## Uploading to S3-compatible storage
|
||||||
|
|
||||||
|
BAT Pack supports uploading to S3-compatible storage. This requires a credentials file in
|
||||||
|
`~/.aws/credentials`. Replace the all-capital words to suit your situation.
|
||||||
|
|
||||||
|
[ENDPOINT]
|
||||||
|
aws_access_key_id = YOUR_ACCESS_KEY_ID
|
||||||
|
aws_secret_access_key = YOUR_SECRET
|
||||||
|
|
||||||
|
You can then send a BAT Pack to the storage using a target `s3:/endpoint/bucketname/path-in-bucket`, for example:
|
||||||
|
|
||||||
|
bat pack my_blendfile.blend s3:/storage.qarnot.com/jobs/awesome_work
|
||||||
|
|
||||||
|
This will upload the blend file and its dependencies to `awesome_work/my_blendfile.blend` in
|
||||||
|
the `jobs` bucket.
|
||||||
|
|
||||||
|
|
||||||
## Paths
|
## Paths
|
||||||
|
|
||||||
There are two object types used to represent file paths. Those are strictly separated.
|
There are two object types used to represent file paths. Those are strictly separated.
|
||||||
|
|||||||
@ -30,9 +30,9 @@ def add_parser(subparsers):
|
|||||||
|
|
||||||
def cli_pack(args):
|
def cli_pack(args):
|
||||||
bpath, ppath, tpath = paths_from_cli(args)
|
bpath, ppath, tpath = paths_from_cli(args)
|
||||||
packer = pack.Packer(bpath, ppath, tpath, args.noop)
|
|
||||||
packer.strategise()
|
|
||||||
|
|
||||||
|
packer = create_packer(args, bpath, ppath, tpath)
|
||||||
|
packer.strategise()
|
||||||
try:
|
try:
|
||||||
packer.execute()
|
packer.execute()
|
||||||
except blender_asset_tracer.pack.transfer.FileTransferError as ex:
|
except blender_asset_tracer.pack.transfer.FileTransferError as ex:
|
||||||
@ -41,6 +41,25 @@ def cli_pack(args):
|
|||||||
raise SystemExit(1)
|
raise SystemExit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def create_packer(args, bpath: pathlib.Path, ppath: pathlib.Path,
|
||||||
|
tpath: pathlib.Path) -> pack.Packer:
|
||||||
|
if str(tpath).startswith('s3:/'):
|
||||||
|
if args.noop:
|
||||||
|
raise ValueError('S3 uploader does not support no-op.')
|
||||||
|
|
||||||
|
from blender_asset_tracer.pack import s3
|
||||||
|
|
||||||
|
# Split the target path into 's3:/', hostname, and actual target path
|
||||||
|
parts = tpath.parts
|
||||||
|
endpoint = parts[1]
|
||||||
|
tpath = pathlib.Path(*tpath.parts[2:])
|
||||||
|
log.info('Uploading to S3-compatible storage %s at %s', endpoint, tpath)
|
||||||
|
|
||||||
|
return s3.S3Packer(bpath, ppath, tpath, endpoint=endpoint)
|
||||||
|
|
||||||
|
return pack.Packer(bpath, ppath, tpath, args.noop)
|
||||||
|
|
||||||
|
|
||||||
def paths_from_cli(args) -> typing.Tuple[pathlib.Path, pathlib.Path, pathlib.Path]:
|
def paths_from_cli(args) -> typing.Tuple[pathlib.Path, pathlib.Path, pathlib.Path]:
|
||||||
"""Return paths to blendfile, project, and pack target.
|
"""Return paths to blendfile, project, and pack target.
|
||||||
|
|
||||||
|
|||||||
166
blender_asset_tracer/pack/s3.py
Normal file
166
blender_asset_tracer/pack/s3.py
Normal file
@ -0,0 +1,166 @@
|
|||||||
|
"""Amazon S3-compatible uploader."""
|
||||||
|
import typing
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import logging
|
||||||
|
import pathlib
|
||||||
|
import threading
|
||||||
|
|
||||||
|
from . import Packer, transfer
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def compute_md5(filepath: pathlib.Path) -> str:
|
||||||
|
hasher = hashlib.md5()
|
||||||
|
with filepath.open('rb') as infile:
|
||||||
|
while True:
|
||||||
|
block = infile.read(10240)
|
||||||
|
if not block:
|
||||||
|
break
|
||||||
|
hasher.update(block)
|
||||||
|
md5 = hasher.hexdigest()
|
||||||
|
return md5
|
||||||
|
|
||||||
|
|
||||||
|
class S3Packer(Packer):
|
||||||
|
"""Creates BAT Packs on S3-compatible storage."""
|
||||||
|
|
||||||
|
def __init__(self, *args, endpoint, **kwargs) -> None:
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
import boto3
|
||||||
|
|
||||||
|
# Create a session so that credentials can be read from the [endpoint]
|
||||||
|
# section in ~/.aws/credentials.
|
||||||
|
# See https://boto3.readthedocs.io/en/latest/guide/configuration.html#guide-configuration
|
||||||
|
self.session = boto3.Session(profile_name=endpoint)
|
||||||
|
self.client = self.session.client('s3', endpoint_url='https://%s' % endpoint)
|
||||||
|
|
||||||
|
def set_credentials(self,
|
||||||
|
endpoint: str,
|
||||||
|
access_key_id: str,
|
||||||
|
secret_access_key: str):
|
||||||
|
"""Set S3 credentials."""
|
||||||
|
import boto3
|
||||||
|
|
||||||
|
self.client = boto3.client('s3',
|
||||||
|
endpoint_url=endpoint,
|
||||||
|
aws_access_key_id=access_key_id,
|
||||||
|
aws_secret_access_key=secret_access_key)
|
||||||
|
|
||||||
|
def _create_file_transferer(self) -> transfer.FileTransferer:
|
||||||
|
return S3Transferrer(self.client)
|
||||||
|
|
||||||
|
|
||||||
|
class S3Transferrer(threading.Thread, transfer.FileTransferer):
|
||||||
|
"""Copies or moves files in source directory order."""
|
||||||
|
|
||||||
|
class AbortUpload(Exception):
|
||||||
|
"""Raised from the upload callback to abort an upload."""
|
||||||
|
|
||||||
|
def __init__(self, botoclient) -> None:
|
||||||
|
# Stupid Thread.__init__ doesn't call super().__init__(),
|
||||||
|
# so it doesn't get chained to transfer.FileTransferer.__init__().
|
||||||
|
# However, I want to have Thread as first subclass so that its
|
||||||
|
# start() and join() methods Just Work™.
|
||||||
|
threading.Thread.__init__(self)
|
||||||
|
transfer.FileTransferer.__init__(self)
|
||||||
|
|
||||||
|
self.client = botoclient
|
||||||
|
|
||||||
|
def run(self) -> None:
|
||||||
|
files_transferred = 0
|
||||||
|
files_skipped = 0
|
||||||
|
|
||||||
|
for src, dst, act in self.iter_queue():
|
||||||
|
try:
|
||||||
|
did_upload = self.upload_file(src, dst)
|
||||||
|
files_transferred += did_upload
|
||||||
|
files_skipped += not did_upload
|
||||||
|
|
||||||
|
if act == transfer.Action.MOVE:
|
||||||
|
self.delete_file(src)
|
||||||
|
except Exception:
|
||||||
|
# We have to catch exceptions in a broad way, as this is running in
|
||||||
|
# a separate thread, and exceptions won't otherwise be seen.
|
||||||
|
log.exception('Error transferring %s to %s', src, dst)
|
||||||
|
# Put the files to copy back into the queue, and abort. This allows
|
||||||
|
# the main thread to inspect the queue and see which files were not
|
||||||
|
# copied. The one we just failed (due to this exception) should also
|
||||||
|
# be reported there.
|
||||||
|
self.queue.put((src, dst, act))
|
||||||
|
return
|
||||||
|
|
||||||
|
if files_transferred:
|
||||||
|
log.info('Transferred %d files', files_transferred)
|
||||||
|
if files_skipped:
|
||||||
|
log.info('Skipped %d files', files_skipped)
|
||||||
|
|
||||||
|
def upload_file(self, src: pathlib.Path, dst: pathlib.Path) -> bool:
|
||||||
|
"""Upload a file to an S3 bucket.
|
||||||
|
|
||||||
|
The first part of 'dst' is used as the bucket name, the remained as the
|
||||||
|
path inside the bucket.
|
||||||
|
|
||||||
|
:returns: True if the file was uploaded, False if it was skipped.
|
||||||
|
"""
|
||||||
|
bucket = dst.parts[0]
|
||||||
|
dst_path = pathlib.Path(*dst.parts[1:])
|
||||||
|
md5 = compute_md5(src)
|
||||||
|
key = str(dst_path)
|
||||||
|
|
||||||
|
existing_md5, existing_size = self.get_metadata(bucket, key)
|
||||||
|
if md5 == existing_md5 and src.stat().st_size == existing_size:
|
||||||
|
log.debug('skipping %s, it already exists on the server with MD5 %s',
|
||||||
|
src, existing_md5)
|
||||||
|
return False
|
||||||
|
|
||||||
|
# TODO(Sybren): when queueing files inspect their size, and have a
|
||||||
|
# callback that reports the total progress.
|
||||||
|
log.info('Uploading %s', src)
|
||||||
|
try:
|
||||||
|
self.client.upload_file(str(src),
|
||||||
|
Bucket=bucket,
|
||||||
|
Key=key,
|
||||||
|
Callback=self._upload_callback,
|
||||||
|
ExtraArgs={'Metadata': {'md5': md5}})
|
||||||
|
except self.AbortUpload:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
def _upload_callback(self, bytes_uploaded: int):
|
||||||
|
if self.abort.is_set():
|
||||||
|
log.warning('Interrupting ongoing upload')
|
||||||
|
raise self.AbortUpload('interrupting ongoing upload')
|
||||||
|
|
||||||
|
def delete_file(self, path: pathlib.Path):
|
||||||
|
"""Deletes a file, only logging a warning if deletion fails."""
|
||||||
|
log.debug('Deleting %s, file has been uploaded', path)
|
||||||
|
try:
|
||||||
|
path.unlink()
|
||||||
|
except IOError as ex:
|
||||||
|
log.warning('Unable to delete %s: %s', path, ex)
|
||||||
|
|
||||||
|
def get_metadata(self, bucket: str, key: str) -> typing.Tuple[str, int]:
|
||||||
|
"""Get MD5 sum and size on S3.
|
||||||
|
|
||||||
|
:returns: the MD5 hexadecimal hash and the file size in bytes.
|
||||||
|
If the file does not exist or has no known MD5 sum,
|
||||||
|
returns ('', -1)
|
||||||
|
"""
|
||||||
|
import botocore.exceptions
|
||||||
|
|
||||||
|
try:
|
||||||
|
info = self.client.head_object(Bucket=bucket, Key=key)
|
||||||
|
except botocore.exceptions.ClientError as ex:
|
||||||
|
error_code = ex.response.get('Error').get('Code', 'Unknown')
|
||||||
|
# error_code already is a string, but this makes the code forward
|
||||||
|
# compatible with a time where they use integer codes.
|
||||||
|
if str(error_code) == '404':
|
||||||
|
return '', -1
|
||||||
|
raise ValueError('error response:' % ex.response) from None
|
||||||
|
|
||||||
|
try:
|
||||||
|
return info['Metadata']['md5'], info['ContentLength']
|
||||||
|
except KeyError:
|
||||||
|
return '', -1
|
||||||
Loading…
x
Reference in New Issue
Block a user